diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index be730afb1..65d1d7fe5 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b ]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
diff --git a/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml b/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
new file mode 100644
index 000000000..4e4229bc8
--- /dev/null
+++ b/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
@@ -0,0 +1,26 @@
+port: 8000
+name: neural-chat-7b-v3-3
+route_prefix: /neural-chat-7b-v3-3
+num_replicas: 1
+cpus_per_worker: 0
+gpus_per_worker: 0
+hpus_per_worker: 1
+deepspeed: false
+workers_per_group: 2
+device: "hpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: Intel/neural-chat-7b-v3-3
+  tokenizer_name_or_path: Intel/neural-chat-7b-v3-3
+  chat_processor: ChatModelGptJ
+  prompt:
+    intro: '### System:
+      You are a chatbot developed by Intel. Please answer all questions to the best of your ability.'
+    human_id: '
+
+      ### User'
+    bot_id: '
+
+      ### Assistant'
diff --git a/llm_on_ray/inference/models/sqlcoder-7b-2.yaml b/llm_on_ray/inference/models/sqlcoder-7b-2.yaml
new file mode 100644
index 000000000..480453fd3
--- /dev/null
+++ b/llm_on_ray/inference/models/sqlcoder-7b-2.yaml
@@ -0,0 +1,22 @@
+port: 8000
+name: sqlcoder-7b-2
+route_prefix: /sqlcoder-7b-2
+cpus_per_worker: 22
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: "cpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: defog/sqlcoder-7b-2
+  tokenizer_name_or_path: defog/sqlcoder-7b-2
+  chat_processor: ChatModelLLama
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: ["```"]
+  config:
+    use_auth_token: ''
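
For context, below is a minimal sketch of how the new sqlcoder-7b-2 deployment might be queried once it has been served. The host, port (8000), and route prefix (/sqlcoder-7b-2) come from the sqlcoder-7b-2.yaml added in this diff; the JSON payload and response handling are assumptions for illustration, not an API taken from this change.

```python
# Hypothetical client for the sqlcoder-7b-2 endpoint added in this PR.
# Port 8000 and route_prefix /sqlcoder-7b-2 come from sqlcoder-7b-2.yaml;
# the request/response shape below is an assumption, not from the diff.
import requests

PROMPT = (
    "### Task\n"
    "Generate a SQL query to answer: How many orders were placed in 2023?\n"
    "### Database Schema\n"
    "CREATE TABLE orders (id INT, placed_at DATE);\n"
    "### SQL\n"
)

resp = requests.post(
    "http://127.0.0.1:8000/sqlcoder-7b-2",
    json={"text": PROMPT, "config": {"max_new_tokens": 128}, "stream": False},
    timeout=120,
)
resp.raise_for_status()
# The stop_words entry ["```"] in the config is intended to cut generation
# off at the end of the SQL code block in the model's output.
print(resp.text)
```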