[Inference] Added Two Examples for GenAI Application (intel#164)

* added two examples for genAI application * add the CI test for newly-added models * remove the sqlcoder CI tests * refine the sqlcoder file and test it with CI * remove the sqlcoder CI
harborn · Apr 1, 2024 · 674b800 · 674b800
1 parent 6f47e75
commit 674b800
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 1 deletion.
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b ]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
 

diff --git a/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml b/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
@@ -0,0 +1,26 @@
+port: 8000
+name: neural-chat-7b-v3-3
+route_prefix: /neural-chat-7b-v3-3
+num_replicas: 1
+cpus_per_worker: 0
+gpus_per_worker: 0
+hpus_per_worker: 1
+deepspeed: false
+workers_per_group: 2
+device: "hpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:  
+  model_id_or_path: Intel/neural-chat-7b-v3-3
+  tokenizer_name_or_path: Intel/neural-chat-7b-v3-3
+  chat_processor: ChatModelGptJ
+  prompt:
+    intro: '### System:
+      You are a chatbot developed by Intel. Please answer all questions to the best of your ability.'
+    human_id: '
+
+      ### User'
+    bot_id: '
+
+      ### Assistant'
diff --git a/llm_on_ray/inference/models/sqlcoder-7b-2.yaml b/llm_on_ray/inference/models/sqlcoder-7b-2.yaml
@@ -0,0 +1,22 @@
+port: 8000
+name: sqlcoder-7b-2
+route_prefix: /sqlcoder-7b-2
+cpus_per_worker: 22
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: "cpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: defog/sqlcoder-7b-2
+  tokenizer_name_or_path: defog/sqlcoder-7b-2
+  chat_processor: ChatModelLLama
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: ["```"]
+  config:
+    use_auth_token: ''