
Commit

Merge branch 'comments' into 'main'
Backend commented + tidied up a bit

See merge request kicc/mucgpt!14
Meteord committed Jun 25, 2024
2 parents 840503f + 19b98ac commit b85de64
Showing 28 changed files with 474 additions and 87,953 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -143,4 +143,7 @@ cython_debug/
# NPM
npm-debug.log*
node_modules
-static/
+static/
+
+# notebooks
+notebooks
50 changes: 14 additions & 36 deletions app/backend/app.py
@@ -2,15 +2,14 @@
import logging
import os
import time
-from typing import AsyncGenerator, cast
-import csv
-import io
+from typing import cast
from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
from quart import (
Blueprint,
Quart,
+Request,
current_app,
jsonify,
make_response,
@@ -20,11 +19,11 @@
)
from init_app import initApp
from core.modelhelper import num_tokens_from_message
-from core.types.Chunk import Chunk
from core.datahelper import Requestinfo
from core.authentification import AuthentificationHelper, AuthError
from core.types.AppConfig import AppConfig
from core.types.SupportedModels import SupportedModels
+from core.types.countresult import CountResult
+from core.helper import format_as_ndjson

bp = Blueprint("routes", __name__, static_folder='static')

APPCONFIG_KEY = "APPCONFIG"
@@ -66,7 +65,7 @@ async def sum():
text = request_json["text"] if file is None else None
splits = impl.split(detaillevel=detaillevel, file=file, text=text)

-r = await impl.run(splits = splits, department=department, language=request_json["language"] or "Deutsch")
+r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch")
return jsonify(r)
except Exception as e:
logging.exception("Exception in /sum")
@@ -83,7 +82,7 @@ async def brainstorm():

try:
impl = cfg["brainstorm_approaches"]
-r = await impl.run(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department)
+r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department)
return jsonify(r)
except Exception as e:
logging.exception("Exception in /brainstorm")
@@ -129,12 +128,12 @@ async def chat():
max_tokens=request_json['max_tokens'] or 4096
system_message = request_json['system_message'] or None
history = request_json["history"]
-response = impl.run_without_streaming(history= history,
+chatResult = impl.run_without_streaming(history= history,
temperature=temperature,
max_tokens=max_tokens,
system_message=system_message,
department= department)
return jsonify({"content": response})
return jsonify(chatResult)
except Exception as e:
logging.exception("Exception in /chat")
return jsonify({"error": str(e)}), 500
@@ -165,25 +164,14 @@ async def counttokens():
request_json = await request.get_json()
message=request_json['text'] or ""
counted_tokens = num_tokens_from_message(message,model)
-return jsonify({
-    "count": counted_tokens
-})
+return jsonify(CountResult(count=counted_tokens))

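core/types/countresult.py itself is not shown in this excerpt. Given the import above and the BrainstormResult definition further down, it is presumably the analogous one-field TypedDict (a sketch, assumed):

```python
# Presumed content of app/backend/core/types/countresult.py (the file is not
# shown in this diff), mirroring the BrainstormResult TypedDict added below.
from typing import TypedDict


class CountResult(TypedDict):
    count: int
```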
@bp.route("/statistics/export", methods=["GET"])
async def getStatisticsCSV():
cfg = get_config_and_authentificate()
repo = cfg["repository"]

-memfile = io.StringIO()
-outcsv = csv.writer(memfile, delimiter=',',quotechar='"', quoting = csv.QUOTE_MINIMAL)
-outcsv.writerow([column.name for column in Requestinfo.__mapper__.columns])
-[outcsv.writerow([getattr(curr, column.name) for column in Requestinfo.__mapper__.columns]) for curr in repo.getAll()]
-
-memfile.seek(0)
-# Convert the StringIO object into a BytesIO object
-memfile_bytesio = io.BytesIO(memfile.getvalue().encode())
-memfile_bytesio.getvalue()
-return await send_file(memfile_bytesio,
+export = repo.export()
+return await send_file(export,
attachment_filename='statistics.csv',
as_attachment=True)

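The CSV assembly moved out of the route into the repository. core/datahelper.py is not part of this excerpt, so the following free-function sketch only restates what Repository.export() presumably does now, reusing the logic deleted above:

```python
# Sketch of the logic Repository.export() presumably wraps now; reconstructed
# from the CSV code removed from app.py above, not from core/datahelper.py.
import csv
import io
from typing import Iterable, List


def export_rows(rows: Iterable, columns: List[str]) -> io.BytesIO:
    memfile = io.StringIO()
    outcsv = csv.writer(memfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    outcsv.writerow(columns)
    for row in rows:
        outcsv.writerow([getattr(row, name) for name in columns])
    # send_file needs a binary stream, so re-encode the text buffer
    return io.BytesIO(memfile.getvalue().encode())
```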
@@ -200,14 +188,14 @@ def get_config_and_authentificate():
ensure_authentification(request=request)
return cfg

-def ensure_authentification(request: request):
+def ensure_authentification(request: Request):
cfg = get_config()
ssoaccesstoken = request.headers.get("X-Ms-Token-Lhmsso-Access-Token")
auth_client : AuthentificationHelper = cfg["authentification_client"]
claims = auth_client.authentificate(ssoaccesstoken)
return auth_client,claims

-def get_department(request: request):
+def get_department(request: Request):
cfg = get_config()
if cfg["configuration_features"]["backend"]["enable_auth"]:
ssoidtoken = request.headers.get('X-Ms-Token-Lhmsso-Id-Token')
@@ -219,16 +207,6 @@ def get_department(request: request):



-async def format_as_ndjson(r: AsyncGenerator[Chunk, None]) -> AsyncGenerator[str, None]:
-    try:
-        async for event in r:
-            yield json.dumps(event, ensure_ascii=False) + "\n"
-    except Exception as e:
-        logging.exception("Exception while generating response stream: %s", e)
-        msg = "Momentan liegt eine starke Auslastung vor. Bitte in einigen Sekunden erneut versuchen." if "Rate limit" in str(e) else str(e)
-        yield json.dumps(Chunk(type="E", message=msg))


@bp.before_request
async def ensure_openai_token():
cfg = get_config()
File renamed without changes.
@@ -5,6 +5,7 @@
from langchain_community.callbacks import get_openai_callback
from langchain_core.runnables.base import RunnableSerializable

+from brainstorm.brainstormresult import BrainstormResult
from core.types.LlmConfigs import LlmConfigs
from core.datahelper import Repository
from core.types.Config import ApproachConfig
@@ -13,7 +14,7 @@

class Brainstorm():
"""
-Simple brainstorm implementation. One shot generation of certain markdown files
+Simple brainstorm implementation. One shot generation of certain markdown files. Translates the result into a target language.
"""
user_mindmap_prompt = """
In a markdown file (MD) plan out a mind map on the topic {topic}. Follow the format in this example. Write it in a code snippet I can copy from directly. Provide only code, no description. Include the exact format I used below.
@@ -62,17 +63,39 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info
self.repo = repo

def getBrainstormPrompt(self) -> PromptTemplate:
+"""Returns the brainstorm prompt template
+Returns:
+    PromptTemplate: prompt template, has an input variable topic
+"""
return PromptTemplate(input_variables=["topic"], template=self.user_mindmap_prompt)

def getTranslationPrompt(self) -> PromptTemplate:
+"""Returns the translation prompt, which translates the output of the brainstorming into a given language
+Returns:
+    PromptTemplate: prompt template for translation, takes the target language and the brainstorm output as input
+"""
return PromptTemplate(input_variables=["language", "brainstorm"], template=self.user_translate_prompt)


-async def run(self, topic: str, language: str, department: Optional[str]) -> Any:
+async def brainstorm(self, topic: str, language: str, department: Optional[str]) -> BrainstormResult:
+    """Generates ideas for a given topic structured in markdown, translates the result into the target language
+    Args:
+        topic (str): topic of the brainstorming
+        language (str): target language
+        department (Optional[str]): department responsible for the call
+    Returns:
+        BrainstormResult: the structured markdown with ideas about the topic
+    """
# configure
config: LlmConfigs = {
"llm_api_key": self.model_info["openai_api_key"]
}
llm = self.llm.with_config(configurable=config)

# construct chains
brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm")
translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation")
overall_chain = SequentialChain(
@@ -85,20 +108,29 @@ async def run(self, topic: str, language: str, department: Optional[str]) -> Any
result = await overall_chain.acall({"topic": topic, "language": language})
total_tokens = cb.total_tokens

-chat_translate_result = result['translation']
-# In case there are explanations around the markdown
-if("```" in str(chat_translate_result)):
-    splitted = str(chat_translate_result).split("```")
-    if(len(splitted) == 3):
-        chat_translate_result = splitted[1]
+translation = result['translation']
+translation = self.cleanup(str(translation))


if self.config["log_tokens"]:
self.repo.addInfo(Requestinfo(
tokencount = total_tokens,
department = department,
messagecount= 1,
method = "Brainstorm"))
-return {"answer": chat_translate_result}
+return BrainstormResult(answer=translation)

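The SequentialChain constructor arguments are collapsed in this view. From the output keys and the acall() inputs visible above, the wiring is presumably along these lines (a sketch reusing the names from the diff, not the verbatim hidden code):

```python
# Presumed wiring of the collapsed SequentialChain above: the brainstorm chain's
# "brainstorm" output feeds the translation prompt, which emits "translation".
# Keys are taken from the surrounding code; the exact hidden lines may differ.
overall_chain = SequentialChain(
    chains=[brainstormChain, translationChain],
    input_variables=["topic", "language"],
    output_variables=["translation"],
)
```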
+def cleanup(self, chat_translate_result: str) -> str:
+    """Removes leading explanations
+    Args:
+        chat_translate_result (str): the result of the brainstorming, which sometimes has a leading explanation before the markdown part
+    Returns:
+        str: the result of the brainstorming without leading explanations
+    """
+    if("```" in chat_translate_result):
+        splitted = str(chat_translate_result).split("```")
+        if(len(splitted) == 3):
+            chat_translate_result = splitted[1]
+    return chat_translate_result

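A quick illustration of cleanup() in action (hypothetical values; `b` is an assumed Brainstorm instance):

```python
# Hypothetical example of cleanup() keeping only the fenced part when the model
# wraps the mind map in explanations; `b` is an assumed Brainstorm instance.
fence = "`" * 3  # compose the backtick fence to keep this snippet readable
raw = f"Here is your mind map:\n{fence}\n# Topic\n## Idea 1\n{fence}"
print(b.cleanup(raw))  # -> "\n# Topic\n## Idea 1\n"
```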
5 changes: 5 additions & 0 deletions app/backend/brainstorm/brainstormresult.py
@@ -0,0 +1,5 @@
+from typing import TypedDict
+
+
+class BrainstormResult(TypedDict):
+    answer: str
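Since a TypedDict is an ordinary dict at runtime, the /brainstorm route can hand a BrainstormResult straight to jsonify. A minimal check:

```python
# A TypedDict is an ordinary dict at runtime, which is why app.py can pass a
# BrainstormResult straight to jsonify. Illustration only:
result = BrainstormResult(answer="# Mindmap\n## Idea")
assert isinstance(result, dict)
print(result["answer"])
```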
Empty file added app/backend/chat/__init__.py
Empty file.
