Sentiment analysis pipeline #168

Open
wants to merge 6 commits into main
8 changes: 8 additions & 0 deletions runner/app/main.py
@@ -50,6 +50,10 @@ def load_pipeline(pipeline: str, model_id: str) -> any:
            from app.pipelines.upscale import UpscalePipeline

            return UpscalePipeline(model_id)
        case "text-sentiment-analysis":
            from app.pipelines.text_sentiment_analysis import TextSentimentAnalysisPipeline

            return TextSentimentAnalysisPipeline(model_id)
        case "segment-anything-2":
            from app.pipelines.segment_anything_2 import SegmentAnything2Pipeline

@@ -84,6 +88,10 @@ def load_route(pipeline: str) -> any:
            from app.routes import upscale

            return upscale.router
        case "text-sentiment-analysis":
            from app.routes import text_sentiment_analysis

            return text_sentiment_analysis.router
        case "segment-anything-2":
            from app.routes import segment_anything_2

34 changes: 34 additions & 0 deletions runner/app/pipelines/text_sentiment_analysis.py
@@ -0,0 +1,34 @@
import logging
from app.pipelines.base import Pipeline
from app.pipelines.utils import get_model_dir, get_torch_device
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

logger = logging.getLogger(__name__)


class TextSentimentAnalysisPipeline(Pipeline):
    def __init__(self, model_id: str):
        self.model_id = model_id
        kwargs = {}

        torch_device = get_torch_device()
        model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            cache_dir=get_model_dir(),
            **kwargs,
        ).to(torch_device)

        tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=get_model_dir())

        self.ldm = pipeline(
            "sentiment-analysis",
            model=model,
            tokenizer=tokenizer,
            **kwargs,
        )

    def __call__(self, text: str, **kwargs):
        return self.ldm(text, **kwargs)

    def __str__(self) -> str:
        return f"TextSentimentAnalysisPipeline model_id={self.model_id}"
105 changes: 105 additions & 0 deletions runner/app/routes/text_sentiment_analysis.py
@@ -0,0 +1,105 @@
import logging
import os
from typing import Annotated

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import HTTPError, TextSentimentAnalysisResponse, http_error
from fastapi import APIRouter, Depends, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import Field

router = APIRouter()

logger = logging.getLogger(__name__)

RESPONSES = {
    status.HTTP_200_OK: {
        "content": {
            "application/json": {
                "schema": {
                    "x-speakeasy-name-override": "data",
                }
            }
        },
    },
    status.HTTP_400_BAD_REQUEST: {"model": HTTPError},
    status.HTTP_401_UNAUTHORIZED: {"model": HTTPError},
    status.HTTP_500_INTERNAL_SERVER_ERROR: {"model": HTTPError},
}


def handle_pipeline_error(e: Exception) -> JSONResponse:
"""Handles exceptions raised during text processing.

Args:
e: The exception raised during text processing.

Returns:
A JSONResponse with the appropriate error message and status code.
"""
logger.error(f"Text classification processing error: {str(e)}")
status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
error_message = "Internal server error during text processing."

return JSONResponse(
status_code=status_code,
content=http_error(error_message),
)


@router.post(
    "/text-sentiment-analysis",
    response_model=TextSentimentAnalysisResponse,
    responses=RESPONSES,
    description="Analyze the sentiment of the given text input.",
    operation_id="analyzeSentiment",
    summary="Text Sentiment Analysis",
    tags=["analysis"],
    openapi_extra={"x-speakeasy-name-override": "textSentimentAnalysis"},
)
@router.post(
    "/text-sentiment-analysis/",
    response_model=TextSentimentAnalysisResponse,
    responses=RESPONSES,
    include_in_schema=False,
)
async def text_sentiment_analysis(
    model_id: Annotated[
        str,
        Field(
            default="",
            description="Hugging Face model ID used for text classification."
        ),
    ],
    text_input: Annotated[
        str,
        Field(description="Text to analyze. For multiple sentences, separate them with commas.")
    ],
    pipeline: Pipeline = Depends(get_pipeline),
    token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
):
    auth_token = os.environ.get("AUTH_TOKEN")
    if auth_token:
        if not token or token.credentials != auth_token:
            return JSONResponse(
                status_code=status.HTTP_401_UNAUTHORIZED,
                headers={"WWW-Authenticate": "Bearer"},
                content=http_error("Invalid bearer token"),
            )

    if model_id != "" and model_id != pipeline.model_id:
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content=http_error(
                f"pipeline configured with {pipeline.model_id} but called with "
                f"{model_id}"
            ),
        )

    try:
        scores = pipeline(text=text_input.split(","))
        return {"results": scores}
    except Exception as e:
        return handle_pipeline_error(e)
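
For manual testing, something along these lines should exercise the new route, assuming a runner listening on localhost:8000 with no AUTH_TOKEN set (the host, port, and use of the requests library are assumptions, not part of this PR):

# Hypothetical smoke test; both parameters are query parameters per the generated OpenAPI spec.
import requests

resp = requests.post(
    "http://localhost:8000/text-sentiment-analysis",
    params={
        "model_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "text_input": "I love this feature,This is broken",
    },
)
resp.raise_for_status()
print(resp.json())  # {"results": [{"label": ..., "score": ...}, {"label": ..., "score": ...}]}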
11 changes: 10 additions & 1 deletion runner/app/routes/util.py
@@ -2,7 +2,7 @@
import io
import json
import os
from typing import List, Optional
from typing import List, Literal, Optional

import numpy as np
from fastapi import UploadFile
@@ -58,6 +58,15 @@ class TextResponse(BaseModel):
    chunks: List[chunk] = Field(..., description="The generated text chunks.")


class LabelScore(BaseModel):
    label: Literal["negative", "neutral", "positive"]
    score: float


class TextSentimentAnalysisResponse(BaseModel):
    results: list[LabelScore]


class APIError(BaseModel):
"""API error response model."""

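As a sanity check on the new response models, this is roughly the shape a response should serialize to (a sketch assuming Pydantic v2's model_dump(); the label/score values are illustrative):

# Illustrative only: constructing the new response model by hand.
from app.routes.util import LabelScore, TextSentimentAnalysisResponse

resp = TextSentimentAnalysisResponse(
    results=[
        LabelScore(label="positive", score=0.98),
        LabelScore(label="negative", score=0.95),
    ]
)
print(resp.model_dump())
# {'results': [{'label': 'positive', 'score': 0.98}, {'label': 'negative', 'score': 0.95}]}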
3 changes: 3 additions & 0 deletions runner/dl_checkpoints.sh
@@ -35,6 +35,9 @@ function download_beta_models() {
# Download audio-to-text models.
huggingface-cli download openai/whisper-large-v3 --include "*.safetensors" "*.json" --cache-dir models

# Download sentiment-analysis models.
huggingface-cli download cardiffnlp/twitter-roberta-base-sentiment-latest --include "*.json" "*.txt" "*.bin" --cache-dir models

printf "\nDownloading token-gated models...\n"

# Download image-to-video models (token-gated).
86 changes: 86 additions & 0 deletions runner/gateway.openapi.yaml
@@ -303,6 +303,64 @@ paths:
      security:
      - HTTPBearer: []
      x-speakeasy-name-override: segmentAnything2
  /text-sentiment-analysis:
    post:
      tags:
      - analysis
      summary: Text Sentiment Analysis
      description: Analyze the sentiment of the given text input.
      operationId: analyzeSentiment
      security:
      - HTTPBearer: []
      parameters:
      - name: model_id
        in: query
        required: true
        schema:
          type: string
          description: Hugging Face model ID used for text classification.
          title: Model Id
      - name: text_input
        in: query
        required: true
        schema:
          type: string
          description: Text to analyze. For multiple sentences, separate them with
            commas.
          title: Text Input
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TextSentimentAnalysisResponse'
                x-speakeasy-name-override: data
        '400':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Bad Request
        '401':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Unauthorized
        '500':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Internal Server Error
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      x-speakeasy-name-override: textSentimentAnalysis
components:
  schemas:
    APIError:
@@ -586,6 +644,23 @@ components:
      - images
      title: ImageResponse
      description: Response model for image generation.
    LabelScore:
      properties:
        label:
          type: string
          enum:
          - negative
          - neutral
          - positive
          title: Label
        score:
          type: number
          title: Score
      type: object
      required:
      - label
      - score
      title: LabelScore
    MasksResponse:
      properties:
        masks:
@@ -646,6 +721,17 @@ components:
      - chunks
      title: TextResponse
      description: Response model for text generation.
    TextSentimentAnalysisResponse:
      properties:
        results:
          items:
            $ref: '#/components/schemas/LabelScore'
          type: array
          title: Results
      type: object
      required:
      - results
      title: TextSentimentAnalysisResponse
    TextToImageParams:
      properties:
        model_id:
4 changes: 3 additions & 1 deletion runner/gen_openapi.py
@@ -10,6 +10,7 @@
    image_to_image,
    image_to_video,
    segment_anything_2,
    text_sentiment_analysis,
    text_to_image,
    upscale,
)
@@ -123,7 +124,8 @@ def write_openapi(fname: str, entrypoint: str = "runner", version: str = "0.0.0"
    app.include_router(upscale.router)
    app.include_router(audio_to_text.router)
    app.include_router(segment_anything_2.router)

    app.include_router(text_sentiment_analysis.router)

    logger.info(f"Generating OpenAPI schema for '{entrypoint}' entrypoint...")
    openapi = get_openapi(
        title="Livepeer AI Runner",