Sentiment analysis pipeline #168

Open
wants to merge 6 commits into main
8 changes: 8 additions & 0 deletions runner/app/main.py
@@ -50,6 +50,10 @@ def load_pipeline(pipeline: str, model_id: str) -> any:
            from app.pipelines.upscale import UpscalePipeline

            return UpscalePipeline(model_id)
        case "text-sentiment-analysis":
            from app.pipelines.text_sentiment_analysis import TextSentimentAnalysisPipeline

            return TextSentimentAnalysisPipeline(model_id)
        case "segment-anything-2":
            from app.pipelines.segment_anything_2 import SegmentAnything2Pipeline

@@ -84,6 +88,10 @@ def load_route(pipeline: str) -> any:
            from app.routes import upscale

            return upscale.router
        case "text-sentiment-analysis":
            from app.routes import text_sentiment_analysis

            return text_sentiment_analysis.router
        case "segment-anything-2":
            from app.routes import segment_anything_2

34 changes: 34 additions & 0 deletions runner/app/pipelines/text_sentiment_analysis.py
@@ -0,0 +1,34 @@
import logging
from app.pipelines.base import Pipeline
from app.pipelines.utils import get_model_dir, get_torch_device
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

logger = logging.getLogger(__name__)


class TextSentimentAnalysisPipeline(Pipeline):
    def __init__(self, model_id: str):
        self.model_id = model_id
        kwargs = {}

        torch_device = get_torch_device()
        model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            cache_dir=get_model_dir(),
            **kwargs,
        ).to(torch_device)

        tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=get_model_dir())

        self.ldm = pipeline(
            "sentiment-analysis",
            model=model,
            tokenizer=tokenizer,
            **kwargs,
        )

    def __call__(self, text: str, **kwargs):
        return self.ldm(text, **kwargs)

    def __str__(self) -> str:
        return f"TextSentimentAnalysisPipeline model_id={self.model_id}"
105 changes: 105 additions & 0 deletions runner/app/routes/text_sentiment_analysis.py
@@ -0,0 +1,105 @@
import logging
import os
from typing import Annotated

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import HTTPError, TextSentimentAnalysisResponse, http_error
from fastapi import APIRouter, Depends, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import Field

router = APIRouter()

logger = logging.getLogger(__name__)

RESPONSES = {
    status.HTTP_200_OK: {
        "content": {
            "application/json": {
                "schema": {
                    "x-speakeasy-name-override": "data",
                }
            }
        },
    },
    status.HTTP_400_BAD_REQUEST: {"model": HTTPError},
    status.HTTP_401_UNAUTHORIZED: {"model": HTTPError},
    status.HTTP_500_INTERNAL_SERVER_ERROR: {"model": HTTPError},
}


def handle_pipeline_error(e: Exception) -> JSONResponse:
"""Handles exceptions raised during text processing.

Args:
e: The exception raised during text processing.

Returns:
A JSONResponse with the appropriate error message and status code.
"""
logger.error(f"Text classification processing error: {str(e)}")
status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
error_message = "Internal server error during text processing."

return JSONResponse(
status_code=status_code,
content=http_error(error_message),
)


@router.post(
    "/text-sentiment-analysis",
    response_model=TextSentimentAnalysisResponse,
    responses=RESPONSES,
    description="Analyze the sentiment of the given text input.",
    operation_id="analyzeSentiment",
    summary="Text Sentiment Analysis",
    tags=["analysis"],
    openapi_extra={"x-speakeasy-name-override": "textSentimentAnalysis"},
)
@router.post(
    "/text-sentiment-analysis/",
    response_model=TextSentimentAnalysisResponse,
    responses=RESPONSES,
    include_in_schema=False,
)
async def text_sentiment_analysis(
    model_id: Annotated[
        str,
        Field(
            default="",
            description="Hugging Face model ID used for text classification."
        ),
    ],
    text_input: Annotated[
        str,
        Field(description="Text to analyze. For multiple sentences, separate them with commas.")
    ],
    pipeline: Pipeline = Depends(get_pipeline),
    token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
):
    auth_token = os.environ.get("AUTH_TOKEN")
    if auth_token:
        if not token or token.credentials != auth_token:
            return JSONResponse(
                status_code=status.HTTP_401_UNAUTHORIZED,
                headers={"WWW-Authenticate": "Bearer"},
                content=http_error("Invalid bearer token"),
            )

    if model_id != "" and model_id != pipeline.model_id:
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content=http_error(
                f"pipeline configured with {pipeline.model_id} but called with "
                f"{model_id}"
            ),
        )

    try:
        scores = pipeline(text=text_input.split(","))
        return {"results": scores}
    except Exception as e:
        return handle_pipeline_error(e)
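
For manual testing, something along these lines should exercise the new route, assuming a runner listening on localhost:8000 with no AUTH_TOKEN set (the host, port, and use of the requests library are assumptions, not part of this PR):

# Hypothetical smoke test; both parameters are query parameters per the generated OpenAPI spec.
import requests

resp = requests.post(
    "http://localhost:8000/text-sentiment-analysis",
    params={
        "model_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "text_input": "I love this feature,This is broken",
    },
)
resp.raise_for_status()
print(resp.json())  # {"results": [{"label": ..., "score": ...}, {"label": ..., "score": ...}]}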
11 changes: 10 additions & 1 deletion runner/app/routes/util.py
@@ -2,7 +2,7 @@
import io
import json
import os
from typing import List, Optional
from typing import List, Literal, Optional

import numpy as np
from fastapi import UploadFile
@@ -58,6 +58,15 @@ class TextResponse(BaseModel):
    chunks: List[chunk] = Field(..., description="The generated text chunks.")


class LabelScore(BaseModel):
    label: Literal["negative", "neutral", "positive"]
    score: float


class TextSentimentAnalysisResponse(BaseModel):
    results: list[LabelScore]


class APIError(BaseModel):
"""API error response model."""

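As a sanity check on the new response models, this is roughly the shape a response should serialize to (a sketch assuming Pydantic v2's model_dump(); the label/score values are illustrative):

# Illustrative only: constructing the new response model by hand.
from app.routes.util import LabelScore, TextSentimentAnalysisResponse

resp = TextSentimentAnalysisResponse(
    results=[
        LabelScore(label="positive", score=0.98),
        LabelScore(label="negative", score=0.95),
    ]
)
print(resp.model_dump())
# {'results': [{'label': 'positive', 'score': 0.98}, {'label': 'negative', 'score': 0.95}]}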
3 changes: 3 additions & 0 deletions runner/dl_checkpoints.sh
@@ -35,6 +35,9 @@ function download_beta_models() {
# Download audio-to-text models.
huggingface-cli download openai/whisper-large-v3 --include "*.safetensors" "*.json" --cache-dir models

# Download sentiment-analysis models.
huggingface-cli download cardiffnlp/twitter-roberta-base-sentiment-latest --include "*.json" "*.txt" "*.bin" --cache-dir models

printf "\nDownloading token-gated models...\n"

# Download image-to-video models (token-gated).
86 changes: 86 additions & 0 deletions runner/gateway.openapi.yaml
@@ -303,6 +303,64 @@ paths:
      security:
      - HTTPBearer: []
      x-speakeasy-name-override: segmentAnything2
  /text-sentiment-analysis:
    post:
      tags:
      - analysis
      summary: Text Sentiment Analysis
      description: Analyze the sentiment of the given text input.
      operationId: analyzeSentiment
      security:
      - HTTPBearer: []
      parameters:
      - name: model_id
        in: query
        required: true
        schema:
          type: string
          description: Hugging Face model ID used for text classification.
          title: Model Id
      - name: text_input
        in: query
        required: true
        schema:
          type: string
          description: Text to analyze. For multiple sentences, separate them with
            commas.
          title: Text Input
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TextSentimentAnalysisResponse'
                x-speakeasy-name-override: data
        '400':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Bad Request
        '401':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Unauthorized
        '500':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPError'
          description: Internal Server Error
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      x-speakeasy-name-override: textSentimentAnalysis
components:
  schemas:
    APIError:
@@ -586,6 +644,23 @@ components:
      - images
      title: ImageResponse
      description: Response model for image generation.
    LabelScore:
      properties:
        label:
          type: string
          enum:
          - negative
          - neutral
          - positive
          title: Label
        score:
          type: number
          title: Score
      type: object
      required:
      - label
      - score
      title: LabelScore
    MasksResponse:
      properties:
        masks:
@@ -646,6 +721,17 @@ components:
      - chunks
      title: TextResponse
      description: Response model for text generation.
    TextSentimentAnalysisResponse:
      properties:
        results:
          items:
            $ref: '#/components/schemas/LabelScore'
          type: array
          title: Results
      type: object
      required:
      - results
      title: TextSentimentAnalysisResponse
    TextToImageParams:
      properties:
        model_id:
4 changes: 3 additions & 1 deletion runner/gen_openapi.py
@@ -10,6 +10,7 @@
    image_to_image,
    image_to_video,
    segment_anything_2,
    text_sentiment_analysis,
    text_to_image,
    upscale,
)
@@ -123,7 +124,8 @@ def write_openapi(fname: str, entrypoint: str = "runner", version: str = "0.0.0"
    app.include_router(upscale.router)
    app.include_router(audio_to_text.router)
    app.include_router(segment_anything_2.router)

    app.include_router(text_sentiment_analysis.router)

    logger.info(f"Generating OpenAPI schema for '{entrypoint}' entrypoint...")
    openapi = get_openapi(
        title="Livepeer AI Runner",