Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Completed Extended File Support for Flashcard Generator #98

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ COPY requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir -r /code/requirements.txt

COPY ./app /code/app
COPY ./app /code/app

COPY .env /code/.env

# Local development key set
# ENV TYPES: dev, production
Expand Down
49 changes: 36 additions & 13 deletions app/api/router.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Form
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from typing import Union
from typing import Union, List, Dict, Any, Optional
from app.services.schemas import ToolRequest, ChatRequest, Message, ChatResponse, ToolResponse
from app.utils.auth import key_check
from app.services.logger import setup_logger
from app.api.error_utilities import InputValidationError, ErrorResponse
from app.api.tool_utilities import load_tool_metadata, execute_tool, finalize_inputs
from app.features.dynamo.core import executor as dynamo_executor
import json

logger = setup_logger(__name__)
router = APIRouter()
Expand All @@ -16,22 +18,36 @@ def read_root():
return {"Hello": "World"}

@router.post("/submit-tool", response_model=Union[ToolResponse, ErrorResponse])
async def submit_tool( data: ToolRequest, _ = Depends(key_check)):
try:
# Unpack GenericRequest for tool data
request_data = data.tool_data
async def submit_tool(
data: Optional[str] = Form(""),
youtube_url: str = Form(""),
files: List[UploadFile] = File(None),
max_flashcards: int = Query(10),
_ = Depends(key_check)
):
try:
result = {}

if data:
request_data = tool_request.tool_data
requested_tool = load_tool_metadata(request_data.tool_id)
request_inputs_dict = finalize_inputs(request_data.inputs, requested_tool['inputs'])

requested_tool = load_tool_metadata(request_data.tool_id)
result = execute_tool(request_data.tool_id, request_inputs_dict)

request_inputs_dict = finalize_inputs(request_data.inputs, requested_tool['inputs'])

result = execute_tool(request_data.tool_id, request_inputs_dict)
# Handle additional features
if youtube_url or files:
try:
flashcards = dynamo_executor(youtube_url=youtube_url, files=files, verbose=True, max_flashcards=max_flashcards)
result['flashcards'] = flashcards
except Exception as e:
logger.error(f"Error processing content: {e}")
raise HTTPException(status_code=500, detail="Failed to process content.")

return ToolResponse(data=result)

except InputValidationError as e:
logger.error(f"InputValidationError: {e}")

return JSONResponse(
status_code=400,
content=jsonable_encoder(ErrorResponse(status=400, message=e.message))
Expand All @@ -43,9 +59,16 @@ async def submit_tool( data: ToolRequest, _ = Depends(key_check)):
status_code=e.status_code,
content=jsonable_encoder(ErrorResponse(status=e.status_code, message=e.detail))
)

except Exception as e:
logger.error(f"Error processing request: {e}")
return JSONResponse(
status_code=500,
content=jsonable_encoder(ErrorResponse(status=500, message="Internal Server Error"))
)

@router.post("/chat", response_model=ChatResponse)
async def chat( request: ChatRequest, _ = Depends(key_check) ):
async def chat(request: ChatRequest, _ = Depends(key_check)):
from app.features.Kaichat.core import executor as kaichat_executor

user_name = request.user.fullName
Expand All @@ -60,4 +83,4 @@ async def chat( request: ChatRequest, _ = Depends(key_check) ):
payload={"text": response}
)

return ChatResponse(data=[formatted_response])
return ChatResponse(data=[formatted_response])
62 changes: 48 additions & 14 deletions app/features/dynamo/core.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,55 @@
from app.features.dynamo.tools import summarize_transcript, generate_flashcards
from fastapi import UploadFile
from app.services.logger import setup_logger
from app.api.error_utilities import VideoTranscriptError
from app.features.dynamo.tools import get_loader, summarize_transcript, generate_flashcards, summarize_documents

logger = setup_logger(__name__)

# NOTE(review): this span looks like a flattened diff. The first `def executor`
# and the bare sanitizing loop right after the second `def` appear to be the
# removed (old) lines; the second `def` plus the `if youtube_url:` / `if files:`
# sections appear to be the added (new) lines. Confirm against the real file.
def executor(youtube_url: str, verbose=False):
summary = summarize_transcript(youtube_url, verbose=verbose)
flashcards = generate_flashcards(summary)

# Build flashcards from an optional YouTube URL and/or a list of uploaded files
# and return them as one accumulated list.
def executor(youtube_url: str = None, files: list[UploadFile] = None, verbose=False, max_flashcards=10):
sanitized_flashcards = []
for flashcard in flashcards:
if 'concept' in flashcard and 'definition' in flashcard:
sanitized_flashcards.append({
"concept": flashcard['concept'],
"definition": flashcard['definition']
})
else:
logger.warning(f"Malformed flashcard skipped: {flashcard}")

return sanitized_flashcards
if youtube_url:
try:
logger.info(f"Processing YouTube URL: {youtube_url}")
summary = summarize_transcript(youtube_url, verbose=verbose)
logger.info(f"Summary for YouTube URL: {summary}")
flashcards = generate_flashcards(summary, max_flashcards=max_flashcards, verbose=verbose)
# Keep only flashcards that carry both keys; anything else is logged and dropped.
for flashcard in flashcards:
if 'concept' in flashcard and 'definition' in flashcard:
sanitized_flashcards.append({
"concept": flashcard['concept'],
"definition": flashcard['definition']
})
else:
logger.warning(f"Malformed flashcard skipped: {flashcard}")
# Both handlers log the failure and re-raise it as ValueError.
except VideoTranscriptError as e:
logger.error(f"Error in processing YouTube URL -> {e}")
raise ValueError(f"Error in processing YouTube URL: {e}")
except Exception as e:
logger.error(f"Error in executor: {e}")
raise ValueError(f"Error in executor: {e}")

if files:
for file in files:
try:
logger.info(f"Processing file: {file.filename}")
loader_class = get_loader(file)

# Load the documents
loader = loader_class([file])
documents = loader.load()
# NOTE(review): this logs the loaded documents in full at info level.
logger.info(f"Documents loaded: {documents}")

# Summarize the documents
summary = summarize_documents(documents)
logger.info(f"Summary for file {file.filename}: {summary}")

# Generate flashcards
flashcards = generate_flashcards(summary, verbose=verbose, max_flashcards=max_flashcards)
# NOTE(review): unlike the YouTube branch, these flashcards are appended without
# the 'concept'/'definition' key check. The slice caps each file at max_flashcards,
# so the combined list can be longer than max_flashcards.
sanitized_flashcards.extend(flashcards[:max_flashcards])
except Exception as e:
logger.error(f"Error in processing {file.filename} -> {e}")
raise ValueError(f"Error in processing {file.filename}: {e}")

return sanitized_flashcards

Empty file.
25 changes: 25 additions & 0 deletions app/features/dynamo/loaders/csv_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import csv
from fastapi import UploadFile
from langchain_core.documents import Document

class CSVLoader:
    """Load uploaded CSV files into one ``Document`` per file.

    Every CSV row becomes one line of text whose cells are joined by ", ".
    """

    def __init__(self, files: list[UploadFile]):
        # Uploads to read; ``load`` uses each one's ``.file`` and ``.filename``.
        self.files = files

    def load(self) -> list:
        """Parse every upload and return the resulting documents.

        Returns:
            A list with one ``Document`` per file. ``page_content`` holds the
            rows separated by newlines and ``metadata["source"]`` holds the
            upload's filename.

        Raises:
            UnicodeDecodeError: If an upload's bytes are not valid UTF-8.
        """
        import io  # local import: only this method needs it

        documents = []

        for upload_file in self.files:
            # Using the file object as a context manager closes it after reading.
            with upload_file.file as csv_file:
                csv_file.seek(0)
                # "utf-8-sig" also accepts plain UTF-8 but drops a leading BOM,
                # which would otherwise end up inside the first cell.
                text = csv_file.read().decode("utf-8-sig")

            # Feed the parser a stream opened with newline="" instead of
            # text.splitlines(): splitlines() strips line endings, so newlines
            # inside quoted fields were lost, and it also breaks rows on
            # characters that are not CSV record separators.
            reader = csv.reader(io.StringIO(text, newline=""))
            content = "\n".join(", ".join(row) for row in reader)

            metadata = {"source": upload_file.filename}
            documents.append(Document(page_content=content, metadata=metadata))

        return documents
22 changes: 22 additions & 0 deletions app/features/dynamo/loaders/docx_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import docx
from fastapi import UploadFile
from langchain_core.documents import Document

class DOCXLoader:
    """Turn uploaded DOCX files into one ``Document`` per file."""

    def __init__(self, files: list[UploadFile]):
        # Uploads to read; ``load`` uses each one's ``.file`` and ``.filename``.
        self.files = files

    def load(self) -> list:
        """Return one ``Document`` per upload.

        The page content is the text of every paragraph joined by newlines;
        ``metadata["source"]`` is the upload's filename.
        """
        results = []

        for upload in self.files:
            parsed = docx.Document(upload.file)
            body = "\n".join(paragraph.text for paragraph in parsed.paragraphs)
            results.append(
                Document(page_content=body, metadata={"source": upload.filename})
            )

        return results
21 changes: 21 additions & 0 deletions app/features/dynamo/loaders/pdf_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pypdf import PdfReader
from fastapi import UploadFile
from langchain_core.documents import Document

class PDFLoader:
    """Turn uploaded PDF files into one ``Document`` per page."""

    def __init__(self, files: list[UploadFile]):
        # Uploads to read; ``load`` uses each one's ``.file`` and ``.filename``.
        self.files = files

    def load(self) -> list:
        """Return a ``Document`` for every page of every upload.

        Each document's metadata records the upload's filename under
        ``"source"`` and the 1-based page index under ``"page_number"``.
        """
        results = []

        for upload in self.files:
            # The file object is used as a context manager, so it is closed
            # once all of its pages have been extracted.
            with upload.file as stream:
                reader = PdfReader(stream)
                for number, page in enumerate(reader.pages, start=1):
                    text = page.extract_text()
                    info = {"source": upload.filename, "page_number": number}
                    results.append(Document(page_content=text, metadata=info))

        return results
24 changes: 24 additions & 0 deletions app/features/dynamo/loaders/pptx_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from pptx import Presentation
from fastapi import UploadFile
from langchain_core.documents import Document

class PPTXLoader:
    """Turn uploaded PPTX files into one ``Document`` per file."""

    def __init__(self, files: list[UploadFile]):
        # Uploads to read; ``load`` uses each one's ``.file`` and ``.filename``.
        self.files = files

    def load(self) -> list:
        """Return one ``Document`` per upload.

        The page content is the ``text`` of every shape that has such an
        attribute, in slide order, joined by newlines; ``metadata["source"]``
        is the upload's filename.
        """
        results = []

        for upload in self.files:
            deck = Presentation(upload.file)
            pieces = [
                shape.text
                for slide in deck.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            ]
            results.append(
                Document(
                    page_content="\n".join(pieces),
                    metadata={"source": upload.filename},
                )
            )

        return results
23 changes: 23 additions & 0 deletions app/features/dynamo/loaders/xlsx_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import openpyxl
from fastapi import UploadFile
from langchain_core.documents import Document

class XLSXLoader:
    """Load uploaded XLSX workbooks into one ``Document`` per file.

    Every row of every worksheet becomes one line of text whose cells are
    joined by ", ".
    """

    def __init__(self, files: list[UploadFile]):
        # Uploads to read; ``load`` uses each one's ``.file`` and ``.filename``.
        self.files = files

    def load(self) -> list:
        """Read every upload and return the resulting documents.

        Returns:
            A list with one ``Document`` per file. ``page_content`` holds the
            rows of all worksheets separated by newlines and
            ``metadata["source"]`` holds the upload's filename.
        """
        documents = []

        for upload_file in self.files:
            wb = openpyxl.load_workbook(upload_file.file)
            full_text = []
            for sheet in wb.worksheets:
                for row in sheet.iter_rows(values_only=True):
                    # A cell value of None is written as an empty string;
                    # str(None) would put the literal word "None" into the
                    # text. Other values are stringified as before.
                    full_text.append(
                        ", ".join("" if cell is None else str(cell) for cell in row)
                    )

            content = "\n".join(full_text)
            metadata = {"source": upload_file.filename}
            documents.append(Document(page_content=content, metadata=metadata))

        return documents
14 changes: 14 additions & 0 deletions app/features/dynamo/loaders/youtube_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from langchain_community.document_loaders import YoutubeLoader
from app.api.error_utilities import VideoTranscriptError

class YoutubeTranscriptLoader:
    """Load the transcript documents for a single YouTube URL."""

    def __init__(self, url: str):
        # URL handed to ``YoutubeLoader.from_youtube_url`` in ``load``.
        self.url = url

    def load(self) -> list:
        """Return the documents produced by ``YoutubeLoader`` for ``self.url``.

        Raises:
            VideoTranscriptError: If creating the loader or loading the
                documents raises any exception. The original exception is
                attached as ``__cause__``.
        """
        try:
            loader = YoutubeLoader.from_youtube_url(self.url, add_video_info=True)
            docs = loader.load()
        except Exception as e:
            # "from e" keeps the underlying failure as the explicit cause so
            # the traceback shows it as the direct reason for this error.
            raise VideoTranscriptError(
                f"No video found or failed to load transcript: {e}"
            ) from e
        return docs
27 changes: 26 additions & 1 deletion app/features/dynamo/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,36 @@
"label": "Youtube URL",
"name": "youtube_url",
"type": "string"
},
{
"label": "PDF File",
"name": "pdf_files",
"type": "file"
},
{
"label": "DOCX File",
"name": "docx_files",
"type": "file"
},
{
"label": "PPTX File",
"name": "pptx_files",
"type": "file"
},
{
"label": "XLSX File",
"name": "xlsx_files",
"type": "file"
},
{
"label": "CSV File",
"name": "csv_files",
"type": "file"
}
],
"models": {
"Gemini 1.0": "gemini-1.0-pro",
"Gemini 1.5 Flash": "gemini-1.5-flash",
"Gemini 1.5 Pro": "gemini-1.5-pro"
}
}
}
Loading