Enable new models in audio-to-text #163

Open · wants to merge 11 commits into main
13 changes: 11 additions & 2 deletions runner/app/pipelines/audio_to_text.py
@@ -15,6 +15,8 @@

MODEL_INCOMPATIBLE_EXTENSIONS = {
"openai/whisper-large-v3": ["mp4", "m4a", "ac3"],
"openai/whisper-medium": ["mp4", "m4a", "ac3"],
"distil-whisper/distil-large-v3": ["mp4", "m4a", "ac3"]
}
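The `MODEL_INCOMPATIBLE_EXTENSIONS` table above drives the audio-conversion fallback later in the pipeline. A minimal sketch of how such a lookup could work (the `needs_conversion` helper is hypothetical, not part of this PR):

```python
# Extensions each model cannot ingest directly; uploads with these
# extensions are transcoded (e.g. to mp3) before transcription.
MODEL_INCOMPATIBLE_EXTENSIONS = {
    "openai/whisper-large-v3": ["mp4", "m4a", "ac3"],
    "openai/whisper-medium": ["mp4", "m4a", "ac3"],
    "distil-whisper/distil-large-v3": ["mp4", "m4a", "ac3"],
}


def needs_conversion(model_id: str, filename: str) -> bool:
    """Hypothetical helper: True when the file's extension is one the
    given model cannot read, so the pipeline should convert it first."""
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    return ext in MODEL_INCOMPATIBLE_EXTENSIONS.get(model_id, [])
```

Models absent from the table are assumed to accept any extension, which matches the table being an allow-by-default denylist.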


@@ -40,7 +42,14 @@ def __init__(self, model_id: str):
kwargs["torch_dtype"] = torch.float16
kwargs["variant"] = "fp16"

if os.environ.get("BFLOAT16"):
float16_enabled = os.getenv("FLOAT16", "").strip().lower() == "true"
bfloat16_enabled = os.getenv("BFLOAT16", "").strip().lower() == "true"

if float16_enabled:
logger.info("AudioToTextPipeline using float16 precision for %s", model_id)
kwargs["torch_dtype"] = torch.float16

if bfloat16_enabled:
logger.info("AudioToTextPipeline using bfloat16 precision for %s", model_id)
Collaborator:
@eliteprox, thanks for the pull request! 🚀 It looks good overall. However, please keep in mind that the default models openai/whisper-large-v3 and distil-whisper/distil-large-v3 ship weights in either float16 or bfloat16 format; the torch_dtype parameter primarily controls the precision of calculations at runtime. You can verify this by checking the model files in these repositories: Hugging Face - distil-large-v3. Notice the files with the .fp32.safetensors extension, which indicate the weight format being used.

If the standard .safetensors (fp16) format meets your needs, you might consider removing the FLOAT16 environment variable and instead switch based on the model extension. This approach was implemented by Yondon in this commit. I will leave that decision to you based on your research 👍🏻. Feel free to merge when you think this pull request is done 🚀.

Collaborator (Author):

Thanks for the tip! I updated the logic to load the recommended float precision per model, and tested that the weights download and load correctly.

kwargs["torch_dtype"] = torch.bfloat16
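The flag handling in the diff above can be condensed into a standalone helper for clarity. This is a sketch only; `precision_from_env` and the float32 default are assumptions, not code from the PR. Note that because the diff's `if` blocks run sequentially and the bfloat16 assignment comes last, BFLOAT16 wins when both flags are set:

```python
import os


def precision_from_env(env=None) -> str:
    """Hypothetical helper mirroring the diff's flag parsing: a flag is
    enabled only when its value is exactly "true" (case-insensitive,
    surrounding whitespace ignored). BFLOAT16 takes precedence over
    FLOAT16, matching the order of the `if` blocks in the diff; with
    neither flag set, the dtype falls back to float32.
    """
    env = os.environ if env is None else env

    def enabled(name: str) -> bool:
        return env.get(name, "").strip().lower() == "true"

    if enabled("BFLOAT16"):
        return "bfloat16"
    if enabled("FLOAT16"):
        return "float16"
    return "float32"
```

One consequence of the strict `== "true"` comparison is that values like `1` or `yes` silently disable the flag, which may be worth documenting for operators.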

@@ -75,7 +84,7 @@ def __call__(self, audio: UploadFile, **kwargs) -> List[File]:
audio_converter = AudioConverter()
converted_bytes = audio_converter.convert(audio, "mp3")
audio_converter.write_bytes_to_file(converted_bytes, audio)

return self.ldm(audio.file.read(), **kwargs)

def __str__(self) -> str: