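"""Small wrapper around faster-whisper: model lifecycle management plus
transcription helpers for raw 16-bit PCM buffers and audio files."""
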
from typing import Iterable, Literal, Tuple

import numpy as np

import faster_whisper
from faster_whisper import WhisperModel

_model: WhisperModel | None = None

ModelSize = Literal["tiny", "base", "small", "medium", "large-v1", "large-v2"]
Device = Literal["cuda", "cpu", "auto"]
ComputeType = Literal["8bit", "16bit", "32bit"]


def set_model(size: ModelSize, device: Device):  # , compute_type: ComputeType):
    # Compute-type selection is disabled for now, so WhisperModel uses its
    # default compute type for the chosen device. The original mapping is kept
    # below for when the parameter is reinstated.
    '''
    compute = None
    if compute_type == '8bit':
        if device == 'cuda' or device == 'auto':
            compute = 'int8_float16'
        else:
            compute = 'int8'
    elif compute_type == '16bit':
        if device == 'cuda' or device == 'auto':
            compute = 'float16'
        else:
            raise Exception("Cannot do 16-bit computation on CPU")
    elif compute_type == '32bit':
        compute = 'float32'
    else:
        raise Exception(f"Invalid compute/device configuration (device {device} with {compute_type})")
    '''

    global _model
    _model = WhisperModel(size, device=device)
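    # (If the requested size isn't cached locally, the WhisperModel call above
    # downloads the converted weights from the Hugging Face Hub first.)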


def unload_model():
    if not is_model_loaded():
        return

    global _model
    _model = None  # TODO: check if this works
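    # Dropping the last reference should let Python free the underlying
    # CTranslate2 model; whether GPU memory is actually released promptly is
    # the open question behind the TODO above.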


def is_model_loaded() -> bool:
    return _model is not None


def transcribe_from_i16_audio(audio: bytes) -> Tuple[Iterable[faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
    """
    Transcribe audio from a raw signed 16-bit PCM buffer. The buffer is
    assumed to be 16 kHz mono, which is what faster-whisper expects when given
    a raw array.

    Note that this can - and will - crash if you don't catch exceptions.

    If the model isn't loaded yet, this will return None.
    Otherwise, it will return the raw transcription from `faster-whisper`.
    """
    if not is_model_loaded():
        return None

    data = np.frombuffer(audio, dtype=np.int16)

    # Convert s16 to f32: divide by the int16 full-scale value so samples land
    # in [-1.0, 1.0), the range faster-whisper expects.
    data = data.astype(np.float32) / 32768.0

    # Transcribe, and let all exceptions propagate to the application to handle.
    segments, info = _model.transcribe(data, beam_size=5)

    return segments, info
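

# Example of producing compatible input for transcribe_from_i16_audio from a
# 16 kHz mono NumPy signal. `samples` is a hypothetical float32 array with
# values in [-1.0, 1.0]:
#
#     pcm = (samples * 32767).astype(np.int16).tobytes()
#     result = transcribe_from_i16_audio(pcm)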


def transcribe_from_file(mp3_path: str) -> Tuple[Iterable[faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
    """
    Transcribe audio from an MP3 file.

    Note that this can - and will - crash if you don't catch exceptions.

    If the model isn't loaded yet, this will return None.
    Otherwise, it will return the raw transcription from `faster-whisper`.
    """
    if not is_model_loaded():
        return None

    # Transcribe, and let all exceptions propagate to the application to handle.
    segments, info = _model.transcribe(mp3_path, beam_size=5)

    return segments, info
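

# A minimal usage sketch, assuming a "sample.mp3" file exists next to this
# module (the filename is only an example). The Segment and TranscriptionInfo
# attributes used here are part of faster-whisper's public API.
if __name__ == "__main__":
    set_model("base", "cpu")

    result = transcribe_from_file("sample.mp3")
    if result is not None:
        segments, info = result
        print(f"Detected language: {info.language} (p={info.language_probability:.2f})")
        for segment in segments:
            print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")

    unload_model()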