# fluesterpost/whisper_webservice_interface.py
#
# 87 lines
# 2.6 KiB
# Python

from typing import Optional, Union, Dict, Any
import requests
from utils import convert_to_mp3
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None,
                     language: Optional[str] = None, initial_prompt: Optional[str] = None,
                     encode: Optional[bool] = None, output: Optional[str] = None,
                     word_timestamps: Optional[bool] = None, *,
                     timeout: Optional[Union[float, tuple[float, float]]] = None) -> tuple[int, str]:
    """
    Send a request to the ASR endpoint (``<url>/asr``).

    The audio is uploaded as the multipart form field ``audio_file``. All other
    options are forwarded as query parameters; options left as ``None`` are
    omitted from the request entirely.

    Args:
        url: Base URL of the webservice. A trailing slash is tolerated.
        audio_file_path_or_bytes: Either a filesystem path (``str``), whose
            contents are read and uploaded unmodified, or in-memory audio
            (``bytes``), which is first passed through ``convert_to_mp3`` with
            ``sample_width=2, frame_rate=16000, channels=1``.
            NOTE(review): presumably the bytes are raw 16-bit mono PCM at
            16 kHz -- confirm against ``utils.convert_to_mp3``.
        task, language, initial_prompt, encode, output, word_timestamps:
            Forwarded verbatim as query parameters of the same name.
        timeout: Passed straight to ``requests.post``. ``None`` (the default)
            waits indefinitely, which matches the previous behaviour.

    Returns:
        Always a tuple ``(status_code, response_text)``. On success
        ``response_text`` is the body returned by the service; the caller is
        responsible for checking ``status_code``.

    Raises:
        OSError: If a path was given and the file cannot be opened or read.
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout, ...).
    """
    # Strip trailing slashes so "http://host/" does not become "http://host//asr".
    endpoint = f"{url.rstrip('/')}/asr"

    params = {
        "task": task,
        "language": language,
        "initial_prompt": initial_prompt,
        "encode": encode,
        "output": output,
        "word_timestamps": word_timestamps,
    }
    # Only send options the caller actually set, so the server applies its own defaults.
    params = {k: v for k, v in params.items() if v is not None}

    if isinstance(audio_file_path_or_bytes, str):
        # A string is treated as a path; the file is sent as-is, without conversion.
        with open(audio_file_path_or_bytes, 'rb') as f:
            audio_file = f.read()
    else:
        audio_file = convert_to_mp3(audio_file_path_or_bytes, sample_width=2, frame_rate=16000, channels=1)

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files, timeout=timeout)
    return response.status_code, response.text
def detect_language(url: str, audio_file_path: str, encode: Optional[bool] = None, *,
                    timeout: Optional[Union[float, tuple[float, float]]] = None
                    ) -> Dict[str, Any] | tuple[int, str]:
    """
    Send a request to the Detect Language endpoint (``<url>/detect-language``).

    The file at ``audio_file_path`` is read in binary mode and uploaded
    unmodified as the multipart form field ``audio_file``.

    Args:
        url: Base URL of the webservice. A trailing slash is tolerated.
        audio_file_path: Path of the audio file to upload.
        encode: Forwarded as the ``encode`` query parameter; omitted from the
            request when ``None``.
        timeout: Passed straight to ``requests.post``. ``None`` (the default)
            waits indefinitely, which matches the previous behaviour.

    Returns:
        If the service answers with HTTP 200, the parsed JSON body, which is
        expected to be a dictionary of the form
        ``{'detected_language': '<LANG>', 'language_code': '<LANG_CODE>'}``.
        For any other status code, a tuple ``(status_code, response_text)``.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.RequestException: If the HTTP request itself fails, or if a
            200 response does not contain valid JSON.
    """
    # Strip trailing slashes so "http://host/" does not become "http://host//detect-language".
    endpoint = f"{url.rstrip('/')}/detect-language"

    params = {
        "encode": encode
    }
    # Only send options the caller actually set, so the server applies its own defaults.
    params = {k: v for k, v in params.items() if v is not None}

    with open(audio_file_path, 'rb') as f:
        audio_file = f.read()

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    return response.status_code, response.text
# Example usage
def main():
    """Call both endpoints once for a sample recording and print what each returns."""
    service_url = "http://127.0.0.1:9000"  # Replace with the actual URL of the webservice
    recording = "/run/media/yannik/IC RECORDER/REC_FILE/Interview01/231021_1541.mp3"

    # Transcription first, then language detection, each printed as returned.
    print(send_asr_request(service_url, recording, task="transcribe", language="en"))
    print(detect_language(service_url, recording))


if __name__ == "__main__":
    main()