# fluesterpost/whisper_webservice_interfac...

from typing import Optional, Union, Dict, Any

import requests

from utils import convert_to_mp3


def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
                     initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
                     output: Optional[str] = None, word_timestamps: Optional[bool] = None,
                     timeout: Optional[float] = None) -> tuple[int, str]:
    """
    Send a request to the ASR endpoint (``<url>/asr``).

    Args:
        url: Base URL of the webservice. Trailing slashes are stripped so that
            "http://host:9000/" and "http://host:9000" address the same endpoint.
        audio_file_path_or_bytes: Either a path (``str``) to an audio file, whose content is
            uploaded unchanged, or audio ``bytes``, which are first passed through
            ``convert_to_mp3`` with sample_width=2, frame_rate=16000, channels=1
            (presumably 16-bit mono PCM at 16 kHz -- confirm against utils.convert_to_mp3).
        task, language, initial_prompt, encode, output, word_timestamps: Optional query
            parameters forwarded under the same names. Parameters left as ``None`` are
            omitted from the request entirely.
        timeout: Timeout in seconds handed to ``requests.post``. ``None`` (the default)
            waits indefinitely, which matches the previous behaviour.

    Returns:
        Always a tuple of the form (status_code, response_text), for successful and failed
        requests alike. On success the response text is the body produced by the service;
        callers have to inspect the status code themselves.

    Raises:
        OSError: If a file path was given and the file cannot be opened or read.
        requests.exceptions.RequestException: If the request cannot be completed
            (connection error, timeout, ...).
    """
    # Normalise the base URL so a trailing slash does not yield ".../​/asr"-style paths.
    endpoint = f"{url.rstrip('/')}/asr"

    optional_params = {
        "task": task,
        "language": language,
        "initial_prompt": initial_prompt,
        "encode": encode,
        "output": output,
        "word_timestamps": word_timestamps,
    }

    # Only send what the caller actually set.
    params = {k: v for k, v in optional_params.items() if v is not None}

    if isinstance(audio_file_path_or_bytes, str):
        # A string is interpreted as a path; the file content is sent as-is.
        with open(audio_file_path_or_bytes, 'rb') as f:
            audio_file = f.read()
    else:
        audio_file = convert_to_mp3(audio_file_path_or_bytes, sample_width=2, frame_rate=16000, channels=1)

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files, timeout=timeout)

    return response.status_code, response.text


def detect_language(url: str, audio_file_path: str, encode: Optional[bool] = None,
                    timeout: Optional[float] = None) -> Dict[str, Any] | tuple[int, str]:
    """
    Send a request to the Detect Language endpoint (``<url>/detect-language``).

    Args:
        url: Base URL of the webservice. Trailing slashes are stripped so that
            "http://host:9000/" and "http://host:9000" address the same endpoint.
        audio_file_path: Path to the audio file; its content is uploaded unchanged.
        encode: Optional query parameter forwarded as ``encode``; omitted when ``None``.
        timeout: Timeout in seconds handed to ``requests.post``. ``None`` (the default)
            waits indefinitely, which matches the previous behaviour.

    Returns:
        The decoded JSON body if the service answered with status 200 -- according to the
        service this is a dictionary of the form
        {'detected_language': '<LANG>', 'language_code': '<LANG_CODE>'} --
        or a tuple of the form (status_code, response_text) for any other status code.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.exceptions.RequestException: If the request cannot be completed
            (connection error, timeout, ...).
        ValueError: If a 200 response carries a body that is not valid JSON
            (raised by ``response.json()``).
    """
    # Normalise the base URL so a trailing slash does not yield a double slash in the path.
    endpoint = f"{url.rstrip('/')}/detect-language"

    # Only send the parameter when the caller actually set it.
    params = {} if encode is None else {"encode": encode}

    with open(audio_file_path, 'rb') as f:
        audio_file = f.read()

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    return response.status_code, response.text


# Example usage
def main():
    """Call both endpoints for one sample recording and print each result."""
    service_url = "http://127.0.0.1:9000"  # Replace with the actual URL of the webservice
    recording = "/run/media/yannik/IC RECORDER/REC_FILE/Interview01/231021_1541.mp3"

    # Transcription first, then language detection, each printed as returned.
    print(send_asr_request(service_url, recording, task="transcribe", language="en"))
    print(detect_language(service_url, recording))


# Run the example only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()