feat: implemented live recording transcription
This commit is contained in:
parent
9121ad819d
commit
d721eb3a5b
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
/build
|
||||||
|
/dist
|
||||||
|
/__pycache__
|
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
12
.idea/fluesterpost.iml
Normal file
12
.idea/fluesterpost.iml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="PyDocumentationSettings">
|
||||||
|
<option name="format" value="PLAIN" />
|
||||||
|
<option name="myDocStringFormat" value="Plain" />
|
||||||
|
</component>
|
||||||
|
</module>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
7
.idea/misc.xml
Normal file
7
.idea/misc.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Poetry (fluesterpost)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (fluesterpost)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/fluesterpost.iml" filepath="$PROJECT_DIR$/.idea/fluesterpost.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
Binary file not shown.
BIN
__pycache__/whisper_webservice_interface.cpython-311.pyc
Normal file
BIN
__pycache__/whisper_webservice_interface.cpython-311.pyc
Normal file
Binary file not shown.
468
main.py
468
main.py
@ -1,6 +1,10 @@
|
|||||||
import os
|
import os
|
||||||
import pprint
|
import pprint
|
||||||
import traceback
|
import traceback
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import requests.exceptions
|
||||||
|
import validators
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import flet as ft
|
import flet as ft
|
||||||
@ -10,30 +14,154 @@ from typing import DefaultDict
|
|||||||
import pygame
|
import pygame
|
||||||
|
|
||||||
import nn_model_manager as mm
|
import nn_model_manager as mm
|
||||||
|
import whisper_webservice_interface
|
||||||
|
|
||||||
|
import wave
|
||||||
|
import sys
|
||||||
|
import pyaudio
|
||||||
|
|
||||||
|
|
||||||
|
# === TEMP ===
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig()
|
||||||
|
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
# === END ===
|
||||||
|
|
||||||
|
|
||||||
|
# globals
|
||||||
|
transcribe_ready: bool = False
|
||||||
|
recording: bool = False
|
||||||
|
rec_stream: pyaudio.Stream | None = None
|
||||||
|
sound_chunks = []
|
||||||
|
recorded_audio = []
|
||||||
|
|
||||||
|
# AUDIO stuff
|
||||||
|
REC_CHUNK = 1024
|
||||||
|
REC_FORMAT = pyaudio.paInt16
|
||||||
|
REC_CHANNELS = 1
|
||||||
|
REC_RATE = 16000
|
||||||
|
REC_RECORD_SECONDS = 5
|
||||||
|
|
||||||
|
|
||||||
def main(page):
|
def main(page):
|
||||||
pygame.mixer.init()
|
pygame.mixer.init()
|
||||||
|
|
||||||
first_name = ft.Ref[ft.TextField]()
|
# get audio device names
|
||||||
last_name = ft.Ref[ft.TextField]()
|
p = pyaudio.PyAudio()
|
||||||
greetings = ft.Ref[ft.Column]()
|
|
||||||
|
capture_devices = [(i, p.get_device_info_by_index(i)['name']) for i in range(p.get_device_count()) if
|
||||||
|
p.get_device_info_by_index(i)['maxInputChannels'] > 0]
|
||||||
|
|
||||||
|
record_button = ft.Ref[ft.IconButton]()
|
||||||
|
mic_select = ft.Ref[ft.Dropdown]()
|
||||||
|
|
||||||
file_tree = ft.Ref[ft.Column]()
|
file_tree = ft.Ref[ft.Column]()
|
||||||
file_tree_empty_text = ft.Ref[ft.Text]()
|
file_tree_empty_text = ft.Ref[ft.Text]()
|
||||||
|
|
||||||
load_model_text = ft.Ref[ft.Text]()
|
# mode select
|
||||||
|
current_mode_select = ft.Ref[ft.Dropdown]()
|
||||||
|
current_mode_info_text = ft.Ref[ft.Text]()
|
||||||
|
processing_spinner = ft.Ref[ft.ProgressRing]()
|
||||||
|
|
||||||
|
# local model mode
|
||||||
model_size_select = ft.Ref[ft.Dropdown]()
|
model_size_select = ft.Ref[ft.Dropdown]()
|
||||||
model_device_select = ft.Ref[ft.Dropdown]()
|
model_device_select = ft.Ref[ft.Dropdown]()
|
||||||
# model_bits_select = ft.Ref[ft.Dropdown]()
|
# model_bits_select = ft.Ref[ft.Dropdown]()
|
||||||
model_load_unload_button = ft.Ref[ft.IconButton]()
|
model_load_unload_button = ft.Ref[ft.IconButton]()
|
||||||
model_loading_spinner = ft.Ref[ft.ProgressRing]()
|
|
||||||
|
# docker whisper webservice mode
|
||||||
|
whisper_webservice_url_input = ft.Ref[ft.TextField]()
|
||||||
|
|
||||||
transcribe_buttons: list[ft.Ref[ft.IconButton]] = []
|
transcribe_buttons: list[ft.Ref[ft.IconButton]] = []
|
||||||
|
|
||||||
output_text_container = ft.Ref[ft.Container]()
|
output_text_container = ft.Ref[ft.Container]()
|
||||||
output_text_col = ft.Ref[ft.Column]()
|
output_text_col = ft.Ref[ft.Column]()
|
||||||
|
|
||||||
|
def transcribe(fileOrBytes: str | bytes):
    """
    Transcribe an audio file or an in-memory recording and show the result in the output pane.

    The backend is chosen by the current value of the mode dropdown:

    * 'local': uses the model managed by `nn_model_manager`. Nothing happens if no model
      is loaded, or if a file path is given that does not end in '.mp3'.
    * 'webservice': sends the audio to the URL from the webservice text field via
      `whisper_webservice_interface.send_asr_request`. Nothing happens if the URL is invalid.

    While a transcription is running, all transcribe buttons are disabled and a progress ring is
    shown; the buttons are always re-enabled afterwards, even if the transcription fails.

    :param fileOrBytes: path to an audio file (str), or recorded audio (bytes).
    """

    def set_buttons_disabled(disabled: bool):
        # Toggle every transcribe button of the file tree at once.
        for btn in transcribe_buttons:
            btn.current.disabled = disabled

    def show_output(alignment, control):
        # Replace whatever is in the output pane with a single control.
        output_text_container.current.alignment = alignment
        output_text_col.current.controls = [control]

    print(f"DEBUG: trying to transcribe audio {fileOrBytes if isinstance(fileOrBytes, str) else f'with len {len(fileOrBytes)}'}")

    mode = current_mode_select.current.value

    # === LOCAL MODEL CODE ===
    if mode == 'local':
        is_file = isinstance(fileOrBytes, str)
        if not mm.is_model_loaded() or (is_file and not fileOrBytes.endswith('.mp3')):
            print("DEBUG: can't transcribe a non-MP3 file or while no model is loaded")
            return

        print("DEBUG: starting transcription")
        show_output(ft.alignment.center, ft.ProgressRing())
        set_buttons_disabled(True)
        page.update()

        try:
            if is_file:
                segments, _info = mm.transcribe_from_file(fileOrBytes)
            else:
                segments, _info = mm.transcribe_from_i16_audio(fileOrBytes)

            # Iterating the segments must stay inside the try block so that errors raised
            # while consuming them are reported in the output pane as well.
            txt = ''.join(seg.text + '\n' for seg in segments)

            show_output(ft.alignment.top_left, ft.Text(txt, selectable=True))  # TODO

        except Exception as e:
            show_output(ft.alignment.center, ft.Text(f"Transcribing failed: {str(e)}"))  # TODO

        finally:
            # set all transcribe buttons to enabled again
            set_buttons_disabled(False)
            page.update()

    # === WEBSERVICE MODE CODE ===
    elif mode == 'webservice':
        url = whisper_webservice_url_input.current.value
        print("DEBUG: starting web transcription")

        if not validators.url(url, simple_host=True):
            print("DEBUG: can't transcribe because the webservice URL is invalid")
            return

        show_output(ft.alignment.center, ft.ProgressRing())
        set_buttons_disabled(True)
        page.update()

        try:
            print('DEBUG: sending web request...')
            code, text = whisper_webservice_interface.send_asr_request(url, fileOrBytes, task="transcribe")

        except requests.exceptions.RequestException as e:
            print(f'web transcription failed: {str(e)}')
            show_output(ft.alignment.center,
                        ft.Text(f"HTTP Request to {url}/asr failed. Reason:\n{str(e)}"))

        except Exception as e:
            # Anything else (e.g. the audio could not be read) would otherwise leave the
            # spinner showing forever; report it the same way the local mode does.
            print(f'web transcription failed: {str(e)}')
            show_output(ft.alignment.center, ft.Text(f"Transcribing failed: {str(e)}"))

        else:
            if code == 200:
                show_output(ft.alignment.top_left, ft.Text(text, selectable=True))
            else:
                show_output(ft.alignment.center,
                            ft.Text(f"HTTP Request to {url}/asr failed ({code}):\n{text}"))

        finally:
            # set all transcribe buttons to enabled again, whatever happened above
            set_buttons_disabled(False)
            page.update()
|
||||||
|
|
||||||
def generate_file_tree(path: str, tree_dict: dict | DefaultDict):
|
def generate_file_tree(path: str, tree_dict: dict | DefaultDict):
|
||||||
if path[-1] == os.sep:
|
if path[-1] == os.sep:
|
||||||
path = path[:-1]
|
path = path[:-1]
|
||||||
@ -99,47 +227,17 @@ def main(page):
|
|||||||
_button_ref = ft.Ref[ft.IconButton]()
|
_button_ref = ft.Ref[ft.IconButton]()
|
||||||
|
|
||||||
control.append(ft.IconButton(icon=ft.icons.PLAY_CIRCLE_OUTLINED, ref=_button_ref,
|
control.append(ft.IconButton(icon=ft.icons.PLAY_CIRCLE_OUTLINED, ref=_button_ref,
|
||||||
on_click=lambda _, f=full_file_path, r=_button_ref: start_playing(f, r)))
|
on_click=lambda _, f=full_file_path, r=_button_ref: start_playing(f, r)))
|
||||||
|
|
||||||
def transcribe(filepath: str):
|
|
||||||
print(f"DEBUG: trying to transcribe file {filepath}")
|
|
||||||
if not mm.is_model_loaded() or not filepath.endswith('.mp3'):
|
|
||||||
return
|
|
||||||
|
|
||||||
print(f"DEBUG: starting transcription")
|
|
||||||
output_text_container.current.alignment = ft.alignment.center
|
|
||||||
output_text_col.current.controls = [ft.ProgressRing()]
|
|
||||||
|
|
||||||
# set all transcribe buttons to disabled
|
|
||||||
for btn in transcribe_buttons:
|
|
||||||
btn.current.disabled = True
|
|
||||||
page.update()
|
|
||||||
|
|
||||||
try:
|
|
||||||
segments, info = mm.transcribe_from_file(filepath)
|
|
||||||
|
|
||||||
txt = ''
|
|
||||||
|
|
||||||
for seg in segments:
|
|
||||||
txt += seg.text + '\n'
|
|
||||||
|
|
||||||
output_text_container.current.alignment = ft.alignment.top_left
|
|
||||||
output_text_col.current.controls = [ft.Text(txt, selectable=True)] # TODO
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
output_text_container.current.alignment = ft.alignment.center
|
|
||||||
output_text_col.current.controls = [ft.Text(f"Transcribing failed: {str(e)}")] # TODO
|
|
||||||
|
|
||||||
finally:
|
|
||||||
# set all transcribe buttons to disabled
|
|
||||||
for btn in transcribe_buttons:
|
|
||||||
btn.current.disabled = False
|
|
||||||
page.update()
|
|
||||||
|
|
||||||
|
|
||||||
transcribe_button_ref = ft.Ref[ft.IconButton]()
|
transcribe_button_ref = ft.Ref[ft.IconButton]()
|
||||||
|
|
||||||
control.append(ft.IconButton(icon=ft.icons.FORMAT_ALIGN_LEFT, disabled=not mm.is_model_loaded(), ref=transcribe_button_ref,
|
# check enabled
|
||||||
|
enabled = (current_mode_select.current.value == 'local' and mm.is_model_loaded()) or (
|
||||||
|
current_mode_select.current.value == 'webservice' and
|
||||||
|
validators.url(whisper_webservice_url_input.current.value, simple_host=True))
|
||||||
|
|
||||||
|
control.append(ft.IconButton(icon=ft.icons.FORMAT_ALIGN_LEFT, disabled=not enabled,
|
||||||
|
ref=transcribe_button_ref,
|
||||||
on_click=lambda _, f=full_file_path: transcribe(f)))
|
on_click=lambda _, f=full_file_path: transcribe(f)))
|
||||||
|
|
||||||
transcribe_buttons.append(transcribe_button_ref)
|
transcribe_buttons.append(transcribe_button_ref)
|
||||||
@ -155,15 +253,6 @@ def main(page):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
def btn_click(e):
|
|
||||||
greetings.current.controls.append(
|
|
||||||
ft.Text(f"Hello, {first_name.current.value} {last_name.current.value}!")
|
|
||||||
)
|
|
||||||
first_name.current.value = ""
|
|
||||||
last_name.current.value = ""
|
|
||||||
page.update()
|
|
||||||
first_name.current.focus()
|
|
||||||
|
|
||||||
def on_dialog_result(e: ft.FilePickerResultEvent):
|
def on_dialog_result(e: ft.FilePickerResultEvent):
|
||||||
path = e.path
|
path = e.path
|
||||||
if path:
|
if path:
|
||||||
@ -183,16 +272,56 @@ def main(page):
|
|||||||
except e:
|
except e:
|
||||||
print("didn't work aaa") # TODO: fix
|
print("didn't work aaa") # TODO: fix
|
||||||
|
|
||||||
def load_model():
|
def mode_select():
|
||||||
|
global transcribe_ready
|
||||||
|
if mm.is_model_loaded():
|
||||||
|
print("BUG: cannot change mode while model is loaded!")
|
||||||
|
return
|
||||||
|
|
||||||
load_model_text.current.value = 'Loading... This may take a while.'
|
next_mode = current_mode_select.current.value
|
||||||
|
if next_mode == 'local':
|
||||||
|
# enable model selects & loads
|
||||||
|
model_size_select.current.visible = True
|
||||||
|
model_device_select.current.visible = True
|
||||||
|
model_load_unload_button.current.visible = True
|
||||||
|
model_size_select.current.disabled = False
|
||||||
|
model_device_select.current.disabled = False
|
||||||
|
|
||||||
|
whisper_webservice_url_input.current.visible = False
|
||||||
|
|
||||||
|
for btn in transcribe_buttons:
|
||||||
|
btn.current.disabled = True
|
||||||
|
|
||||||
|
set_transcribe_ready(False)
|
||||||
|
|
||||||
|
elif next_mode == 'webservice':
|
||||||
|
# hide and disable the local-model selects & load button
|
||||||
|
model_size_select.current.visible = False
|
||||||
|
model_device_select.current.visible = False
|
||||||
|
model_load_unload_button.current.visible = False
|
||||||
|
model_size_select.current.disabled = True
|
||||||
|
model_device_select.current.disabled = True
|
||||||
|
model_load_unload_button.current.disabled = True
|
||||||
|
current_mode_info_text.current.value = 'Input the URL of the onerahmet/openai-whisper-asr-webservice docker container'
|
||||||
|
|
||||||
|
whisper_webservice_url_input.current.visible = True
|
||||||
|
whisper_webservice_url_input.current.disabled = False
|
||||||
|
|
||||||
|
on_url_input(None)
|
||||||
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise Exception(f'BUG: Impossible mode {next_mode} received!')
|
||||||
|
|
||||||
model_size_select.current.disabled = True
|
|
||||||
model_device_select.current.disabled = True
|
|
||||||
# model_bits_select.current.disabled = True
|
|
||||||
model_load_unload_button.current.disabled = True
|
|
||||||
model_loading_spinner.current.visible = True
|
|
||||||
page.update()
|
page.update()
|
||||||
|
page.client_storage.set('selected_mode', next_mode)
|
||||||
|
|
||||||
|
def load_model():
|
||||||
|
current_mode_info_text.current.value = 'Loading... This may take a while.'
|
||||||
|
|
||||||
|
page.update()
|
||||||
|
|
||||||
|
paralyze_ui()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
mm.set_model(
|
mm.set_model(
|
||||||
@ -203,55 +332,148 @@ def main(page):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"loading model failed. Exception: {str(e)}")
|
print(f"loading model failed. Exception: {str(e)}")
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
load_model_text.current.value = f'Loading failed. Reason:\n{str(e)}'
|
current_mode_info_text.current.value = f'Loading failed. Reason:\n{str(e)}'
|
||||||
model_size_select.current.disabled = False
|
set_transcribe_ready(False)
|
||||||
model_device_select.current.disabled = False
|
|
||||||
# model_bits_select.current.disabled = False
|
|
||||||
|
|
||||||
# raise e
|
# raise e
|
||||||
|
|
||||||
model_loading_spinner.current.visible = False
|
processing_spinner.current.visible = False
|
||||||
model_load_unload_button.current.disabled = False
|
|
||||||
|
|
||||||
if mm.is_model_loaded():
|
if mm.is_model_loaded():
|
||||||
load_model_text.current.value = f'Loaded.'
|
current_mode_info_text.current.value = f'Loaded.'
|
||||||
model_load_unload_button.current.icon = ft.icons.CLOSE
|
|
||||||
model_load_unload_button.current.on_click = lambda _: unload_model()
|
|
||||||
|
|
||||||
# if successful, save to shared preferences
|
# if successful, save to shared preferences
|
||||||
page.client_storage.set('model_size', model_size_select.current.value)
|
page.client_storage.set('model_size', model_size_select.current.value)
|
||||||
page.client_storage.set('device_select', model_device_select.current.value)
|
page.client_storage.set('device_select', model_device_select.current.value)
|
||||||
|
|
||||||
# set all transcribe buttons to enabled
|
# set all transcribe buttons to enabled
|
||||||
for btn in transcribe_buttons:
|
set_transcribe_ready(True)
|
||||||
btn.current.disabled = False
|
else:
|
||||||
|
set_transcribe_ready(False)
|
||||||
page.update()
|
|
||||||
|
|
||||||
def unload_model():
|
def unload_model():
|
||||||
model_load_unload_button.current.disabled = True
|
|
||||||
|
|
||||||
# set all transcribe buttons to disabled
|
# set all transcribe buttons to disabled
|
||||||
for btn in transcribe_buttons:
|
paralyze_ui()
|
||||||
btn.current.disabled = True
|
|
||||||
|
|
||||||
page.update()
|
|
||||||
|
|
||||||
if mm.is_model_loaded():
|
if mm.is_model_loaded():
|
||||||
mm.unload_model()
|
mm.unload_model()
|
||||||
|
|
||||||
load_model_text.current.value = 'Select parameters, and then load transcription model.'
|
set_transcribe_ready(False)
|
||||||
model_size_select.current.disabled = False
|
|
||||||
model_device_select.current.disabled = False
|
def paralyze_ui():
|
||||||
# model_bits_select.current.disabled = False
|
model_size_select.current.disabled = True
|
||||||
|
model_device_select.current.disabled = True
|
||||||
|
# model_bits_select.current.disabled = True
|
||||||
|
model_load_unload_button.current.disabled = True
|
||||||
|
processing_spinner.current.visible = True
|
||||||
|
current_mode_select.current.disabled = True
|
||||||
|
|
||||||
|
model_load_unload_button.current.icon = ft.icons.CLOSE
|
||||||
model_load_unload_button.current.disabled = False
|
model_load_unload_button.current.disabled = False
|
||||||
model_load_unload_button.current.icon = ft.icons.START
|
for btn in transcribe_buttons:
|
||||||
model_load_unload_button.current.on_click = lambda _: load_model()
|
btn.current.disabled = True
|
||||||
model_loading_spinner.current.visible = False
|
model_load_unload_button.current.disabled = True
|
||||||
page.update()
|
page.update()
|
||||||
|
|
||||||
|
def set_transcribe_ready(rdy: bool):
    """
    Record whether transcription is currently possible and bring the UI in line with it.

    When ready, the transcribe buttons are enabled, the model selects are locked and the
    load/unload button becomes an "unload" button; the mode dropdown is locked only while a
    local model is loaded. When not ready, the opposite applies and the load/unload button
    becomes a "load" button again. The processing spinner is hidden in both cases.

    :param rdy: True if transcription can be started right now.
    """
    global transcribe_ready
    transcribe_ready = rdy
    ready = bool(rdy)

    for button_ref in transcribe_buttons:
        button_ref.current.disabled = not ready

    # The model parameters may only be changed while nothing is ready to transcribe.
    # (model_bits_select is not wired up yet and is therefore not touched here.)
    model_size_select.current.disabled = ready
    model_device_select.current.disabled = ready
    processing_spinner.current.visible = False

    toggle_button = model_load_unload_button.current
    if ready:
        toggle_button.on_click = lambda _: unload_model()
        toggle_button.icon = ft.icons.CLOSE
        if mm.is_model_loaded():
            current_mode_select.current.disabled = True
    else:
        toggle_button.icon = ft.icons.START
        toggle_button.on_click = lambda _: load_model()
        current_mode_select.current.disabled = False
    toggle_button.disabled = False

    page.update()
|
||||||
|
|
||||||
|
def on_url_input(e):
    """
    React to a change of the webservice URL field.

    A valid URL is persisted in the client storage under 'webservice_url' and marks
    transcription as ready; an invalid one marks it as not ready.

    :param e: the change event (unused; the handler is also called with None).
    """
    entered_url = whisper_webservice_url_input.current.value
    url_is_valid = bool(validators.url(entered_url, simple_host=True))

    if url_is_valid:
        page.client_storage.set('webservice_url', entered_url)

    # enables or disables all transcribe buttons accordingly
    set_transcribe_ready(url_is_valid)

    page.update()
|
||||||
|
|
||||||
|
# Debug output of the persisted microphone choice. 'selected_mic' is only written once the
# user picks a microphone, so it may be missing; tuple(None) would raise a TypeError.
_stored_mic = page.client_storage.get('selected_mic') if page.client_storage.contains_key('selected_mic') else None
print(tuple(_stored_mic) if _stored_mic is not None else None)
|
||||||
|
|
||||||
|
def toggle_recording():
    """
    Start a microphone recording, or stop the running one and transcribe it.

    While recording, PyAudio delivers the audio in chunks to a callback which collects them
    in `sound_chunks`. On stop, the stream is stopped and closed, the chunks are joined into
    `recorded_audio` and handed to `transcribe`.

    NOTE(review): the stream is opened on the default input device; the microphone chosen in
    the dropdown is not passed to `p.open` - confirm whether that is intended.
    """
    global recording
    global rec_stream
    global sound_chunks
    global recorded_audio

    import time  # only needed for the short wait below

    if recording:
        print("Stopping recording...")

        # Reset the flag first, so a failure further down cannot leave us stuck in
        # "recording" state with an already stopped stream.
        recording = False

        if rec_stream is not None:
            rec_stream.stop_stream()

            while not rec_stream.is_stopped():
                time.sleep(0.01)  # wait until stopped, without spinning a CPU core

            # Release the audio device; otherwise every recording leaks one open stream.
            rec_stream.close()
            rec_stream = None

        recorded_audio = b"".join(sound_chunks)

        transcribe(recorded_audio)

        # Playback of the recording is disabled for now:
        # sound = pygame.mixer.Sound(buffer=recorded_audio)  # doesn't work because sampling rate is wrong
        # sound.play()
    else:
        print("Starting Recording...")

        sound_chunks = []

        def cb(in_data, _frame_count, _time_info, _status):
            # Called by PyAudio for every recorded chunk; keep it cheap (no printing here).
            sound_chunks.append(in_data)
            return in_data, pyaudio.paContinue

        rec_stream = p.open(
            format=REC_FORMAT,
            channels=REC_CHANNELS,
            rate=REC_RATE,
            input=True,
            frames_per_buffer=REC_CHUNK,
            stream_callback=cb
        )

        rec_stream.start_stream()

        # Only mark as recording once the stream could actually be opened and started.
        recording = True
|
||||||
|
|
||||||
|
def find_recordingdevice_tuple_by_name(search_name: str) -> typing.Tuple[int, str] | None:
    """
    Look up a capture device by its name.

    :param search_name: device name to look for in `capture_devices`.
    :return: the first `(device_id, name)` tuple whose name matches, or None if there is no
        such device. Callers have to handle the None case.
    """
    # The default of None keeps next() from raising StopIteration when nothing matches.
    return next(((device_id, name) for device_id, name in capture_devices if name == search_name), None)
|
||||||
|
|
||||||
# set up file picker
|
# set up file picker
|
||||||
file_picker = ft.FilePicker(on_result=on_dialog_result)
|
file_picker = ft.FilePicker(on_result=on_dialog_result)
|
||||||
@ -263,11 +485,29 @@ def main(page):
|
|||||||
ft.Divider()
|
ft.Divider()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
mode = page.client_storage.get('selected_mode') if page.client_storage.contains_key('selected_mode') else 'local'
|
||||||
|
|
||||||
page.add(
|
page.add(
|
||||||
ft.ResponsiveRow([
|
ft.ResponsiveRow([
|
||||||
ft.Container(
|
ft.Container(
|
||||||
ft.Column([
|
ft.Column([
|
||||||
ft.ElevatedButton("Add Folder", on_click=lambda _: file_picker.get_directory_path()),
|
ft.Row([
|
||||||
|
ft.ElevatedButton("Add Folder", on_click=lambda _: file_picker.get_directory_path()),
|
||||||
|
ft.Container(expand=True),
|
||||||
|
ft.IconButton(ft.icons.RECORD_VOICE_OVER, ref=record_button,
|
||||||
|
on_click=lambda _: toggle_recording()),
|
||||||
|
]),
|
||||||
|
ft.Dropdown(
|
||||||
|
ref=mic_select,
|
||||||
|
options=[ft.dropdown.Option(x[1]) for x in capture_devices],
|
||||||
|
value=page.client_storage.get('selected_mic')[1] if (
|
||||||
|
page.client_storage.contains_key('selected_mic') and tuple(
|
||||||
|
page.client_storage.get('selected_mic')) in capture_devices) else capture_devices[0][1],
|
||||||
|
height=36,
|
||||||
|
content_padding=2,
|
||||||
|
on_change=lambda _: page.client_storage.set('selected_mic', find_recordingdevice_tuple_by_name(
|
||||||
|
mic_select.current.value)) if mic_select.current.value else None
|
||||||
|
),
|
||||||
ft.Column(ref=file_tree, scroll=ft.ScrollMode.ALWAYS, expand=True),
|
ft.Column(ref=file_tree, scroll=ft.ScrollMode.ALWAYS, expand=True),
|
||||||
# ft.ListView(ref=file_tree),
|
# ft.ListView(ref=file_tree),
|
||||||
ft.Text("No Folder Open Yet", style=ft.TextTheme.body_small, color="grey",
|
ft.Text("No Folder Open Yet", style=ft.TextTheme.body_small, color="grey",
|
||||||
@ -275,21 +515,44 @@ def main(page):
|
|||||||
], expand=True), expand=True, col=4),
|
], expand=True), expand=True, col=4),
|
||||||
ft.Container(expand=True, content=ft.Column(expand=True, controls=[
|
ft.Container(expand=True, content=ft.Column(expand=True, controls=[
|
||||||
ft.Column([
|
ft.Column([
|
||||||
ft.Text('Select parameters, and then load transcription model.', ref=load_model_text),
|
ft.Text(
|
||||||
|
'Select parameters, and then load transcription model.'
|
||||||
|
if mode == 'local'
|
||||||
|
else 'Input the URL of the onerahmet/openai-whisper-asr-webservice docker container'
|
||||||
|
, ref=current_mode_info_text),
|
||||||
ft.Row([
|
ft.Row([
|
||||||
|
ft.Dropdown(
|
||||||
|
ref=current_mode_select,
|
||||||
|
width=160,
|
||||||
|
hint_text='mode',
|
||||||
|
value=mode,
|
||||||
|
on_change=lambda _: mode_select(),
|
||||||
|
options=[
|
||||||
|
ft.dropdown.Option('local'),
|
||||||
|
ft.dropdown.Option('webservice'),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
|
||||||
|
# === LOCAL MODE ===
|
||||||
ft.Dropdown(
|
ft.Dropdown(
|
||||||
ref=model_size_select,
|
ref=model_size_select,
|
||||||
width=100,
|
width=100,
|
||||||
hint_text='model size',
|
hint_text='model size',
|
||||||
value=page.client_storage.get('model_size') if page.client_storage.contains_key('model_size') else 'base',
|
value=page.client_storage.get('model_size') if page.client_storage.contains_key(
|
||||||
options=[ft.dropdown.Option(x) for x in mm.ModelSize.__args__], # __args__ is not perfect here. But works.
|
'model_size') else 'base',
|
||||||
|
options=[ft.dropdown.Option(x) for x in mm.ModelSize.__args__],
|
||||||
|
# __args__ is not perfect here. But works.
|
||||||
|
visible=mode == 'local',
|
||||||
),
|
),
|
||||||
ft.Dropdown(
|
ft.Dropdown(
|
||||||
ref=model_device_select,
|
ref=model_device_select,
|
||||||
width=100,
|
width=100,
|
||||||
hint_text='device',
|
hint_text='device',
|
||||||
value=page.client_storage.get('device_select') if page.client_storage.contains_key('device_select') else 'auto',
|
value=page.client_storage.get('device_select') if page.client_storage.contains_key(
|
||||||
options=[ft.dropdown.Option(x) for x in mm.Device.__args__] # __args__ is not perfect here. But works.
|
'device_select') else 'auto',
|
||||||
|
options=[ft.dropdown.Option(x) for x in mm.Device.__args__],
|
||||||
|
visible=mode == 'local',
|
||||||
|
# __args__ is not perfect here. But works.
|
||||||
),
|
),
|
||||||
# ft.Dropdown(
|
# ft.Dropdown(
|
||||||
# ref=model_bits_select,
|
# ref=model_bits_select,
|
||||||
@ -297,13 +560,26 @@ def main(page):
|
|||||||
# hint_text='bits',
|
# hint_text='bits',
|
||||||
# value='16bit',
|
# value='16bit',
|
||||||
# options=[ft.dropdown.Option(x) for x in mm.ComputeType.__args__] # __args__ is not perfect here. But works.
|
# options=[ft.dropdown.Option(x) for x in mm.ComputeType.__args__] # __args__ is not perfect here. But works.
|
||||||
#),
|
# ),
|
||||||
ft.IconButton(
|
ft.IconButton(
|
||||||
icon=ft.icons.START,
|
icon=ft.icons.START,
|
||||||
ref=model_load_unload_button,
|
ref=model_load_unload_button,
|
||||||
on_click=lambda _: load_model(),
|
on_click=lambda _: load_model(),
|
||||||
|
visible=mode == 'local',
|
||||||
),
|
),
|
||||||
ft.ProgressRing(ref=model_loading_spinner, visible=False)
|
# === WEBSERVICE MODE ===
|
||||||
|
ft.TextField(
|
||||||
|
ref=whisper_webservice_url_input,
|
||||||
|
visible=mode == 'webservice',
|
||||||
|
on_change=on_url_input,
|
||||||
|
hint_text='e.g. http://localhost:9000',
|
||||||
|
value=page.client_storage.get('webservice_url') if page.client_storage.contains_key(
|
||||||
|
'webservice_url') else '',
|
||||||
|
),
|
||||||
|
# TODO: question mark hint button about what the web service is
|
||||||
|
|
||||||
|
# === GENERAL ===
|
||||||
|
ft.ProgressRing(ref=processing_spinner, visible=False)
|
||||||
])
|
])
|
||||||
]),
|
]),
|
||||||
ft.Container(expand=True, padding=12, border=ft.border.all(2, 'grey'),
|
ft.Container(expand=True, padding=12, border=ft.border.all(2, 'grey'),
|
||||||
@ -311,9 +587,9 @@ def main(page):
|
|||||||
ref=output_text_container,
|
ref=output_text_container,
|
||||||
content=ft.Column(
|
content=ft.Column(
|
||||||
[ft.Text('Nothing to see here!', text_align=ft.TextAlign.CENTER)],
|
[ft.Text('Nothing to see here!', text_align=ft.TextAlign.CENTER)],
|
||||||
ref=output_text_col,
|
ref=output_text_col,
|
||||||
expand=True,
|
expand=True,
|
||||||
scroll=ft.ScrollMode.ADAPTIVE)),
|
scroll=ft.ScrollMode.ADAPTIVE)),
|
||||||
]), col=8)
|
]), col=8)
|
||||||
], expand=True),
|
], expand=True),
|
||||||
)
|
)
|
||||||
|
37
main.spec
Normal file
37
main.spec
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
|
a = Analysis(
|
||||||
|
['main.py'],
|
||||||
|
pathex=[],
|
||||||
|
binaries=[],
|
||||||
|
datas=[],
|
||||||
|
hiddenimports=[],
|
||||||
|
hookspath=[],
|
||||||
|
hooksconfig={},
|
||||||
|
runtime_hooks=[],
|
||||||
|
excludes=[],
|
||||||
|
noarchive=False,
|
||||||
|
)
|
||||||
|
pyz = PYZ(a.pure)
|
||||||
|
|
||||||
|
exe = EXE(
|
||||||
|
pyz,
|
||||||
|
a.scripts,
|
||||||
|
a.binaries,
|
||||||
|
a.datas,
|
||||||
|
[],
|
||||||
|
name='main',
|
||||||
|
debug=False,
|
||||||
|
bootloader_ignore_signals=False,
|
||||||
|
strip=False,
|
||||||
|
upx=True,
|
||||||
|
upx_exclude=[],
|
||||||
|
runtime_tmpdir=None,
|
||||||
|
console=False,
|
||||||
|
disable_windowed_traceback=False,
|
||||||
|
argv_emulation=False,
|
||||||
|
target_arch=None,
|
||||||
|
codesign_identity=None,
|
||||||
|
entitlements_file=None,
|
||||||
|
)
|
@ -1,5 +1,7 @@
|
|||||||
|
import io
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
import faster_whisper
|
import faster_whisper
|
||||||
from typing import Literal, Iterable, Tuple
|
from typing import Literal, Iterable, Tuple
|
||||||
@ -47,6 +49,30 @@ def is_model_loaded() -> bool:
|
|||||||
return _model is not None
|
return _model is not None
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe_from_i16_audio(audio: bytes) -> Tuple[Iterable[
    faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
    """
    Transcribe raw audio that is held in memory.

    `audio` is interpreted as a buffer of signed 16-bit integer samples, which are scaled to
    float32 before being passed to the model.
    NOTE(review): presumably the samples have to be 16 kHz mono, as recorded by main.py - confirm.

    Note that this can - and will - crash if you don't catch exceptions.

    If the model isn't loaded yet, this will return None.
    Otherwise, it will return the raw transcription from `faster-whisper`.
    """
    if not is_model_loaded():
        return None

    # Reinterpret the bytes as s16 samples and convert s16 to f32.
    samples = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0

    # transcribe, and throw all exceptions to application to handle
    segments, info = _model.transcribe(samples, beam_size=5)

    return segments, info
|
||||||
|
|
||||||
|
|
||||||
def transcribe_from_file(mp3_path: str) -> Tuple[Iterable[faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
|
def transcribe_from_file(mp3_path: str) -> Tuple[Iterable[faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
|
||||||
"""
|
"""
|
||||||
Transcribe audio from an MP3 file.
|
Transcribe audio from an MP3 file.
|
||||||
|
7
openapitools.json
Normal file
7
openapitools.json
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"$schema": "./node_modules/@openapitools/openapi-generator-cli/config.schema.json",
|
||||||
|
"spaces": 2,
|
||||||
|
"generator-cli": {
|
||||||
|
"version": "7.0.1"
|
||||||
|
}
|
||||||
|
}
|
178
poetry.lock
generated
178
poetry.lock
generated
@ -1,5 +1,17 @@
|
|||||||
# This file is automatically @generated by Poetry and should not be changed by hand.
|
# This file is automatically @generated by Poetry and should not be changed by hand.
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "altgraph"
|
||||||
|
version = "0.17.4"
|
||||||
|
description = "Python graph (network) package"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff"},
|
||||||
|
{file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "annotated-types"
|
name = "annotated-types"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
@ -681,6 +693,21 @@ files = [
|
|||||||
{file = "lit-17.0.3.tar.gz", hash = "sha256:e6049032462be1e2928686cbd4a6cc5b3c545d83ecd078737fe79412c1f3fcc1"},
|
{file = "lit-17.0.3.tar.gz", hash = "sha256:e6049032462be1e2928686cbd4a6cc5b3c545d83ecd078737fe79412c1f3fcc1"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "macholib"
|
||||||
|
version = "1.16.3"
|
||||||
|
description = "Mach-O header analysis and editing"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c"},
|
||||||
|
{file = "macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
altgraph = ">=0.17"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markupsafe"
|
name = "markupsafe"
|
||||||
version = "2.1.3"
|
version = "2.1.3"
|
||||||
@ -1075,6 +1102,18 @@ files = [
|
|||||||
{file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"},
|
{file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pefile"
|
||||||
|
version = "2023.2.7"
|
||||||
|
description = "Python PE parsing module"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6.0"
|
||||||
|
files = [
|
||||||
|
{file = "pefile-2023.2.7-py3-none-any.whl", hash = "sha256:da185cd2af68c08a6cd4481f7325ed600a88f6a813bad9dea07ab3ef73d8d8d6"},
|
||||||
|
{file = "pefile-2023.2.7.tar.gz", hash = "sha256:82e6114004b3d6911c77c3953e3838654b04511b8b66e8583db70c65998017dc"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "plumbum"
|
name = "plumbum"
|
||||||
version = "1.8.2"
|
version = "1.8.2"
|
||||||
@ -1133,6 +1172,30 @@ files = [
|
|||||||
{file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"},
|
{file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyaudio"
|
||||||
|
version = "0.2.13"
|
||||||
|
description = "Cross-platform audio I/O with PortAudio"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "PyAudio-0.2.13-cp310-cp310-win32.whl", hash = "sha256:48e29537ea22ae2ae323eebe297bfb2683831cee4f20d96964e131f65ab2161d"},
|
||||||
|
{file = "PyAudio-0.2.13-cp310-cp310-win_amd64.whl", hash = "sha256:87137cfd0ef8608a2a383be3f6996f59505e322dab9d16531f14cf542fa294f1"},
|
||||||
|
{file = "PyAudio-0.2.13-cp311-cp311-win32.whl", hash = "sha256:13915faaa780e6bbbb6d745ef0e761674fd461b1b1b3f9c1f57042a534bfc0c3"},
|
||||||
|
{file = "PyAudio-0.2.13-cp311-cp311-win_amd64.whl", hash = "sha256:59cc3cc5211b729c7854e3989058a145872cc58b1a7b46c6d4d88448a343d890"},
|
||||||
|
{file = "PyAudio-0.2.13-cp37-cp37m-win32.whl", hash = "sha256:d294e3f85b2238649b1ff49ce3412459a8a312569975a89d14646536362d7576"},
|
||||||
|
{file = "PyAudio-0.2.13-cp37-cp37m-win_amd64.whl", hash = "sha256:ff7f5e44ef51fe61da1e09c6f632f0b5808198edd61b363855cc7dd03bf4a8ac"},
|
||||||
|
{file = "PyAudio-0.2.13-cp38-cp38-win32.whl", hash = "sha256:c6b302b048c054b7463936d8ba884b73877dc47012f3c94665dba92dd658ae04"},
|
||||||
|
{file = "PyAudio-0.2.13-cp38-cp38-win_amd64.whl", hash = "sha256:1505d766ee718df6f5a18b73ac42307ba1cb4d2c0397873159254a34f67515d6"},
|
||||||
|
{file = "PyAudio-0.2.13-cp39-cp39-win32.whl", hash = "sha256:eb128e4a6ea9b98d9a31f33c44978885af27dbe8ae53d665f8790cbfe045517e"},
|
||||||
|
{file = "PyAudio-0.2.13-cp39-cp39-win_amd64.whl", hash = "sha256:910ef09225cce227adbba92622d4a3e3c8375117f7dd64039f287d9ffc0e02a1"},
|
||||||
|
{file = "PyAudio-0.2.13.tar.gz", hash = "sha256:26bccc81e4243d1c0ff5487e6b481de6329fcd65c79365c267cef38f363a2b56"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["numpy"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pydantic"
|
name = "pydantic"
|
||||||
version = "2.4.2"
|
version = "2.4.2"
|
||||||
@ -1354,6 +1417,52 @@ files = [
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
plugins = ["importlib-metadata"]
|
plugins = ["importlib-metadata"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyinstaller"
|
||||||
|
version = "6.1.0"
|
||||||
|
description = "PyInstaller bundles a Python application and all its dependencies into a single package."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "<3.13,>=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:da78942d31c1911ea4abcd3ca3bd0c062af7f163a5e227fd18a359b61deda4ca"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:f63d2353537bac7bfeeaedbe5ac99f3be35daa290dd1ad1be90768acbf77e3d5"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-manylinux2014_i686.whl", hash = "sha256:6e71d9f6f5a1e0f7523e8ebee1b76bb29538f64d863e3711c2b21033f499e2b9"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:bebf6f442bbe6343acaec873803510ee1930d026846a018f727da4e0690081f8"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:3c04963637481a3edf1eec64ab4c3fce098908f02fc472c11e73be7eedc08b95"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:4368e4eb9999ce32e3280330b3c26f175e0fa7fa13efb4d2dc4ade488ff6d7c2"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:041ab9311d08162356829bf47293a613c44dc9ace28846fb63098889c7383c5d"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:331f050e8f9e923bb6b50454acfc0547fd52092585c61eb5f2fc93de60703f13"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-win32.whl", hash = "sha256:9e8b5bbc1bdf554ade1360e62e4959091430c3cc15ebfff3c28c8894fd1f312a"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-win_amd64.whl", hash = "sha256:f9f5bcaef6122d93c54ee7a9ecb07eab5b81a7ebfb5cb99af2b2a6ff49eff62f"},
|
||||||
|
{file = "pyinstaller-6.1.0-py3-none-win_arm64.whl", hash = "sha256:dd438afd2abb643f5399c0cb254a11c217c06782cb274a2911dd785f9f67fa9e"},
|
||||||
|
{file = "pyinstaller-6.1.0.tar.gz", hash = "sha256:8f3d49c60f3344bf3d4a6d4258bda665dad185ab2b097341d3af2a6387c838ef"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
altgraph = "*"
|
||||||
|
macholib = {version = ">=1.8", markers = "sys_platform == \"darwin\""}
|
||||||
|
packaging = ">=20.0"
|
||||||
|
pefile = {version = ">=2022.5.30", markers = "sys_platform == \"win32\""}
|
||||||
|
pyinstaller-hooks-contrib = ">=2021.4"
|
||||||
|
pywin32-ctypes = {version = ">=0.2.1", markers = "sys_platform == \"win32\""}
|
||||||
|
setuptools = ">=42.0.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyinstaller-hooks-contrib"
|
||||||
|
version = "2023.10"
|
||||||
|
description = "Community maintained hooks for PyInstaller"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "pyinstaller-hooks-contrib-2023.10.tar.gz", hash = "sha256:4b4a998036abb713774cb26534ca06b7e6e09e4c628196017a10deb11a48747f"},
|
||||||
|
{file = "pyinstaller_hooks_contrib-2023.10-py2.py3-none-any.whl", hash = "sha256:6dc1786a8f452941245d5bb85893e2a33632ebdcbc4c23eea41f2ee08281b0c0"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pypng"
|
name = "pypng"
|
||||||
version = "0.20220715.0"
|
version = "0.20220715.0"
|
||||||
@ -1378,6 +1487,36 @@ files = [
|
|||||||
{file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"},
|
{file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pysdl2"
|
||||||
|
version = "0.9.16"
|
||||||
|
description = "Python SDL2 bindings"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "PySDL2-0.9.16.tar.gz", hash = "sha256:1027406badbecdd30fe56e800a5a76ad7d7271a3aec0b7acf780ee26a00f2d40"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pysdl2-dll"
|
||||||
|
version = "2.28.4"
|
||||||
|
description = "Pre-built SDL2 binaries for PySDL2"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "pysdl2-dll-2.28.4.tar.gz", hash = "sha256:051e411ef93778d924a21c6e8fcfabd404bae5620fa49fa417e05b494a6a7dca"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-macosx_10_11_universal2.whl", hash = "sha256:1acff652e62f906109a6ca4874ff1e210eebb4989df651955c48add43f89c077"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-macosx_10_11_x86_64.whl", hash = "sha256:a35ab0f06b9e42ba12575b6960ad7ea013fc0f49e6935b4b53d66a0a06668eae"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-manylinux2014_i686.whl", hash = "sha256:6868f67b831053730c1d429076594e3b4db8522b779c51932b0ca003ae47b134"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-manylinux2014_x86_64.whl", hash = "sha256:d77f13a0f411abb3abd6d49f8b41c1373f72b86b1973236023dc37d563c2d0db"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:bba4abda0962025bff2ab0f17ff93f70f09fe706468460a4709533f5550c9bd5"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:142133f79236b809850e035c9a7fc77cd1098bdeb5f4edbba818a24f2aa6cf55"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-win32.whl", hash = "sha256:e417decf74d63cc3f5092385bdfb75cc7815d34b838992f09aff21c40ad27237"},
|
||||||
|
{file = "pysdl2_dll-2.28.4-py2.py3-none-win_amd64.whl", hash = "sha256:667628a119e00f45aed279e480516ccc484c2f9a5d03c901dd1996c3af4c5840"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pywin32"
|
name = "pywin32"
|
||||||
version = "306"
|
version = "306"
|
||||||
@ -1402,6 +1541,18 @@ files = [
|
|||||||
{file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
|
{file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pywin32-ctypes"
|
||||||
|
version = "0.2.2"
|
||||||
|
description = "A (partial) reimplementation of pywin32 using ctypes/cffi"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "pywin32-ctypes-0.2.2.tar.gz", hash = "sha256:3426e063bdd5fd4df74a14fa3cf80a0b42845a87e1d1e81f6549f9daec593a60"},
|
||||||
|
{file = "pywin32_ctypes-0.2.2-py3-none-any.whl", hash = "sha256:bf490a1a709baf35d688fe0ecf980ed4de11d2b3e37b51e5442587a75d9957e7"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyyaml"
|
name = "pyyaml"
|
||||||
version = "6.0.1"
|
version = "6.0.1"
|
||||||
@ -1873,6 +2024,29 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.
|
|||||||
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||||
zstd = ["zstandard (>=0.18.0)"]
|
zstd = ["zstandard (>=0.18.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "validators"
|
||||||
|
version = "0.22.0"
|
||||||
|
description = "Python Data Validation for Humans™"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "validators-0.22.0-py3-none-any.whl", hash = "sha256:61cf7d4a62bbae559f2e54aed3b000cea9ff3e2fdbe463f51179b92c58c9585a"},
|
||||||
|
{file = "validators-0.22.0.tar.gz", hash = "sha256:77b2689b172eeeb600d9605ab86194641670cdb73b60afd577142a9397873370"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs-offline = ["myst-parser (>=2.0.0)", "pypandoc-binary (>=1.11)", "sphinx (>=7.1.1)"]
|
||||||
|
docs-online = ["mkdocs (>=1.5.2)", "mkdocs-git-revision-date-localized-plugin (>=1.2.0)", "mkdocs-material (>=9.2.6)", "mkdocstrings[python] (>=0.22.0)", "pyaml (>=23.7.0)"]
|
||||||
|
hooks = ["pre-commit (>=3.3.3)"]
|
||||||
|
package = ["build (>=1.0.0)", "twine (>=4.0.2)"]
|
||||||
|
runner = ["tox (>=4.11.1)"]
|
||||||
|
sast = ["bandit[toml] (>=1.7.5)"]
|
||||||
|
testing = ["pytest (>=7.4.0)"]
|
||||||
|
tooling = ["black (>=23.7.0)", "pyright (>=1.1.325)", "ruff (>=0.0.287)"]
|
||||||
|
tooling-extras = ["pyaml (>=23.7.0)", "pypandoc-binary (>=1.11)", "pytest (>=7.4.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "watchdog"
|
name = "watchdog"
|
||||||
version = "3.0.0"
|
version = "3.0.0"
|
||||||
@ -2039,5 +2213,5 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"]
|
|||||||
|
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = ">=3.11, <3.13"
|
||||||
content-hash = "5adbe2b271f9a98bc456e1995fd743db377752eede94e42bcc5dced023d42757"
|
content-hash = "5757172c816b0e5b7863ffee379028af4ae9d77e6aaa9e3076830030ccdcc539"
|
||||||
|
@ -7,11 +7,17 @@ license = "MIT"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.11"
|
python = ">=3.11, <3.13"
|
||||||
flet = "^0.10.3"
|
flet = "^0.10.3"
|
||||||
faster-whisper = "^0.9.0"
|
faster-whisper = "^0.9.0"
|
||||||
pygame = "^2.5.2"
|
pygame = "^2.5.2"
|
||||||
torch = "2.0.0"
|
torch = "2.0.0"
|
||||||
|
requests = "^2.31.0"
|
||||||
|
validators = "^0.22.0"
|
||||||
|
pyinstaller = "^6.1.0"
|
||||||
|
pysdl2 = "^0.9.16"
|
||||||
|
pysdl2-dll = "^2.28.4"
|
||||||
|
pyaudio = "^0.2.13"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
84
whisper_webservice_interface.py
Normal file
84
whisper_webservice_interface.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
from typing import Optional, Union, Dict, Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
                     initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
                     output: Optional[str] = None, word_timestamps: Optional[bool] = None) -> tuple[int, str]:
    """
    Send a request to the ASR endpoint.

    `audio_file_path_or_bytes` is either a path to an audio file (str), which is
    read from disk, or the audio content itself (bytes), which is sent as-is.
    Every optional argument left as None is omitted from the query string.

    Always returns a tuple of the form (status_code, response_text), whether or
    not the request succeeded; callers must check the status code themselves.
    """
    # Tolerate a base URL given with a trailing slash, which would otherwise
    # produce ".../" + "/asr" with a doubled slash.
    endpoint = f"{url.rstrip('/')}/asr"

    params = {
        "task": task,
        "language": language,
        "initial_prompt": initial_prompt,
        "encode": encode,
        "output": output,
        "word_timestamps": word_timestamps
    }

    # Only send the parameters the caller actually set.
    params = {k: v for k, v in params.items() if v is not None}

    if isinstance(audio_file_path_or_bytes, str):
        with open(audio_file_path_or_bytes, 'rb') as f:
            audio_file = f.read()
    else:
        audio_file = audio_file_path_or_bytes

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files)

    return response.status_code, response.text
|
||||||
|
|
||||||
|
|
||||||
|
def detect_language(url: str, audio_file_path: str, encode: Optional[bool] = None) -> Dict[str, Any] | tuple[int, str]:
    """
    Send a request to the Detect Language endpoint.

    The file at `audio_file_path` is read from disk and uploaded. `encode` is
    only added to the query string when it is not None.

    Returns either a dictionary of the form {'detected_language': '<LANG>', 'language_code': '<LANG_CODE>'} if the request
    was successful (HTTP 200), or a tuple of the form (status_code, response_text) otherwise.
    """
    # Tolerate a base URL given with a trailing slash, which would otherwise
    # produce a doubled slash in the endpoint path.
    endpoint = f"{url.rstrip('/')}/detect-language"

    params = {
        "encode": encode
    }

    # Only send the parameters the caller actually set.
    params = {k: v for k, v in params.items() if v is not None}

    with open(audio_file_path, 'rb') as f:
        audio_file = f.read()

    files = {
        'audio_file': audio_file
    }

    response = requests.post(endpoint, params=params, files=files)

    if response.status_code == 200:
        return response.json()
    else:
        return response.status_code, response.text
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
|
||||||
|
def main():
    """Call both webservice endpoints once with a sample recording and print the raw results."""
    service_url = "http://127.0.0.1:9000"  # Replace with the actual URL of the webservice
    sample_path = "/run/media/yannik/IC RECORDER/REC_FILE/Interview01/231021_1541.mp3"

    # Transcription request: prints the (status_code, response_text) tuple.
    print(send_asr_request(service_url, sample_path, task="transcribe", language="en"))

    # Language detection request: prints whatever detect_language returns.
    print(detect_language(service_url, sample_path))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user