feat: fixed direct recording to webservice

This commit is contained in:
Yandrik 2023-12-18 19:33:58 +01:00
parent 67951b4349
commit 1e2f0334f7
5 changed files with 548 additions and 860 deletions

26
main.py
View File

@ -360,14 +360,16 @@ def main(page):
set_transcribe_ready(False) set_transcribe_ready(False)
def paralyze_ui(): def paralyze_ui(spinner: bool = True, disable_recording_button: bool = True):
model_size_select.current.disabled = True model_size_select.current.disabled = True
model_device_select.current.disabled = True model_device_select.current.disabled = True
# model_bits_select.current.disabled = True # model_bits_select.current.disabled = True
model_load_unload_button.current.disabled = True model_load_unload_button.current.disabled = True
processing_spinner.current.visible = True processing_spinner.current.visible = spinner
current_mode_select.current.disabled = True current_mode_select.current.disabled = True
record_button.current.disabled = disable_recording_button
model_load_unload_button.current.icon = ft.icons.CLOSE model_load_unload_button.current.icon = ft.icons.CLOSE
model_load_unload_button.current.disabled = False model_load_unload_button.current.disabled = False
for btn in transcribe_buttons: for btn in transcribe_buttons:
@ -392,6 +394,8 @@ def main(page):
model_load_unload_button.current.icon = ft.icons.CLOSE model_load_unload_button.current.icon = ft.icons.CLOSE
model_load_unload_button.current.disabled = False model_load_unload_button.current.disabled = False
record_button.current.disabled = False
if mm.is_model_loaded(): if mm.is_model_loaded():
current_mode_select.current.disabled = True current_mode_select.current.disabled = True
else: else:
@ -406,6 +410,8 @@ def main(page):
processing_spinner.current.visible = False processing_spinner.current.visible = False
current_mode_select.current.disabled = False current_mode_select.current.disabled = False
record_button.current.disabled = True
page.update() page.update()
def on_url_input(e): def on_url_input(e):
@ -442,15 +448,23 @@ def main(page):
recorded_audio = b"".join(sound_chunks) recorded_audio = b"".join(sound_chunks)
set_transcribe_ready(False)
transcribe(recorded_audio) transcribe(recorded_audio)
recording = False recording = False
# sound = pygame.mixer.Sound(buffer=recorded_audio) # doesn't work because sampling rate is wrong # sound = pygame.mixer.Sound(buffer=recorded_audio) # doesn't work because sampling rate is wrong
record_button.current.bgcolor = "0x000000FF"
print("playing back recorded sound") set_transcribe_ready(True)
print("done")
# sound.play() # sound.play()
else: else:
if not transcribe_ready:
print("Can't record, not ready")
return
print("Starting Recording...") print("Starting Recording...")
recording = True recording = True
@ -472,6 +486,9 @@ def main(page):
rec_stream.start_stream() rec_stream.start_stream()
record_button.current.bgcolor = "0xFFFF4444"
paralyze_ui(spinner=False, disable_recording_button=False)
def find_recordingdevice_tuple_by_name(search_name: str) -> typing.Tuple[int, str] | None:
    """Look up a capture device by its exact name.

    Iterates over ``capture_devices`` (unpacked as ``(device_id, name)``
    pairs) and returns the first pair whose name equals ``search_name``.

    Args:
        search_name: Device name to match exactly.

    Returns:
        The matching ``(device_id, name)`` tuple, or ``None`` when no device
        carries that name.
    """
    # Without an explicit default, ``next`` raises StopIteration on an
    # exhausted generator, which contradicts the ``| None`` return annotation.
    return next(
        ((device_id, name) for device_id, name in capture_devices if name == search_name),
        None,
    )
@ -594,5 +611,8 @@ def main(page):
], expand=True), ], expand=True),
) )
# refresh all values, and make sure the right stuff is shown
mode_select()
ft.app(target=main) ft.app(target=main)

1357
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -9,15 +9,13 @@ readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.11, <3.13" python = ">=3.11, <3.13"
flet = "^0.10.3" flet = "^0.10.3"
faster-whisper = "^0.9.0" faster-whisper = "0.10.0"
pygame = "^2.5.2" pygame = "^2.5.2"
torch = "2.0.0"
requests = "^2.31.0" requests = "^2.31.0"
validators = "^0.22.0" validators = "^0.22.0"
pyinstaller = "^6.1.0" pyinstaller = "^6.1.0"
pysdl2 = "^0.9.16"
pysdl2-dll = "^2.28.4"
pyaudio = "^0.2.13" pyaudio = "^0.2.13"
pydub = "^0.25.1"
[build-system] [build-system]

View File

@ -3,6 +3,9 @@ import os
from typing import DefaultDict, Dict, List from typing import DefaultDict, Dict, List
from pydub import AudioSegment
import io
def tree() -> DefaultDict:
    """Return a ``defaultdict`` whose missing keys are themselves new trees.

    Because the default factory is ``tree`` itself, arbitrarily deep keys can
    be assigned without creating the intermediate levels first.
    """
    nested: DefaultDict = defaultdict(tree)
    return nested
@ -44,3 +47,15 @@ def defaultdict_to_dict(d: defaultdict) -> dict:
if isinstance(d, defaultdict): if isinstance(d, defaultdict):
d = {k: defaultdict_to_dict(v) for k, v in d.items()} d = {k: defaultdict_to_dict(v) for k, v in d.items()}
return d return d
def convert_to_mp3(audio_data: bytes, sample_width: int, frame_rate: int, channels: int) -> bytes:
    """Encode headerless raw audio bytes as MP3 and return the encoded bytes.

    The raw data is wrapped in an in-memory stream, loaded through
    ``AudioSegment.from_raw`` with the supplied format parameters, and then
    exported to a second in-memory stream in ``mp3`` format.

    Args:
        audio_data: Raw audio samples without any container header.
        sample_width: Passed to pydub as ``sample_width`` (bytes per sample
            per pydub's documentation).
        frame_rate: Passed to pydub as ``frame_rate`` (samples per second).
        channels: Passed to pydub as ``channels``.

    Returns:
        The full contents of the exported MP3 buffer.
    """
    raw_stream = io.BytesIO(audio_data)
    segment = AudioSegment.from_raw(
        raw_stream,
        sample_width=sample_width,
        frame_rate=frame_rate,
        channels=channels,
    )

    # Export into memory rather than to a file so the caller gets bytes back.
    encoded = io.BytesIO()
    segment.export(encoded, format="mp3")
    return encoded.getvalue()

View File

@ -2,6 +2,8 @@ from typing import Optional, Union, Dict, Any
import requests import requests
from utils import convert_to_mp3
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None, def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
initial_prompt: Optional[str] = None, encode: Optional[bool] = None, initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
@ -28,7 +30,7 @@ def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Opti
with open(audio_file_path_or_bytes, 'rb') as f: with open(audio_file_path_or_bytes, 'rb') as f:
audio_file = f.read() audio_file = f.read()
else: else:
audio_file = audio_file_path_or_bytes audio_file = convert_to_mp3(audio_file_path_or_bytes, sample_width=2, frame_rate=16000, channels=1)
files = { files = {
'audio_file': audio_file 'audio_file': audio_file