feat: fixed direct recording to webservice
This commit is contained in:
parent
d721eb3a5b
commit
85c89d5343
26
main.py
26
main.py
@ -360,14 +360,16 @@ def main(page):
|
||||
|
||||
set_transcribe_ready(False)
|
||||
|
||||
def paralyze_ui():
|
||||
def paralyze_ui(spinner: bool = True, disable_recording_button: bool = True):
|
||||
model_size_select.current.disabled = True
|
||||
model_device_select.current.disabled = True
|
||||
# model_bits_select.current.disabled = True
|
||||
model_load_unload_button.current.disabled = True
|
||||
processing_spinner.current.visible = True
|
||||
processing_spinner.current.visible = spinner
|
||||
current_mode_select.current.disabled = True
|
||||
|
||||
record_button.current.disabled = disable_recording_button
|
||||
|
||||
model_load_unload_button.current.icon = ft.icons.CLOSE
|
||||
model_load_unload_button.current.disabled = False
|
||||
for btn in transcribe_buttons:
|
||||
@ -392,6 +394,8 @@ def main(page):
|
||||
model_load_unload_button.current.icon = ft.icons.CLOSE
|
||||
model_load_unload_button.current.disabled = False
|
||||
|
||||
record_button.current.disabled = False
|
||||
|
||||
if mm.is_model_loaded():
|
||||
current_mode_select.current.disabled = True
|
||||
else:
|
||||
@ -406,6 +410,8 @@ def main(page):
|
||||
processing_spinner.current.visible = False
|
||||
current_mode_select.current.disabled = False
|
||||
|
||||
record_button.current.disabled = True
|
||||
|
||||
page.update()
|
||||
|
||||
def on_url_input(e):
|
||||
@ -442,15 +448,23 @@ def main(page):
|
||||
|
||||
recorded_audio = b"".join(sound_chunks)
|
||||
|
||||
set_transcribe_ready(False)
|
||||
|
||||
transcribe(recorded_audio)
|
||||
|
||||
recording = False
|
||||
|
||||
# sound = pygame.mixer.Sound(buffer=recorded_audio) # doesn't work because sampling rate is wrong
|
||||
record_button.current.bgcolor = "0x000000FF"
|
||||
|
||||
print("playing back recorded sound")
|
||||
set_transcribe_ready(True)
|
||||
|
||||
print("done")
|
||||
# sound.play()
|
||||
else:
|
||||
if not transcribe_ready:
|
||||
print("Can't record, not ready")
|
||||
return
|
||||
print("Starting Recording...")
|
||||
recording = True
|
||||
|
||||
@ -472,6 +486,9 @@ def main(page):
|
||||
|
||||
rec_stream.start_stream()
|
||||
|
||||
record_button.current.bgcolor = "0xFFFF4444"
|
||||
paralyze_ui(spinner=False, disable_recording_button=False)
|
||||
|
||||
def find_recordingdevice_tuple_by_name(search_name: str) -> typing.Tuple[int, str] | None:
    """Look up a capture device by its exact name.

    Scans ``capture_devices`` (pairs of ``(device_id, name)`` taken from the
    enclosing scope) in order and returns the first pair whose name equals
    ``search_name``.

    Args:
        search_name: Device name to match exactly (case-sensitive ``==``).

    Returns:
        The matching ``(device_id, name)`` pair, or ``None`` when no device
        has that name.
    """
    # Supplying a default to next() makes "not found" yield None, as the
    # return annotation promises, instead of raising StopIteration.
    return next(
        ((device_id, name) for device_id, name in capture_devices if name == search_name),
        None,
    )
|
||||
|
||||
@ -594,5 +611,8 @@ def main(page):
|
||||
], expand=True),
|
||||
)
|
||||
|
||||
# refresh all values, and make sure the right stuff is shown
|
||||
mode_select()
|
||||
|
||||
|
||||
ft.app(target=main)
|
||||
|
1357
poetry.lock
generated
1357
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -9,15 +9,13 @@ readme = "README.md"
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.11, <3.13"
|
||||
flet = "^0.10.3"
|
||||
faster-whisper = "^0.9.0"
|
||||
faster-whisper = "0.10.0"
|
||||
pygame = "^2.5.2"
|
||||
torch = "2.0.0"
|
||||
requests = "^2.31.0"
|
||||
validators = "^0.22.0"
|
||||
pyinstaller = "^6.1.0"
|
||||
pysdl2 = "^0.9.16"
|
||||
pysdl2-dll = "^2.28.4"
|
||||
pyaudio = "^0.2.13"
|
||||
pydub = "^0.25.1"
|
||||
|
||||
|
||||
[build-system]
|
||||
|
15
utils.py
15
utils.py
@ -3,6 +3,9 @@ import os
|
||||
|
||||
from typing import DefaultDict, Dict, List
|
||||
|
||||
from pydub import AudioSegment
|
||||
import io
|
||||
|
||||
|
||||
def tree() -> DefaultDict:
    """Return a ``defaultdict`` whose missing keys are filled with new trees.

    Because the default factory is ``tree`` itself, nested keys can be
    assigned at any depth without creating the intermediate levels first.
    """
    nested: DefaultDict = defaultdict(tree)
    return nested
|
||||
@ -44,3 +47,15 @@ def defaultdict_to_dict(d: defaultdict) -> dict:
|
||||
if isinstance(d, defaultdict):
|
||||
d = {k: defaultdict_to_dict(v) for k, v in d.items()}
|
||||
return d
|
||||
|
||||
|
||||
def convert_to_mp3(audio_data: bytes, sample_width: int, frame_rate: int, channels: int) -> bytes:
    """Encode raw audio bytes as MP3 and return the encoded bytes.

    Args:
        audio_data: Headerless audio samples, passed to
            ``AudioSegment.from_raw`` through an in-memory stream.
        sample_width: Forwarded to ``AudioSegment.from_raw`` as ``sample_width``.
        frame_rate: Forwarded to ``AudioSegment.from_raw`` as ``frame_rate``.
        channels: Forwarded to ``AudioSegment.from_raw`` as ``channels``.

    Returns:
        The full contents of the in-memory buffer that the segment was
        exported to with ``format="mp3"``.
    """
    # Describe the raw layout once, then hand it to pydub as keyword arguments.
    raw_layout = {
        "sample_width": sample_width,
        "frame_rate": frame_rate,
        "channels": channels,
    }
    segment = AudioSegment.from_raw(io.BytesIO(audio_data), **raw_layout)

    # Export into memory rather than a file so the caller gets plain bytes.
    encoded = io.BytesIO()
    segment.export(encoded, format="mp3")
    return encoded.getvalue()
|
@ -2,6 +2,8 @@ from typing import Optional, Union, Dict, Any
|
||||
|
||||
import requests
|
||||
|
||||
from utils import convert_to_mp3
|
||||
|
||||
|
||||
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
|
||||
initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
|
||||
@ -28,7 +30,7 @@ def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Opti
|
||||
with open(audio_file_path_or_bytes, 'rb') as f:
|
||||
audio_file = f.read()
|
||||
else:
|
||||
audio_file = audio_file_path_or_bytes
|
||||
audio_file = convert_to_mp3(audio_file_path_or_bytes, sample_width=2, frame_rate=16000, channels=1)
|
||||
|
||||
files = {
|
||||
'audio_file': audio_file
|
||||
|
Loading…
Reference in New Issue
Block a user