feat: fixed direct recording to webservice
This commit is contained in:
parent
67951b4349
commit
1e2f0334f7
26
main.py
26
main.py
@ -360,14 +360,16 @@ def main(page):
|
|||||||
|
|
||||||
set_transcribe_ready(False)
|
set_transcribe_ready(False)
|
||||||
|
|
||||||
def paralyze_ui():
|
def paralyze_ui(spinner: bool = True, disable_recording_button: bool = True):
|
||||||
model_size_select.current.disabled = True
|
model_size_select.current.disabled = True
|
||||||
model_device_select.current.disabled = True
|
model_device_select.current.disabled = True
|
||||||
# model_bits_select.current.disabled = True
|
# model_bits_select.current.disabled = True
|
||||||
model_load_unload_button.current.disabled = True
|
model_load_unload_button.current.disabled = True
|
||||||
processing_spinner.current.visible = True
|
processing_spinner.current.visible = spinner
|
||||||
current_mode_select.current.disabled = True
|
current_mode_select.current.disabled = True
|
||||||
|
|
||||||
|
record_button.current.disabled = disable_recording_button
|
||||||
|
|
||||||
model_load_unload_button.current.icon = ft.icons.CLOSE
|
model_load_unload_button.current.icon = ft.icons.CLOSE
|
||||||
model_load_unload_button.current.disabled = False
|
model_load_unload_button.current.disabled = False
|
||||||
for btn in transcribe_buttons:
|
for btn in transcribe_buttons:
|
||||||
@ -392,6 +394,8 @@ def main(page):
|
|||||||
model_load_unload_button.current.icon = ft.icons.CLOSE
|
model_load_unload_button.current.icon = ft.icons.CLOSE
|
||||||
model_load_unload_button.current.disabled = False
|
model_load_unload_button.current.disabled = False
|
||||||
|
|
||||||
|
record_button.current.disabled = False
|
||||||
|
|
||||||
if mm.is_model_loaded():
|
if mm.is_model_loaded():
|
||||||
current_mode_select.current.disabled = True
|
current_mode_select.current.disabled = True
|
||||||
else:
|
else:
|
||||||
@ -406,6 +410,8 @@ def main(page):
|
|||||||
processing_spinner.current.visible = False
|
processing_spinner.current.visible = False
|
||||||
current_mode_select.current.disabled = False
|
current_mode_select.current.disabled = False
|
||||||
|
|
||||||
|
record_button.current.disabled = True
|
||||||
|
|
||||||
page.update()
|
page.update()
|
||||||
|
|
||||||
def on_url_input(e):
|
def on_url_input(e):
|
||||||
@ -442,15 +448,23 @@ def main(page):
|
|||||||
|
|
||||||
recorded_audio = b"".join(sound_chunks)
|
recorded_audio = b"".join(sound_chunks)
|
||||||
|
|
||||||
|
set_transcribe_ready(False)
|
||||||
|
|
||||||
transcribe(recorded_audio)
|
transcribe(recorded_audio)
|
||||||
|
|
||||||
recording = False
|
recording = False
|
||||||
|
|
||||||
# sound = pygame.mixer.Sound(buffer=recorded_audio) # doesn't work because sampling rate is wrong
|
# sound = pygame.mixer.Sound(buffer=recorded_audio) # doesn't work because sampling rate is wrong
|
||||||
|
record_button.current.bgcolor = "0x000000FF"
|
||||||
|
|
||||||
print("playing back recorded sound")
|
set_transcribe_ready(True)
|
||||||
|
|
||||||
|
print("done")
|
||||||
# sound.play()
|
# sound.play()
|
||||||
else:
|
else:
|
||||||
|
if not transcribe_ready:
|
||||||
|
print("Can't record, not ready")
|
||||||
|
return
|
||||||
print("Starting Recording...")
|
print("Starting Recording...")
|
||||||
recording = True
|
recording = True
|
||||||
|
|
||||||
@ -472,6 +486,9 @@ def main(page):
|
|||||||
|
|
||||||
rec_stream.start_stream()
|
rec_stream.start_stream()
|
||||||
|
|
||||||
|
record_button.current.bgcolor = "0xFFFF4444"
|
||||||
|
paralyze_ui(spinner=False, disable_recording_button=False)
|
||||||
|
|
||||||
def find_recordingdevice_tuple_by_name(search_name: str) -> typing.Tuple[int, str] | None:
|
def find_recordingdevice_tuple_by_name(search_name: str) -> typing.Tuple[int, str] | None:
|
||||||
return next(((device_id, name) for device_id, name in capture_devices if name == search_name))
|
return next(((device_id, name) for device_id, name in capture_devices if name == search_name))
|
||||||
|
|
||||||
@ -594,5 +611,8 @@ def main(page):
|
|||||||
], expand=True),
|
], expand=True),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# refresh all values, and make sure the right stuff is shown
|
||||||
|
mode_select()
|
||||||
|
|
||||||
|
|
||||||
ft.app(target=main)
|
ft.app(target=main)
|
||||||
|
1357
poetry.lock
generated
1357
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -9,15 +9,13 @@ readme = "README.md"
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.11, <3.13"
|
python = ">=3.11, <3.13"
|
||||||
flet = "^0.10.3"
|
flet = "^0.10.3"
|
||||||
faster-whisper = "^0.9.0"
|
faster-whisper = "0.10.0"
|
||||||
pygame = "^2.5.2"
|
pygame = "^2.5.2"
|
||||||
torch = "2.0.0"
|
|
||||||
requests = "^2.31.0"
|
requests = "^2.31.0"
|
||||||
validators = "^0.22.0"
|
validators = "^0.22.0"
|
||||||
pyinstaller = "^6.1.0"
|
pyinstaller = "^6.1.0"
|
||||||
pysdl2 = "^0.9.16"
|
|
||||||
pysdl2-dll = "^2.28.4"
|
|
||||||
pyaudio = "^0.2.13"
|
pyaudio = "^0.2.13"
|
||||||
|
pydub = "^0.25.1"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
15
utils.py
15
utils.py
@ -3,6 +3,9 @@ import os
|
|||||||
|
|
||||||
from typing import DefaultDict, Dict, List
|
from typing import DefaultDict, Dict, List
|
||||||
|
|
||||||
|
from pydub import AudioSegment
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
def tree() -> DefaultDict:
|
def tree() -> DefaultDict:
|
||||||
return defaultdict(tree)
|
return defaultdict(tree)
|
||||||
@ -44,3 +47,15 @@ def defaultdict_to_dict(d: defaultdict) -> dict:
|
|||||||
if isinstance(d, defaultdict):
|
if isinstance(d, defaultdict):
|
||||||
d = {k: defaultdict_to_dict(v) for k, v in d.items()}
|
d = {k: defaultdict_to_dict(v) for k, v in d.items()}
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_mp3(audio_data: bytes, sample_width: int, frame_rate: int, channels: int) -> bytes:
|
||||||
|
audio = AudioSegment.from_raw(
|
||||||
|
io.BytesIO(audio_data),
|
||||||
|
sample_width=sample_width,
|
||||||
|
frame_rate=frame_rate,
|
||||||
|
channels=channels
|
||||||
|
)
|
||||||
|
mp3_buffer = io.BytesIO()
|
||||||
|
audio.export(mp3_buffer, format="mp3")
|
||||||
|
return mp3_buffer.getvalue()
|
@ -2,6 +2,8 @@ from typing import Optional, Union, Dict, Any
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from utils import convert_to_mp3
|
||||||
|
|
||||||
|
|
||||||
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
|
def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Optional[str] = None, language: Optional[str] = None,
|
||||||
initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
|
initial_prompt: Optional[str] = None, encode: Optional[bool] = None,
|
||||||
@ -28,7 +30,7 @@ def send_asr_request(url: str, audio_file_path_or_bytes: str | bytes, task: Opti
|
|||||||
with open(audio_file_path_or_bytes, 'rb') as f:
|
with open(audio_file_path_or_bytes, 'rb') as f:
|
||||||
audio_file = f.read()
|
audio_file = f.read()
|
||||||
else:
|
else:
|
||||||
audio_file = audio_file_path_or_bytes
|
audio_file = convert_to_mp3(audio_file_path_or_bytes, sample_width=2, frame_rate=16000, channels=1)
|
||||||
|
|
||||||
files = {
|
files = {
|
||||||
'audio_file': audio_file
|
'audio_file': audio_file
|
||||||
|
Loading…
Reference in New Issue
Block a user