fmt: re-linted files

This commit is contained in:
Yandrik 2024-04-25 15:32:07 +02:00
parent 8625ddf6f6
commit 69da9f8d25
6 changed files with 216 additions and 77 deletions

87
main.py
View File

@ -1,4 +1,5 @@
import re
from sys import exit
import argparse
@ -7,56 +8,60 @@ from audiogen import AudioGenerator
class SimpleMarkupParser:
    """Tokenise text written in the bracket-tag TTS markup.

    Recognised tags are ``[voice NAME]``, ``[silence Ns]``, ``[section N]``,
    ``[end_section]`` and ``[insert_section N]``. Any other ``[...]`` tag is
    kept as a ``"none"`` entry, and everything between tags becomes a
    ``"text"`` entry.
    """

    def __init__(self, input_text):
        """Store the input with every whitespace run collapsed to one space.

        Args:
            input_text: Raw markup string (may span several lines).
        """
        # str.split() with no argument splits on any whitespace run, so
        # newlines and tabs are normalised to single spaces as well.
        self.input_text = " ".join(input_text.split())
        self.parsed_output = []
        # Initialised empty; not read or written by the methods of this class.
        self.sections = {}

    def parse(self):
        """Split the input into tokens and append one dict per token.

        Results accumulate in ``self.parsed_output``; calling ``parse`` again
        appends the same entries a second time. Entry shapes:

        * ``{"type": "voice", "voice": str}``
        * ``{"type": "silence", "duration": int}`` (seconds * 1000)
        * ``{"type": "section_start", "section_id": int}``
        * ``{"type": "section_end"}``
        * ``{"type": "insert_section", "section_id": int}``
        * ``{"type": "none", "text": str}`` for unrecognised tags
        * ``{"type": "text", "text": str}`` for stripped plain text
        """
        # The capturing group makes re.split keep the bracket tags as tokens
        # in between the plain-text pieces.
        tokens = re.split(r"(\[[^]]+])", self.input_text)

        for token in tokens:
            voice_match = re.match(r"\[voice ([^]]+)]", token)
            if voice_match:
                self.parsed_output.append(
                    {"type": "voice", "voice": voice_match.group(1)}
                )
                continue

            silence_match = re.match(r"\[silence (\d+)s]", token)
            if silence_match:
                # The tag is given in whole seconds; the stored value is
                # that number multiplied by 1000.
                duration = int(silence_match.group(1)) * 1000
                self.parsed_output.append({"type": "silence", "duration": duration})
                continue

            section_match = re.match(r"\[section (\d+)]", token)
            if section_match:
                section_id = int(section_match.group(1))
                self.parsed_output.append(
                    {"type": "section_start", "section_id": section_id}
                )
                continue

            end_section_match = re.match(r"\[end_section]", token)
            if end_section_match:
                self.parsed_output.append({"type": "section_end"})
                continue

            insert_section_match = re.match(r"\[insert_section (\d+)]", token)
            if insert_section_match:
                section_id = int(insert_section_match.group(1))
                self.parsed_output.append(
                    {"type": "insert_section", "section_id": section_id}
                )
                continue

            # Any remaining bracketed token is an unknown tag: keep it
            # verbatim, marked "none", rather than emitting it as text.
            if re.match(r"\[.*]", token):
                self.parsed_output.append({"type": "none", "text": token})
                continue

            # Plain text between tags; whitespace-only pieces are dropped.
            if token.strip():
                self.parsed_output.append({"type": "text", "text": token.strip()})

    def get_output(self):
        """Return the list of parsed entries built up by ``parse``."""
        return self.parsed_output
def main():
parser_description = """
TTS text with voice selection, silence intervals, and section functionality.
The script supports a simple markup language to change voices, insert silence, define sections, and insert sections within the text.
@ -93,16 +98,29 @@ def main():
4. Insert fable speaking the audio from section 1 (without regenerating it).
"""
parser = argparse.ArgumentParser(description=parser_description,
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--file', type=str, help="File containing the text to parse.")
parser.add_argument('text', nargs='?', default=None, help="Text to parse.")
parser.add_argument('--out-file', type=str, default="out.mp3",
help="Output file to save the audio to (mp3 recommended). Default out.mp3")
parser.add_argument('--provider', type=str, default="openai", help="AI Provider. Supported: openai, zuki")
parser.add_argument('--api-key', type=str, default=None,
help="API Key for AI Provider. Alternatively, create a file 'apikey.secret' in the workdir containing your API key.")
parser = argparse.ArgumentParser(
description=parser_description, formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument("--file", type=str, help="File containing the text to parse.")
parser.add_argument("text", nargs="?", default=None, help="Text to parse.")
parser.add_argument(
"--out-file",
type=str,
default="out.mp3",
help="Output file to save the audio to (mp3 recommended). Default out.mp3",
)
parser.add_argument(
"--provider",
type=str,
default="openai",
help="AI Provider. Supported: openai, zuki",
)
parser.add_argument(
"--api-key",
type=str,
default=None,
help="API Key for AI Provider. Alternatively, create a file 'apikey.secret' in the workdir containing your API key.",
)
args = parser.parse_args()
if not args.file and not args.text:
@ -110,12 +128,14 @@ def main():
exit(1)
if args.file and args.text:
print("Please provide either a file (using --file <PATH>) or a text input, not both!")
print(
"Please provide either a file (using --file <PATH>) or a text input, not both!"
)
exit(1)
input_text = args.text
if args.file:
with open(args.file, 'r') as file:
with open(args.file, "r") as file:
input_text = file.read()
parser = SimpleMarkupParser(input_text)
@ -132,8 +152,13 @@ def main():
tts.validate_voices()
except ValueError as e:
print(e)
valid_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
print("Voices not valid! Valid voices are: " + "'" + "', '".join(valid_voices) + "'")
valid_voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
print(
"Voices not valid! Valid voices are: "
+ "'"
+ "', '".join(valid_voices)
+ "'"
)
try:
tts.generate_audio()

BIN
out.mp3

Binary file not shown.

117
poetry.lock generated
View File

@ -1,5 +1,16 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "altgraph"
version = "0.17.4"
description = "Python graph (network) package"
optional = false
python-versions = "*"
files = [
{file = "altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff"},
{file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
]
[[package]]
name = "annotated-types"
version = "0.6.0"
@ -153,6 +164,20 @@ files = [
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
[[package]]
name = "macholib"
version = "1.16.3"
description = "Mach-O header analysis and editing"
optional = false
python-versions = "*"
files = [
{file = "macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c"},
{file = "macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30"},
]
[package.dependencies]
altgraph = ">=0.17"
[[package]]
name = "openai"
version = "1.23.6"
@ -188,14 +213,14 @@ files = [
]
[[package]]
name = "pathlib"
version = "1.0.1"
description = "Object-oriented filesystem paths"
name = "pefile"
version = "2023.2.7"
description = "Python PE parsing module"
optional = false
python-versions = "*"
python-versions = ">=3.6.0"
files = [
{file = "pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147"},
{file = "pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f"},
{file = "pefile-2023.2.7-py3-none-any.whl", hash = "sha256:da185cd2af68c08a6cd4481f7325ed600a88f6a813bad9dea07ab3ef73d8d8d6"},
{file = "pefile-2023.2.7.tar.gz", hash = "sha256:82e6114004b3d6911c77c3953e3838654b04511b8b66e8583db70c65998017dc"},
]
[[package]]
@ -334,6 +359,55 @@ files = [
{file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
]
[[package]]
name = "pyinstaller"
version = "6.6.0"
description = "PyInstaller bundles a Python application and all its dependencies into a single package."
optional = false
python-versions = "<3.13,>=3.8"
files = [
{file = "pyinstaller-6.6.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:d2705efe79f8749526f65c4bce70ae88eea8b6adfb051f123122e86542fe3802"},
{file = "pyinstaller-6.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:2aa771693ee3e0a899be3e9d946a24eab9896a98d0d4035f05a22f1193004cfb"},
{file = "pyinstaller-6.6.0-py3-none-manylinux2014_i686.whl", hash = "sha256:1fc15e8cebf76361568359a40926aa5746fc0a84ca365fb2ac6caeea014a2cd3"},
{file = "pyinstaller-6.6.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:7c4a55a5d872c118bc7a5e641c2df46ad18585c002d96adad129b4ee8c104463"},
{file = "pyinstaller-6.6.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:97197593344f11f3dd2bdadbab14c61fbc4cdf9cc692a89b047cb671764c1824"},
{file = "pyinstaller-6.6.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:00d81ddeee97710245a7ed03b0f9d5a4daf6c3a07adf978487b10991e1e20470"},
{file = "pyinstaller-6.6.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:b7cab21db6fcfbdab47ee960239d1b44cd95383a4463177bd592613941d67959"},
{file = "pyinstaller-6.6.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:00996d2090734d9ae4a1e53ed40351b07d593c37118d3e0d435bbcfa8db9edee"},
{file = "pyinstaller-6.6.0-py3-none-win32.whl", hash = "sha256:cfe3ed214601de0723cb660994b44934efacb77a1cf0e4cc5133da996bcf36ce"},
{file = "pyinstaller-6.6.0-py3-none-win_amd64.whl", hash = "sha256:e2f55fbbdf8a99ea84b39bc5669a68624473c303486d7eb2cd9063b339f0aa28"},
{file = "pyinstaller-6.6.0-py3-none-win_arm64.whl", hash = "sha256:abbd591967593dab264bcc3bcb2466c0a1582f19a112e37e916c4212069c7933"},
{file = "pyinstaller-6.6.0.tar.gz", hash = "sha256:be6bc2c3073d3e84fb7148d3af33ce9b6a7f01cfb154e06314cd1d4c05798a32"},
]
[package.dependencies]
altgraph = "*"
macholib = {version = ">=1.8", markers = "sys_platform == \"darwin\""}
packaging = ">=22.0"
pefile = {version = ">=2022.5.30", markers = "sys_platform == \"win32\""}
pyinstaller-hooks-contrib = ">=2024.3"
pywin32-ctypes = {version = ">=0.2.1", markers = "sys_platform == \"win32\""}
setuptools = ">=42.0.0"
[package.extras]
completion = ["argcomplete"]
hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"]
[[package]]
name = "pyinstaller-hooks-contrib"
version = "2024.5"
description = "Community maintained hooks for PyInstaller"
optional = false
python-versions = ">=3.7"
files = [
{file = "pyinstaller_hooks_contrib-2024.5-py2.py3-none-any.whl", hash = "sha256:0852249b7fb1e9394f8f22af2c22fa5294c2c0366157969f98c96df62410c4c6"},
{file = "pyinstaller_hooks_contrib-2024.5.tar.gz", hash = "sha256:aa5dee25ea7ca317ad46fa16b5afc8dba3b0e43f2847e498930138885efd3cab"},
]
[package.dependencies]
packaging = ">=22.0"
setuptools = ">=42.0.0"
[[package]]
name = "pytest"
version = "8.1.1"
@ -354,6 +428,33 @@ pluggy = ">=1.4,<2.0"
[package.extras]
testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
[[package]]
name = "pywin32-ctypes"
version = "0.2.2"
description = "A (partial) reimplementation of pywin32 using ctypes/cffi"
optional = false
python-versions = ">=3.6"
files = [
{file = "pywin32-ctypes-0.2.2.tar.gz", hash = "sha256:3426e063bdd5fd4df74a14fa3cf80a0b42845a87e1d1e81f6549f9daec593a60"},
{file = "pywin32_ctypes-0.2.2-py3-none-any.whl", hash = "sha256:bf490a1a709baf35d688fe0ecf980ed4de11d2b3e37b51e5442587a75d9957e7"},
]
[[package]]
name = "setuptools"
version = "69.5.1"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.8"
files = [
{file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
{file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
]
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
[[package]]
name = "sniffio"
version = "1.3.1"
@ -398,5 +499,5 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "797f461706f9340f50f5d1b4a52b6af41ccb1565e5b1d199b36aa7ec140bcb72"
python-versions = ">=3.12,<3.13"
content-hash = "5edb9bebdbf3d2cbd05201e953830c0bd3cb05956885df55a192ad1026081cc3"

View File

@ -7,13 +7,17 @@ license = "MIT"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.12"
python = ">=3.12,<3.13"
openai = "^1.16.2"
argparse = "^1.4.0"
pathlib = "^1.0.1"
# pathlib = "^1.0.1"
pydub = "^0.25.1"
[tool.poetry.dev-dependencies]
pytest = "^8.1.1"
[tool.poetry.group.dev.dependencies]
pyinstaller = "^6.6.0"
[build-system]
requires = ["poetry-core"]

View File

@ -2,18 +2,19 @@ from unittest import TestCase
from unittest.mock import patch
from audiogen import AudioGenerator
class TestAudioGeneratorValidate(TestCase):
def setUp(self):
self.valid_parsed_data = [
{'type': 'voice', 'voice': 'alloy'},
{'type': 'text', 'text': 'Hello, world!'},
{'type': 'silence', 'duration': 1000},
{'type': 'section_start', 'section_id': 1},
{'type': 'text', 'text': 'This is section 1.'},
{'type': 'section_end'},
{'type': 'insert_section', 'section_id': 1}
{"type": "voice", "voice": "alloy"},
{"type": "text", "text": "Hello, world!"},
{"type": "silence", "duration": 1000},
{"type": "section_start", "section_id": 1},
{"type": "text", "text": "This is section 1."},
{"type": "section_end"},
{"type": "insert_section", "section_id": 1},
]
self.audio_generator = AudioGenerator(self.valid_parsed_data, 'test_output.mp3')
self.audio_generator = AudioGenerator(self.valid_parsed_data, "test_output.mp3")
def test_validate_voices_valid(self):
self.audio_generator.validate_voices()
@ -21,10 +22,10 @@ class TestAudioGeneratorValidate(TestCase):
def test_validate_voices_invalid(self):
invalid_parsed_data = [
{'type': 'voice', 'voice': 'invalid_voice'},
{'type': 'text', 'text': 'Hello, world!'}
{"type": "voice", "voice": "invalid_voice"},
{"type": "text", "text": "Hello, world!"},
]
invalid_audio_generator = AudioGenerator(invalid_parsed_data, 'test_output.mp3')
invalid_audio_generator = AudioGenerator(invalid_parsed_data, "test_output.mp3")
with self.assertRaises(ValueError) as cm:
invalid_audio_generator.validate_voices()
self.assertEqual(str(cm.exception), "Invalid voice(s) found: invalid_voice")
@ -35,11 +36,11 @@ class TestAudioGeneratorValidate(TestCase):
def test_validate_sections_invalid(self):
invalid_parsed_data = [
{'type': 'voice', 'voice': 'alloy'},
{'type': 'text', 'text': 'Hello, world!'},
{'type': 'insert_section', 'section_id': 1}
{"type": "voice", "voice": "alloy"},
{"type": "text", "text": "Hello, world!"},
{"type": "insert_section", "section_id": 1},
]
invalid_audio_generator = AudioGenerator(invalid_parsed_data, 'test_output.mp3')
invalid_audio_generator = AudioGenerator(invalid_parsed_data, "test_output.mp3")
with self.assertRaises(ValueError) as cm:
invalid_audio_generator.validate_sections()
self.assertIn("Section 1 is used before being defined.", str(cm.exception))

View File

@ -3,7 +3,6 @@ from unittest import TestCase
from main import SimpleMarkupParser
class Test(TestCase):
def test_simple_markup_parser_0(self):
# Test case with sections
@ -13,27 +12,34 @@ class Test(TestCase):
parsed_output = parser.get_output()
assert len(parsed_output) == 7, "Expected 7 tokens, got %d" % len(parsed_output)
assert parsed_output[0] == {'type': 'section_start', 'section_id': 1}
assert parsed_output[1] == {'type': 'voice', 'voice': 'alloy'}
assert parsed_output[2] == {'type': 'text', 'text': 'Hello, this is section 1.'}
assert parsed_output[3] == {'type': 'section_end'}
assert parsed_output[4] == {'type': 'voice', 'voice': 'nova'}
assert parsed_output[5] == {'type': 'text', 'text': "Now we're outside the section."}
assert parsed_output[6] == {'type': 'insert_section', 'section_id': 1}
assert parsed_output[0] == {"type": "section_start", "section_id": 1}
assert parsed_output[1] == {"type": "voice", "voice": "alloy"}
assert parsed_output[2] == {"type": "text", "text": "Hello, this is section 1."}
assert parsed_output[3] == {"type": "section_end"}
assert parsed_output[4] == {"type": "voice", "voice": "nova"}
assert parsed_output[5] == {
"type": "text",
"text": "Now we're outside the section.",
}
assert parsed_output[6] == {"type": "insert_section", "section_id": 1}
def test_simple_markup_parser_1(self):
# Test case with silence
markup_text = "[voice nova] Let's have a moment of silence. [silence 3s] And we're back!"
markup_text = (
"[voice nova] Let's have a moment of silence. [silence 3s] And we're back!"
)
parser = SimpleMarkupParser(markup_text)
parser.parse()
parsed_output = parser.get_output()
assert len(parsed_output) == 4
assert parsed_output[0] == {'type': 'voice', 'voice': 'nova'}
assert parsed_output[1] == {'type': 'text', 'text': "Let's have a moment of silence."}
assert parsed_output[2] == {'type': 'silence', 'duration': 3000}
assert parsed_output[3] == {'type': 'text', 'text': "And we're back!"}
assert parsed_output[0] == {"type": "voice", "voice": "nova"}
assert parsed_output[1] == {
"type": "text",
"text": "Let's have a moment of silence.",
}
assert parsed_output[2] == {"type": "silence", "duration": 3000}
assert parsed_output[3] == {"type": "text", "text": "And we're back!"}
def test_simple_markup_parser_2(self):
# Test case with unknown markup
@ -43,10 +49,12 @@ class Test(TestCase):
parsed_output = parser.get_output()
assert len(parsed_output) == 6
assert parsed_output[0] == {'type': 'voice', 'voice': 'fable'}
assert parsed_output[1] == {'type': 'text', 'text': 'Hello!'}
assert parsed_output[2] == {'type': 'none', 'text': '[unknown_markup]'}
assert parsed_output[3] == {'type': 'text', 'text': 'This is an unknown markup.'}
assert parsed_output[4] == {'type': 'voice', 'voice': 'nova'}
assert parsed_output[5] == {'type': 'text', 'text': 'Back to a known voice.'}
assert parsed_output[0] == {"type": "voice", "voice": "fable"}
assert parsed_output[1] == {"type": "text", "text": "Hello!"}
assert parsed_output[2] == {"type": "none", "text": "[unknown_markup]"}
assert parsed_output[3] == {
"type": "text",
"text": "This is an unknown markup.",
}
assert parsed_output[4] == {"type": "voice", "voice": "nova"}
assert parsed_output[5] == {"type": "text", "text": "Back to a known voice."}