import logging import re from pathlib import Path from pydub import AudioSegment, silence from openai import OpenAI import time def get_api_key() -> str: try: with open('apikey.secret') as f: api_key = f.read().strip() if api_key == '': raise ValueError('API key not found. Please provide your API key in the file \'apikey.secret\'.') return api_key except FileNotFoundError: raise ValueError('Couldn\'t read API key from file \'apikey.secret\'. Does it exist?') class AudioGenerator: def __init__(self, parsed_data, output_file, default_silence=650, ai_provider="openai", api_key=None): self.parsed_data = parsed_data self.output_file = output_file self.default_silence = default_silence self.sections = {} self.current_section = None if not api_key: api_key = get_api_key() match ai_provider: case "openai": self.client = OpenAI(api_key=api_key) case "zuki": self.client = OpenAI(base_url="https://zukijourney.xyzbot.net/v1", api_key=api_key) case _: raise ValueError(f"Unsupported AI provider: {ai_provider}") def validate_voices(self): """Check if all voices in the parsed data are valid.""" valid_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'] invalid_voices = set() for item in self.parsed_data: if item['type'] == 'voice' and item['voice'] not in valid_voices: invalid_voices.add(item['voice']) if invalid_voices: raise ValueError(f"Invalid voice(s) found: {', '.join(invalid_voices)}") print("All voices are valid.") def validate_sections(self): """Check if all sections used are defined beforehand.""" used_sections = set() defined_sections = set() section_errors = [] for item in self.parsed_data: if item['type'] == 'section_start': defined_sections.add(item['section_id']) elif item['type'] == 'insert_section': section_id = item['section_id'] if section_id not in defined_sections: section_errors.append(f"Section {section_id} is used before being defined.") used_sections.add(item['section_id']) undefined_sections = used_sections - defined_sections if undefined_sections or len(section_errors) > 0: raise ValueError(f"Section Validation Errors:\n {'\n '.join(section_errors)}\n\nUndefined section(s) used: {', '.join(map(str, undefined_sections))}") print("All sections are properly defined.") def text_to_speech(self, text, voice): """Generate speech using OpenAI's voice API with retry logic.""" print(f"Voice {voice} chosen") print(f"TTS: {text[:50]}...") temp_path = Path("temp_speech.mp3") attempts = 0 success = False while not success: try: response = self.client.audio.speech.create( model="tts-1", voice=voice, input=text, ) response.write_to_file(str(temp_path)) success = True return AudioSegment.from_mp3(temp_path) except Exception as e: print(f"Failed to generate TTS: {e}") attempts += 1 if attempts >= 3: user_decision = input("Retry TTS generation? (yes/no): ").strip().lower() if user_decision.lower() in ['y', 'yes']: attempts = 0 # Reset attempts for another round of retries else: print("Exiting due to TTS generation failure.") exit(1) else: print("Retrying...") time.sleep(1) # Wait a bit before retrying to avoid hammering the API too quickly def generate_audio(self): self.validate_voices() self.validate_sections() combined_audio = AudioSegment.empty() current_voice = None for item in self.parsed_data: if item['type'] == 'voice': current_voice = item['voice'] elif item['type'] == 'text': if not current_voice: raise ValueError("First text segment before voice was selected!") audio_segment = self.text_to_speech(item['text'], current_voice) combined_audio += audio_segment if self.default_silence > 0: combined_audio += AudioSegment.silent(duration=self.default_silence) if self.current_section is not None: self.sections[self.current_section] += audio_segment elif item['type'] == 'silence': combined_audio += AudioSegment.silent(duration=item['duration']) if self.current_section is not None: self.sections[self.current_section] += AudioSegment.silent(duration=item['duration']) elif item['type'] == 'section_start': self.current_section = item['section_id'] self.sections[self.current_section] = AudioSegment.empty() elif item['type'] == 'section_end': self.current_section = None elif item['type'] == 'insert_section': section_id = item['section_id'] if section_id in self.sections: combined_audio += self.sections[section_id] else: raise ValueError(f"Section {section_id} not found!") combined_audio.export(self.output_file, format="mp3") # Example usage