"""Storage and assembly of recorded audio chunks for a meeting.

Chunks are written under ``<AUDIO_STORAGE_PATH>/chunks/<meeting_id>/`` and
later merged into a single 16 kHz mono WAV file with ffmpeg.
"""
import glob
import logging
import os
import shutil
import subprocess

from app.config import settings

logger = logging.getLogger(__name__)

# Filename prefix shared by save_chunk() and the glob in assemble_chunks().
_CHUNK_PREFIX = "chunk_"

# Upper bound (seconds) for a single ffmpeg invocation.
_FFMPEG_TIMEOUT_SECONDS = 300

# Output options: 16 kHz sample rate, one channel, signed 16-bit samples.
_WAV_OUTPUT_ARGS = ["-ar", "16000", "-ac", "1", "-sample_fmt", "s16"]


def _chunk_sort_key(path: str) -> tuple:
    """Order chunk files by numeric index rather than by raw string.

    Plain string sorting is only correct while every index fits in the
    six-digit zero padding used by save_chunk(); parsing the number keeps the
    order right beyond that. Names without a parsable index sort last, by name.
    """
    name = os.path.basename(path)
    stem = os.path.splitext(name)[0]
    try:
        return (0, int(stem[len(_CHUNK_PREFIX):]), name)
    except ValueError:
        return (1, 0, name)


def _run_ffmpeg_to_wav(input_args, output_path: str) -> subprocess.CompletedProcess:
    """Run ffmpeg on the given input arguments, writing a WAV to output_path.

    The caller inspects ``returncode`` / ``stderr``; a non-zero exit does not
    raise here. ``subprocess.TimeoutExpired`` propagates if ffmpeg exceeds the
    timeout.
    """
    return subprocess.run(
        ["ffmpeg", "-y", *input_args, *_WAV_OUTPUT_ARGS, output_path],
        capture_output=True,
        text=True,
        timeout=_FFMPEG_TIMEOUT_SECONDS,
    )


def _write_concat_list(list_path: str, chunk_files) -> None:
    """Write an ffmpeg concat-demuxer list file naming every chunk.

    Paths are made absolute because ffmpeg resolves relative entries against
    the directory of the list file, not the process working directory. Single
    quotes are escaped using the demuxer's shell-like quoting rules.
    """
    with open(list_path, "w", encoding="utf-8") as f:
        for cf in chunk_files:
            escaped = os.path.abspath(cf).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")


def get_chunks_dir(meeting_id: str) -> str:
    """Return the chunk directory for a meeting, creating it if needed."""
    path = os.path.join(settings.AUDIO_STORAGE_PATH, "chunks", meeting_id)
    os.makedirs(path, exist_ok=True)
    return path


def save_chunk(chunk_data: bytes, meeting_id: str, chunk_index: int, ext: str = ".webm") -> str:
    """Write one chunk to disk and return the path of the written file.

    The file is named ``chunk_<index zero-padded to 6 digits><ext>`` inside
    the meeting's chunk directory. An existing file of that name is
    overwritten.
    """
    chunks_dir = get_chunks_dir(meeting_id)
    filename = f"{_CHUNK_PREFIX}{chunk_index:06d}{ext}"
    path = os.path.join(chunks_dir, filename)
    with open(path, "wb") as f:
        f.write(chunk_data)
    return path


def assemble_chunks(meeting_id: str) -> str:
    """
    Assemble all chunks into a single WAV file for Whisper processing.

    Timeslice WebM chunks (from a single continuous MediaRecorder session)
    form a valid WebM stream when binary-concatenated in order. We concatenate
    them and then convert to 16kHz mono WAV with ffmpeg.
    Falls back to ffmpeg concat demuxer if binary concat fails.

    Returns:
        Path of the assembled ``<meeting_id>.wav`` under AUDIO_STORAGE_PATH.

    Raises:
        ValueError: no chunk files exist for the meeting.
        RuntimeError: ffmpeg failed to produce the output file.
        subprocess.TimeoutExpired: an ffmpeg run exceeded the timeout.
    """
    chunks_dir = get_chunks_dir(meeting_id)
    chunk_files = sorted(
        glob.glob(os.path.join(chunks_dir, f"{_CHUNK_PREFIX}*")),
        key=_chunk_sort_key,
    )

    if not chunk_files:
        raise ValueError(f"No chunks found for meeting {meeting_id}")

    output_path = os.path.join(settings.AUDIO_STORAGE_PATH, f"{meeting_id}.wav")

    if len(chunk_files) == 1:
        result = _run_ffmpeg_to_wav(["-i", chunk_files[0]], output_path)
        if result.returncode != 0:
            logger.error("ffmpeg single-chunk convert failed: %s", result.stderr)
            raise RuntimeError(f"Failed to convert audio chunk: {result.stderr}")
    else:
        # Binary-concatenate timeslice WebM chunks (they are sequential fragments
        # of the same MediaRecorder stream — binary concat = valid WebM stream)
        raw_concat = os.path.join(chunks_dir, "concat_raw.webm")
        with open(raw_concat, "wb") as out:
            for cf in chunk_files:
                with open(cf, "rb") as inp:
                    # Stream in blocks instead of loading each chunk whole.
                    shutil.copyfileobj(inp, out)

        result = _run_ffmpeg_to_wav(["-i", raw_concat], output_path)

        if result.returncode != 0:
            logger.warning(
                "Binary concat failed (%s), trying concat demuxer...",
                result.stderr[:200],
            )
            concat_list = os.path.join(chunks_dir, "concat.txt")
            _write_concat_list(concat_list, chunk_files)

            # "-safe 0" is required because the list contains absolute paths.
            result2 = _run_ffmpeg_to_wav(
                ["-f", "concat", "-safe", "0", "-i", concat_list],
                output_path,
            )
            if result2.returncode != 0:
                logger.error("Both assembly methods failed: %s", result2.stderr)
                raise RuntimeError(f"Failed to assemble audio chunks: {result2.stderr}")

    logger.info("Assembled %s chunks -> %s", len(chunk_files), output_path)
    return output_path


def cleanup_chunks(meeting_id: str) -> None:
    """Remove chunk files after successful assembly.

    Deletes the meeting's whole chunk directory, including the intermediate
    files assemble_chunks() writes there. Does nothing if the directory does
    not exist.
    """
    # Build the path directly: get_chunks_dir() would create the directory
    # only for it to be deleted again.
    chunks_dir = os.path.join(settings.AUDIO_STORAGE_PATH, "chunks", meeting_id)
    if os.path.isdir(chunks_dir):
        shutil.rmtree(chunks_dir)