onQueue('video-processing');
    }

    /**
     * Shared progress-file path (written by the pipeline, read by the status endpoint).
     *
     * @param int      $videoId Video the lyrics are generated for.
     * @param int|null $trackId Extra audio track id; null yields the "primary" file name.
     *
     * @return string Path of the JSON progress file inside the storage `app/tmp` directory.
     */
    public static function progressPath(int $videoId, ?int $trackId): string
    {
        return storage_path('app/tmp/lyrics_prog_' . $videoId . '_' . ($trackId ?? 'primary') . '.json');
    }

    /**
     * Index of the GPU with the most free memory, or null if it can't be queried.
     *
     * Parses the CSV rows printed by `nvidia-smi` ("index, memory.free", one row
     * per GPU). Rows whose fields are not numeric (for example "[N/A]") are
     * skipped so a malformed row can never be mistaken for GPU 0 with no free
     * memory. On a tie the first GPU listed wins.
     *
     * @return int|null GPU index, or null when nvidia-smi is unavailable, fails,
     *                  or prints no usable row.
     */
    private function freestGpu(): ?int
    {
        // With exec() listed in disable_functions PHP 8 throws an Error that the
        // @ operator does not suppress, so check before calling.
        if (! function_exists('exec')) {
            return null;
        }

        $out = [];
        $code = 1;
        @exec('nvidia-smi --query-gpu=index,memory.free --format=csv,noheader,nounits 2>/dev/null', $out, $code);
        if ($code !== 0 || empty($out)) {
            return null;
        }

        $best = null;
        $bestFree = -1;
        foreach ($out as $line) {
            $parts = array_map('trim', explode(',', $line));
            if (count($parts) < 2 || ! is_numeric($parts[0]) || ! is_numeric($parts[1])) {
                continue;
            }

            $idx = (int) $parts[0];
            $free = (int) $parts[1];
            if ($free > $bestFree) {
                $bestFree = $free;
                $best = $idx;
            }
        }

        return $best;
    }

    /**
     * Generate lyrics for the video, or for one of its extra audio tracks.
     *
     * Steps:
     *  1. Look up the video (and track, when a track id is set); return silently
     *     if either is missing.
     *  2. Store a "processing" lyrics payload through the NAS service.
     *  3. Obtain a local audio file; store a "failed" payload and return when
     *     none is available.
     *  4. Build the argument list for ml/transcribe.py from the admin settings
     *     and, for the primary track, from lyrics found in the description.
     *  5. Run the script, read its JSON output, store it as "ready" and dispatch
     *     DecorateLyricsJob.
     *
     * Any exception raised from step 4 onwards is logged and stored as a
     * "failed" payload instead of being rethrown. The temp files and a
     * downloaded NAS cache copy are removed in every case once step 3 passed.
     *
     * @param NasSyncService $nas Service used to fetch the audio and to store lyrics payloads.
     */
    public function handle(NasSyncService $nas): void
    {
        $video = Video::find($this->videoId);
        if (! $video) {
            return;
        }

        $track = $this->trackId ? VideoAudioTrack::find($this->trackId) : null;
        if ($this->trackId && ! $track) {
            return;
        }

        $language = $track ? $track->language : $video->language;

        // Mark as processing so the UI can show a generating state before the file lands.
        $nas->putLyrics($video, $track, [
            'version' => 1,
            'status' => 'processing',
            'language' => $language,
        ]);

        // Resolve a readable local copy of the audio (downloads from NAS if needed).
        $audioPath = $track ? $nas->ensureLocalTrackCopy($track) : $nas->ensureLocalCopy($video);
        // Only a copy living in the NAS cache directory is ours to delete afterwards.
        $nasDownloaded = $audioPath && str_starts_with($audioPath, storage_path('app/nas_cache/'))
            ? $audioPath
            : null;

        if (! $audioPath || ! file_exists($audioPath)) {
            Log::error('GenerateLyricsJob: audio file unavailable', [
                'video_id' => $this->videoId,
                'track_id' => $this->trackId,
            ]);
            $nas->putLyrics($video, $track, [
                'version' => 1,
                'status' => 'failed',
                'language' => $language,
                'error' => 'audio file unavailable',
            ]);

            return;
        }

        $python = base_path('ml/venv/bin/python');
        $script = base_path('ml/transcribe.py');
        $outTmp = storage_path('app/tmp/lyrics_' . $this->videoId . '_' . ($this->trackId ?? 'primary') . '.json');
        $progress = self::progressPath($this->videoId, $this->trackId);
        // Model/weight downloads land in a www-data-writable cache, not root's $HOME.
        $cacheDir = base_path('ml/cache');
        $userLyrFile = null;

        // The try block covers the preparation as well as the run: a failure while
        // reading settings, parsing the description or writing the user-lyrics file
        // must still flip the stored status to "failed" and clean up the temp files,
        // otherwise the lyrics would stay in "processing".
        try {
            if (! is_dir(dirname($outTmp))) {
                @mkdir(dirname($outTmp), 0775, true);
            }
            @file_put_contents($progress, json_encode(['status' => 'processing', 'pct' => 1, 'stage' => 'Queued']));

            if (! is_dir($cacheDir)) {
                @mkdir($cacheDir, 0775, true);
            }

            // NOTE: we deliberately do NOT force --language. The stored label is just
            // metadata and is often wrong (e.g. a Tagalog song mislabeled "en"), which
            // made WhisperX transcribe the wrong language. Auto-detecting from the
            // isolated vocals is ground truth; the detected language is saved instead.
            $args = [$python, $script, '--audio', $audioPath, '--out', $outTmp, '--progress', $progress];

            // Pipeline feature toggles (admin → Lyrics Pipeline). Defaults preserve
            // current behavior; admin can disable any sub-step that misbehaves.
            $useDescription = Setting::get('lyrics_use_description', 'true') === 'true';
            $vadEnabled = Setting::get('lyrics_vad_enabled', 'true') === 'true';
            $vocalGapFill = Setting::get('lyrics_vocal_region_gapfill', 'true') === 'true';
            $demucsEnabled = Setting::get('lyrics_demucs_enabled', 'false') === 'true';

            if (! $vadEnabled) {
                $args[] = '--no-vad';
            }
            if (! $vocalGapFill) {
                $args[] = '--no-vocal-gapfill';
            }

            // If the song's description contains the lyrics (typed by the uploader),
            // pass them to the pipeline so it ALIGNS those exact lines to the audio
            // instead of generating noisier text from scratch. Only for the primary
            // track — extra-language tracks have their own audio and aren't paired
            // with the description text.
            if ($useDescription && ! $this->trackId && $video->description) {
                // Prefer the deterministic regex parser. It strips emojis line-by-line
                // without touching the underlying words, so it preserves every
                // language a multilingual song contains (e.g. an English+Thai song
                // keeps both halves). The LLM cleaner is only a backup for cases
                // where the regex returns nothing — we've seen the LLM silently
                // drop whole verses that happened to be wrapped in emoji decoration.
                $descLines = \App\Support\LyricsDescriptionParser::extract($video->description);
                $source = 'regex';

                if (empty($descLines)) {
                    $llm = app(\App\Services\LlmLyricsService::class);
                    if ($llm->cleanLyricsEnabled()) {
                        try {
                            $descLines = $llm->cleanDescription($video->description);
                            $source = 'llm';
                        } catch (\Throwable $e) {
                            // Best effort: fall through to plain transcription.
                            Log::warning('LLM clean failed: ' . $e->getMessage());
                        }
                    }
                }

                if ($descLines) {
                    $userLyrFile = storage_path('app/tmp/userlyr_' . $this->videoId . '.txt');
                    file_put_contents($userLyrFile, implode("\n", $descLines));
                    $args[] = '--user-lyrics';
                    $args[] = $userLyrFile;

                    // With description lyrics, Whisper is only providing word-timing
                    // anchors — its actual transcription text is discarded by the
                    // aligner. Vocal isolation (Demucs) helps transcription QUALITY
                    // but is unnecessary for timing, AND the Demucs→Whisper CUDA-
                    // context handoff has caused intermittent 50% futex deadlocks.
                    // So we skip Demucs in this mode by default; the admin can
                    // re-enable via the Lyrics Pipeline page.
                    // NOTE(review): no setting is consulted on this path, so Demucs is
                    // currently always skipped in description mode — confirm whether a
                    // re-enable toggle was meant to be checked here.
                    $args[] = '--no-demucs';

                    Log::info('GenerateLyricsJob: using description lyrics', [
                        'video_id' => $this->videoId,
                        'lines' => count($descLines),
                        'source' => $source,
                        'demucs' => false,
                        'vad' => $vadEnabled,
                        'vocal_gapfill' => $vocalGapFill,
                    ]);
                }
            }

            // Honor the admin Demucs toggle for tracks WITHOUT description lyrics
            // (where Whisper's transcription quality actually matters).
            if (! $userLyrFile && ! $demucsEnabled) {
                $args[] = '--no-demucs';
            }

            $gpuUsable = Setting::gpuUsable();
            if ($gpuUsable) {
                // Run on the GPU with the most free VRAM so a busy card never forces an
                // out-of-memory fall back to slow CPU. With two cards this keeps every
                // generation on the GPU and fast.
                $args[] = '--gpu';
                $args[] = (string) ($this->freestGpu() ?? Setting::gpuDevice());
            }

            Log::info('GenerateLyricsJob: starting', [
                'video_id' => $this->videoId,
                'track_id' => $this->trackId,
                'language' => $language,
                'gpu' => $gpuUsable,
            ]);

            $result = Process::timeout($this->timeout)
                ->env([
                    'HOME' => $cacheDir,
                    'XDG_CACHE_HOME' => $cacheDir,
                    'HF_HOME' => $cacheDir . '/huggingface',
                    'TORCH_HOME' => $cacheDir . '/torch',
                    // Demucs runs as a subprocess BEFORE faster-whisper is imported.
                    // If OpenMP gets initialised in the parent before that fork, the
                    // post-fork CUDA/ctranslate2 stack can deadlock in futex_wait —
                    // we've seen this hang lyrics jobs at 50% indefinitely. Forcing
                    // single-threaded OpenMP in the parent eliminates the race
                    // (faster-whisper sets its own thread count internally anyway).
                    'OMP_NUM_THREADS' => '1',
                    'MKL_NUM_THREADS' => '1',
                    'OPENBLAS_NUM_THREADS' => '1',
                ])
                ->run($args);

            if (! $result->successful() || ! file_exists($outTmp)) {
                // Keep only the tail of stderr; that is where the final error ends up.
                throw new \RuntimeException('transcribe.py failed: ' . substr($result->errorOutput(), -2000));
            }

            $data = json_decode((string) file_get_contents($outTmp), true);
            if (! is_array($data) || empty($data['lines'])) {
                throw new \RuntimeException('transcribe.py produced no lines');
            }

            $data['status'] = 'ready';
            $data['generated_at'] = now()->toIso8601String();
            // Prefer the language reported by the pipeline; fall back to the stored label.
            $data['language'] = $data['language'] ?? $language;
            $nas->putLyrics($video, $track, $data);

            // Decoration is independent of the audio pipeline — kick it off as
            // its own job so a flaky LLM call can't delay or fail a successful
            // transcription. Skips itself silently if the decorator is off.
            // A failing dispatch is only logged for the same reason: the lyrics
            // are already stored as "ready" and must not be flipped to "failed".
            try {
                DecorateLyricsJob::dispatch($this->videoId, $this->trackId)
                    ->onConnection('database');
            } catch (\Throwable $dispatchError) {
                Log::warning('GenerateLyricsJob: could not dispatch DecorateLyricsJob: ' . $dispatchError->getMessage(), [
                    'video_id' => $this->videoId,
                    'track_id' => $this->trackId,
                ]);
            }

            Log::info('GenerateLyricsJob: done', [
                'video_id' => $this->videoId,
                'track_id' => $this->trackId,
                'lines' => count($data['lines']),
            ]);
        } catch (\Throwable $e) {
            Log::error('GenerateLyricsJob failed: ' . $e->getMessage(), [
                'video_id' => $this->videoId,
                'track_id' => $this->trackId,
            ]);
            $nas->putLyrics($video, $track, [
                'version' => 1,
                'status' => 'failed',
                'language' => $language,
                // The stderr tail is cut byte-wise and may start in the middle of a
                // multi-byte character; scrub it so the payload stays valid UTF-8
                // (presumably it is serialized as JSON — verify against putLyrics).
                'error' => mb_scrub($e->getMessage(), 'UTF-8'),
            ]);
        } finally {
            // Best-effort cleanup; missing files are fine.
            @unlink($outTmp);
            @unlink($progress);
            if ($userLyrFile) {
                @unlink($userLyrFile);
            }
            if ($nasDownloaded) {
                @unlink($nasDownloaded);
            }
        }
    }
}