takeone-youtube-clone/app/Jobs/GenerateLyricsJob.php
ghassan f98e5415a3 Add lyrics pipeline, playlist views, admin toggles, and player polish
Lyrics pipeline (Whisper + Demucs + description alignment):
- New GenerateLyricsJob runs WhisperX with VAD filtering and forced word
  alignment, writes per-track JSON to NAS.
- New DecorateLyricsJob calls the active LLM provider to bake one to
  several emojis into each line (heavy decoration prompt).
- LyricsDescriptionParser strips heading content, section markers, and
  emoji-decoration from a song's description while preserving every
  language verbatim.
- correct_whisper_with_description aligner: strong-match anchors only,
  vocal-region-aware gap-fill so missing verses land on actual singing.
- Owner UI for generate/regenerate/edit/delete in the player gear.

Admin pages:
- /admin/lyrics    toggles for VAD, vocal gap-fill, Demucs, master
- /admin/gpu       extracted GPU section, encoder picker, FFmpeg path
- /admin/backup    extracted users-and-settings export/import
- /admin/settings  now AI/LLM only with provider list and Test button
- /admin/nas-storage hosts NAS settings, repair, disable flow, browser
- Shared partials/settings-styles for a uniform look across pages.

Playlist view tracking:
- Migration adds playlists.view_count and playlist_views dedup table.
- Playlist::bumpViewIfNew increments per device with a one-hour window.
- Tracked from /playlists/{id}, /playlists/share/{token}, /ps/{token},
  and /videos/{id}?playlist={token}.  Dispatched after-response so it
  never blocks the page render.
- Loading a playlist on the video page now runs one query instead of
  the four the old getNextVideo/getPreviousVideo path triggered.
- View counts shown on every playlist card and the playlist hero.

Player polish:
- Floating mini-player is draggable, persists its position in
  localStorage, clamps to viewport on resize.
- Mini disabled entirely on mobile (less than 768px).
- New gear-menu Mini Player toggle (persists in localStorage) lets the
  user disable both scroll-activation and SPA-nav-activation.
- Close button keeps media playing when used on the player's own page.
- SPA navigator now swaps a #page-scripts container so per-page JS
  (channel tabs, etc.) gets re-executed after content swaps.

Storage layout:
- Runtime data moved from /storage/* to /data/* and gitignored.
- /ml/venv, /ml/cache, /ml/__pycache__ excluded.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 22:01:47 +03:00

245 lines
11 KiB
PHP

<?php
namespace App\Jobs;
use App\Models\Setting;
use App\Models\Video;
use App\Models\VideoAudioTrack;
use App\Services\NasSyncService;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process;
/**
* Generate word-level synced lyrics for one audio track (the video's primary
* audio when $trackId is null, otherwise a specific extra-language track).
*
* Output is a per-track lyrics JSON written through NasSyncService::putLyrics()
* — source-of-truth, synced to NAS, never under cache/. Runs the GPU pipeline
* exactly once; playback just loads the file afterwards.
*/
class GenerateLyricsJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Seconds held back from the job timeout when limiting the child process,
     * so the child's timeout fires first and this job still gets to record the
     * failure and clean up before the queue worker would terminate it.
     */
    private const TIMEOUT_MARGIN = 60;

    /** Maximum number of seconds the queue worker lets this job run. */
    public int $timeout = 3600;

    /** Single attempt only: a failed run is reported, not retried automatically. */
    public int $tries = 1;

    /**
     * @param int      $videoId Video whose lyrics are generated.
     * @param int|null $trackId Extra audio track to transcribe, or null for the primary audio.
     */
    public function __construct(public int $videoId, public ?int $trackId = null)
    {
        $this->onQueue('video-processing');
    }

    /** Shared progress-file path (written by the pipeline, read by the status endpoint). */
    public static function progressPath(int $videoId, ?int $trackId): string
    {
        return self::tmpJsonPath('lyrics_prog_', $videoId, $trackId);
    }

    /** Per-video/track JSON scratch file under storage/app/tmp. */
    private static function tmpJsonPath(string $prefix, int $videoId, ?int $trackId): string
    {
        return storage_path('app/tmp/' . $prefix . $videoId . '_' . ($trackId ?? 'primary') . '.json');
    }

    /** Scratch file holding the description lyrics handed to the pipeline. */
    private static function userLyricsPath(int $videoId): string
    {
        return storage_path('app/tmp/userlyr_' . $videoId . '.txt');
    }

    /** Index of the GPU with the most free memory, or null if it can't be queried. */
    private function freestGpu(): ?int
    {
        // When exec() is disabled, calling it raises an Error that "@" cannot silence.
        if (! function_exists('exec')) {
            return null;
        }

        $rows = [];
        $exitCode = 1;
        @exec('nvidia-smi --query-gpu=index,memory.free --format=csv,noheader,nounits 2>/dev/null', $rows, $exitCode);
        if ($exitCode !== 0 || $rows === []) {
            return null;
        }

        $best = null;
        $bestFree = -1;
        foreach ($rows as $row) {
            $parts = array_map('trim', explode(',', $row));
            // Ignore malformed rows and non-numeric fields; casting those to int
            // would silently turn them into "GPU 0 with 0 MiB free".
            if (count($parts) < 2 || ! ctype_digit($parts[0]) || ! ctype_digit($parts[1])) {
                continue;
            }
            $free = (int) $parts[1];
            if ($free > $bestFree) {
                $bestFree = $free;
                $best = (int) $parts[0];
            }
        }

        return $best;
    }

    /**
     * Store a "failed" lyrics record for the given video/track.
     *
     * @param mixed $language Language label copied into the record as-is.
     */
    private function markFailed(
        NasSyncService $nas,
        Video $video,
        ?VideoAudioTrack $track,
        mixed $language,
        string $error,
    ): void {
        $nas->putLyrics($video, $track, [
            'version' => 1, 'status' => 'failed', 'language' => $language,
            'error' => $error,
        ]);
    }

    /**
     * Extract lyric lines from a video description.
     *
     * Prefers the deterministic regex parser. It strips emojis line-by-line
     * without touching the underlying words, so it preserves every language a
     * multilingual song contains (e.g. an English+Thai song keeps both halves).
     * The LLM cleaner is only a backup for cases where the regex returns
     * nothing — we've seen the LLM silently drop whole verses that happened to
     * be wrapped in emoji decoration.
     *
     * @return array{0: mixed, 1: string} The extracted lines and which source produced them ('regex' or 'llm').
     */
    private function descriptionLyrics(string $description): array
    {
        $lines = \App\Support\LyricsDescriptionParser::extract($description);
        $source = 'regex';

        if (empty($lines)) {
            $llm = app(\App\Services\LlmLyricsService::class);
            if ($llm->cleanLyricsEnabled()) {
                try {
                    $lines = $llm->cleanDescription($description);
                    $source = 'llm';
                } catch (\Throwable $e) {
                    // Best effort: without cleaned lines the job simply transcribes from scratch.
                    Log::warning('LLM clean failed: ' . $e->getMessage());
                }
            }
        }

        return [$lines, $source];
    }

    /**
     * Run the transcription pipeline and store the resulting lyrics.
     *
     * Writes a "processing" record first, runs ml/transcribe.py on a local copy
     * of the audio, then stores either the "ready" lyrics or a "failed" record.
     * Failures inside the pipeline are recorded and logged rather than rethrown.
     * Temporary files are removed on every path that reaches the pipeline.
     */
    public function handle(NasSyncService $nas): void
    {
        $video = Video::find($this->videoId);
        if (! $video) {
            return;
        }
        $track = $this->trackId ? VideoAudioTrack::find($this->trackId) : null;
        if ($this->trackId && ! $track) {
            return;
        }
        $language = $track ? $track->language : $video->language;

        // Mark as processing so the UI can show a generating state before the file lands.
        $nas->putLyrics($video, $track, [
            'version' => 1,
            'status' => 'processing',
            'language' => $language,
        ]);

        // Resolve a readable local copy of the audio (downloads from NAS if needed).
        $audioPath = $track ? $nas->ensureLocalTrackCopy($track) : $nas->ensureLocalCopy($video);
        // Only a copy that lives in the NAS cache is ours to delete afterwards.
        $nasDownloaded = ($audioPath && str_starts_with($audioPath, storage_path('app/nas_cache/')))
            ? $audioPath
            : null;
        if (! $audioPath || ! file_exists($audioPath)) {
            Log::error('GenerateLyricsJob: audio file unavailable', [
                'video_id' => $this->videoId, 'track_id' => $this->trackId,
            ]);
            $this->markFailed($nas, $video, $track, $language, 'audio file unavailable');
            return;
        }

        $python = base_path('ml/venv/bin/python');
        $script = base_path('ml/transcribe.py');
        $outTmp = self::tmpJsonPath('lyrics_', $this->videoId, $this->trackId);
        $progress = self::progressPath($this->videoId, $this->trackId);
        $userLyrFile = null;

        // Everything from here on runs inside the try so that a failure while
        // preparing the run is recorded and cleaned up like a pipeline failure.
        try {
            if (! is_dir(dirname($outTmp))) {
                @mkdir(dirname($outTmp), 0775, true);
            }
            // A run that was killed mid-flight never reached its cleanup; remove
            // any leftover output so it cannot be mistaken for this run's result.
            @unlink($outTmp);
            @file_put_contents($progress, json_encode(['status' => 'processing', 'pct' => 1, 'stage' => 'Queued']));

            // Model/weight downloads land in a www-data-writable cache, not root's $HOME.
            $cacheDir = base_path('ml/cache');
            if (! is_dir($cacheDir)) {
                @mkdir($cacheDir, 0775, true);
            }

            // NOTE: we deliberately do NOT force --language. The stored label is just
            // metadata and is often wrong (e.g. a Tagalog song mislabeled "en"), which
            // made WhisperX transcribe the wrong language. Auto-detecting from the
            // isolated vocals is ground truth; the detected language is saved instead.
            $args = [$python, $script, '--audio', $audioPath, '--out', $outTmp, '--progress', $progress];

            // Pipeline feature toggles (admin → Lyrics Pipeline). Defaults preserve
            // current behavior; admin can disable any sub-step that misbehaves.
            $useDescription = Setting::get('lyrics_use_description', 'true') === 'true';
            $vadEnabled = Setting::get('lyrics_vad_enabled', 'true') === 'true';
            $vocalGapFill = Setting::get('lyrics_vocal_region_gapfill', 'true') === 'true';
            $demucsEnabled = Setting::get('lyrics_demucs_enabled', 'false') === 'true';
            if (! $vadEnabled) {
                $args[] = '--no-vad';
            }
            if (! $vocalGapFill) {
                $args[] = '--no-vocal-gapfill';
            }

            // If the song's description contains the lyrics (typed by the uploader),
            // pass them to the pipeline so it ALIGNS those exact lines to the audio
            // instead of generating noisier text from scratch. Only for the primary
            // track — extra-language tracks have their own audio and aren't paired
            // with the description text.
            if ($useDescription && ! $this->trackId && $video->description) {
                [$descLines, $source] = $this->descriptionLyrics($video->description);
                if ($descLines) {
                    $candidate = self::userLyricsPath($this->videoId);
                    if (file_put_contents($candidate, implode("\n", $descLines)) === false) {
                        // Pointing the pipeline at a missing file would sink the whole
                        // run; transcribe from scratch instead.
                        @unlink($candidate);
                        Log::warning('GenerateLyricsJob: could not write description lyrics, transcribing without them', [
                            'video_id' => $this->videoId, 'path' => $candidate,
                        ]);
                    } else {
                        $userLyrFile = $candidate;
                        $args[] = '--user-lyrics';
                        $args[] = $userLyrFile;
                        // With description lyrics, Whisper is only providing word-timing
                        // anchors — its actual transcription text is discarded by the
                        // aligner. Vocal isolation (Demucs) helps transcription QUALITY
                        // but is unnecessary for timing, AND the Demucs→Whisper CUDA-
                        // context handoff has caused intermittent 50% futex deadlocks.
                        // So Demucs is always skipped in this mode; the admin Demucs
                        // toggle is only consulted when no description lyrics are used.
                        $args[] = '--no-demucs';
                        Log::info('GenerateLyricsJob: using description lyrics', [
                            'video_id' => $this->videoId, 'lines' => count($descLines),
                            'source' => $source, 'demucs' => false,
                            'vad' => $vadEnabled, 'vocal_gapfill' => $vocalGapFill,
                        ]);
                    }
                }
            }

            // Honor the admin Demucs toggle for tracks WITHOUT description lyrics
            // (where Whisper's transcription quality actually matters).
            if (! $userLyrFile && ! $demucsEnabled) {
                $args[] = '--no-demucs';
            }

            // Read once so the logged value is the one that was acted on.
            $gpuUsable = Setting::gpuUsable();
            if ($gpuUsable) {
                // Run on the GPU with the most free VRAM so a busy card never forces an
                // out-of-memory fall back to slow CPU. With two cards this keeps every
                // generation on the GPU and fast.
                $args[] = '--gpu';
                $args[] = (string) ($this->freestGpu() ?? Setting::gpuDevice());
            }

            Log::info('GenerateLyricsJob: starting', [
                'video_id' => $this->videoId, 'track_id' => $this->trackId,
                'language' => $language, 'gpu' => $gpuUsable,
            ]);

            // The child gets a slightly shorter limit than the job itself, so its
            // timeout surfaces here as an exception instead of racing the worker.
            $result = Process::timeout(max(1, $this->timeout - self::TIMEOUT_MARGIN))
                ->env([
                    'HOME' => $cacheDir,
                    'XDG_CACHE_HOME' => $cacheDir,
                    'HF_HOME' => $cacheDir . '/huggingface',
                    'TORCH_HOME' => $cacheDir . '/torch',
                    // Demucs runs as a subprocess BEFORE faster-whisper is imported.
                    // If OpenMP gets initialised in the parent before that fork, the
                    // post-fork CUDA/ctranslate2 stack can deadlock in futex_wait —
                    // we've seen this hang lyrics jobs at 50% indefinitely. Forcing
                    // single-threaded OpenMP in the parent eliminates the race
                    // (faster-whisper sets its own thread count internally anyway).
                    'OMP_NUM_THREADS' => '1',
                    'MKL_NUM_THREADS' => '1',
                    'OPENBLAS_NUM_THREADS' => '1',
                ])
                ->run($args);

            if (! $result->successful() || ! file_exists($outTmp)) {
                throw new \RuntimeException('transcribe.py failed: ' . substr($result->errorOutput(), -2000));
            }

            $data = json_decode((string) file_get_contents($outTmp), true);
            if (! is_array($data) || empty($data['lines'])) {
                throw new \RuntimeException('transcribe.py produced no lines');
            }

            $data['status'] = 'ready';
            $data['generated_at'] = now()->toIso8601String();
            $data['language'] = $data['language'] ?? $language;
            $nas->putLyrics($video, $track, $data);

            // Decoration is independent of the audio pipeline — kick it off as
            // its own job so a flaky LLM call can't delay or fail a successful
            // transcription. Skips itself silently if the decorator is off.
            // A failure to enqueue it must not overwrite the lyrics stored above
            // with a "failed" record, so it is contained here.
            try {
                DecorateLyricsJob::dispatch($this->videoId, $this->trackId)
                    ->onConnection('database');
            } catch (\Throwable $e) {
                Log::warning('GenerateLyricsJob: could not queue lyric decoration: ' . $e->getMessage(), [
                    'video_id' => $this->videoId, 'track_id' => $this->trackId,
                ]);
            }

            Log::info('GenerateLyricsJob: done', [
                'video_id' => $this->videoId, 'track_id' => $this->trackId,
                'lines' => count($data['lines']),
            ]);
        } catch (\Throwable $e) {
            Log::error('GenerateLyricsJob failed: ' . $e->getMessage(), [
                'video_id' => $this->videoId, 'track_id' => $this->trackId,
            ]);
            $this->markFailed($nas, $video, $track, $language, $e->getMessage());
        } finally {
            @unlink($outTmp);
            @unlink($progress);
            if ($userLyrFile) {
                @unlink($userLyrFile);
            }
            if ($nasDownloaded) {
                @unlink($nasDownloaded);
            }
        }
    }

    /**
     * Queue failure hook: covers failures handle() could not record itself
     * (for example the worker giving up on the job, or an exception thrown
     * before the pipeline's own error handling was reached).
     *
     * Replaces the lingering "processing" record with a "failed" one and removes
     * this job's scratch files. Everything here is best effort.
     */
    public function failed(?\Throwable $exception): void
    {
        @unlink(self::tmpJsonPath('lyrics_', $this->videoId, $this->trackId));
        @unlink(self::progressPath($this->videoId, $this->trackId));
        if (! $this->trackId) {
            @unlink(self::userLyricsPath($this->videoId));
        }

        try {
            $video = Video::find($this->videoId);
            if (! $video) {
                return;
            }
            $track = $this->trackId ? VideoAudioTrack::find($this->trackId) : null;
            if ($this->trackId && ! $track) {
                return;
            }

            $this->markFailed(
                app(NasSyncService::class),
                $video,
                $track,
                $track ? $track->language : $video->language,
                $exception?->getMessage() ?? 'lyrics job failed',
            );
        } catch (\Throwable $e) {
            Log::warning('GenerateLyricsJob: could not record failure: ' . $e->getMessage(), [
                'video_id' => $this->videoId, 'track_id' => $this->trackId,
            ]);
        }
    }
}