Lyrics pipeline (Whisper + Demucs + description alignment):
- New GenerateLyricsJob runs WhisperX with VAD filtering and forced word
alignment, writes per-track JSON to NAS.
- New DecorateLyricsJob calls the active LLM provider to bake one to
several emojis into each line (heavy decoration prompt).
- LyricsDescriptionParser strips heading content, section markers, and
emoji-decoration from a song's description while preserving every
language verbatim.
- correct_whisper_with_description aligner: strong-match anchors only,
vocal-region-aware gap-fill so missing verses land on actual singing.
- Owner UI for generate/regenerate/edit/delete in the player gear.
Admin pages:
- /admin/lyrics toggles for VAD, vocal gap-fill, Demucs, master
- /admin/gpu extracted GPU section, encoder picker, FFmpeg path
- /admin/backup extracted users-and-settings export/import
- /admin/settings now AI/LLM only with provider list and Test button
- /admin/nas-storage hosts NAS settings, repair, disable flow, browser
- Shared partials/settings-styles for a uniform look across pages.
Playlist view tracking:
- Migration adds playlists.view_count and playlist_views dedup table.
- Playlist::bumpViewIfNew increments per device with a one-hour window.
- Tracked from /playlists/{id}, /playlists/share/{token}, /ps/{token},
and /videos/{id}?playlist={token}. Dispatched after-response so it
never blocks the page render.
- Loading a playlist on the video page now runs one query instead of
the four the old getNextVideo/getPreviousVideo path triggered.
- View counts shown on every playlist card and the playlist hero.
Player polish:
- Floating mini-player is draggable, persists its position in
localStorage, clamps to viewport on resize.
- Mini disabled entirely on mobile (less than 768px).
- New gear-menu Mini Player toggle (persists in localStorage) lets the
user disable both scroll-activation and SPA-nav-activation.
- Close button keeps media playing when used on the player's own page.
- SPA navigator now swaps a #page-scripts container so per-page JS
(channel tabs, etc.) gets re-executed after content swaps.
Storage layout:
- Runtime data moved from /storage/* to /data/* and gitignored.
- /ml/venv, /ml/cache, /ml/__pycache__ excluded.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
145 lines
6.4 KiB
PHP
145 lines
6.4 KiB
PHP
<?php
|
||
|
||
namespace App\Support;
|
||
|
||
/**
 * Extract clean lyric lines from a video's free-text description.
 *
 * Many users paste the song's lyrics into the description with section markers,
 * emoji decorations, instrument tags, etc. When that's present we want to USE
 * those exact lines (and let the pipeline only do the *sync*, not the
 * transcription), because they're far more accurate than anything Whisper can
 * derive from sung audio.
 *
 * Returns an empty array when no usable lyric block is found.
 */
class LyricsDescriptionParser
{
    /** Heuristic threshold: descriptions with fewer cleaned lines aren't worth aligning. */
    private const MIN_LYRIC_LINES = 4;

    /** Cleaned lines shorter than this many characters are discarded. */
    private const MIN_LINE_LENGTH = 3;

    /**
     * Regex fragments for section-marker words (Verse / Chorus / Bridge / ...
     * and their translations). Each entry is spliced verbatim into an
     * alternation, so entries may contain regex syntax.
     */
    private const SECTION_ROOTS = [
        'intro', 'outro', 'interlude', 'instrumental',
        'verse', 'verso', 'verset', 'couplet', 'estrofa', 'strofa',
        'chorus', 'ritornello', 'refrain', 'refrão', 'refrao', 'coro', 'estribillo',
        'pre[\s\-]?chorus', 'pre[\s\-]?ritornello', 'pre[\s\-]?coro',
        'pre[\s\-]?refrain', 'pre[\s\-]?refrão', 'pré[\s\-]?refrain',
        'bridge', 'ponte', 'puente', 'pont', 'brücke', 'brucke',
        'hook', 'drop', 'breakdown', 'tag', 'vamp', 'coda', 'reprise',
        // CJK / Thai / Arabic / Korean
        'サビ', 'コーラス', 'バース', 'ブリッジ', 'イントロ', 'アウトロ', 'フック', '間奏',
        'ท่อน', 'คอรัส', 'ฮุก', 'บริดจ์', 'อินโทร', 'เอาท์โทร',
        '前奏', '副歌', '桥段', '主歌', '尾奏',
        'كورس', 'بريدج', 'كوبليه',
        '후렴', '브릿지', '인트로', '아웃트로', '훅',
    ];

    /**
     * Turn a (possibly HTML) description into a list of lyric lines.
     *
     * Section headers, a leading title line, emoji and markup are removed;
     * the remaining lines are returned in their original order.
     *
     * @param string|null $desc Raw description text; may contain HTML.
     *
     * @return string[] Cleaned lyric lines, or an empty array when the
     *                  description is empty, is not valid UTF-8, or yields
     *                  fewer than MIN_LYRIC_LINES lines.
     */
    public static function extract(?string $desc): array
    {
        if ($desc === null || $desc === '') {
            return [];
        }

        // Every pattern used on the text carries the /u modifier, and PCRE
        // rejects invalid UTF-8 subjects outright (preg_* returns null). Bail
        // out up front instead of letting nulls trickle through the pipeline.
        if (! mb_check_encoding($desc, 'UTF-8')) {
            return [];
        }

        // First pass: clean each line and flag section headers (Verse /
        // Ritornello / Bridge / etc.) so they can be dropped — those aren't sung.
        $cleaned = [];
        foreach (explode("\n", self::toPlainText($desc)) as $rawLine) {
            $line = self::cleanLine($rawLine);
            if ($line === null) {
                continue;
            }
            $cleaned[] = ['text' => $line, 'header' => self::isSectionHeader($line)];
        }

        // Title detection: when the very first surviving line is immediately
        // followed by a section header (e.g. "Figlio Mio — Viaggio di Vita"
        // then "Verso 1"), that first line is the song title — drop it too.
        // Only index 0 qualifies: a line that itself follows a header belongs
        // to that section (a one-line intro, say) and must be kept.
        $dropTitle = count($cleaned) > 1
            && ! $cleaned[0]['header']
            && $cleaned[1]['header'];

        $out = [];
        foreach ($cleaned as $i => $entry) {
            if ($entry['header']) {
                continue;
            }
            if ($dropTitle && $i === 0) {
                continue;
            }
            $out[] = $entry['text'];
        }

        // Avoid mistakenly aligning non-lyric descriptions (a credit line, a URL,
        // etc.). Require at least a handful of plausible lyric lines.
        if (count($out) < self::MIN_LYRIC_LINES) {
            return [];
        }

        return $out;
    }

    /**
     * Reduce an HTML description to plain text with "\n" line breaks.
     *
     * HTML descriptions use <br> for line breaks — those (and other
     * block-ending tags) are converted into real newlines BEFORE stripping
     * tags, otherwise the entire body collapses into one long run-on line.
     */
    private static function toPlainText(string $html): string
    {
        // Heading tags (<h1>…<h6>) carry the song title — drop their content
        // entirely so the title never leaks into the lyric list.
        $text = self::replace('/<\s*h[1-6][^>]*>.*?<\s*\/\s*h[1-6]\s*>/isu', "\n", $html);
        // Accept attributes on <br> (e.g. <br class="x">) as well as <br/>.
        $text = self::replace('/<\s*br\b[^>]*>/i', "\n", $text);
        $text = self::replace('/<\s*\/\s*(p|div|li|tr|blockquote)\s*>/i', "\n", $text);
        $text = strip_tags($text);

        // Decode HTML entities (&nbsp;, &amp;, etc.) so the comparison later isn't fooled.
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // Normalise CRLF / CR line endings.
        return str_replace(["\r\n", "\r"], "\n", $text);
    }

    /**
     * True when a line is a section marker (Verse / Chorus / Bridge / Outro /
     * their many translations) rather than a sung lyric. Matches the WHOLE line
     * so a real lyric containing one of these words isn't mistakenly dropped.
     *
     * Markers wrapped in brackets or parentheses ("[Verse 1]", "(Chorus)") are
     * recognised too.
     */
    private static function isSectionHeader(string $line): bool
    {
        $t = mb_strtolower(trim($line));
        if ($t === '') {
            return false;
        }

        // Unwrap a line that is entirely enclosed in one pair of brackets so
        // the anchored pattern below sees the bare marker.
        if (preg_match('/^[\[(（［]\s*(.+?)\s*[\])）］][\s:]*$/u', $t, $m) === 1) {
            $t = $m[1];
        }

        return preg_match(self::sectionHeaderPattern(), $t) === 1;
    }

    /**
     * Build (once per process) the anchored regex used by isSectionHeader().
     */
    private static function sectionHeaderPattern(): string
    {
        static $pattern = null;

        if ($pattern === null) {
            $pattern = '/^(?:' . implode('|', self::SECTION_ROOTS) . ')'
                // Separators and digits: "Verse 1", "Verso 2:", "Chorus - 3".
                . '[\s\d:\-—\.]*'
                // Optional "final/finale/reprise", repeat marker ("x2"), or
                // roman numeral ("I" … "VIII").
                . '(?:final|finale|reprise|[x×]\s*\d+|[ivx]{1,4})?'
                // Trailing punctuation after the suffix: "Verse II:".
                . '[\s:\.]*$/iu';
        }

        return $pattern;
    }

    /** Returns the cleaned line, or null if it should be discarded. */
    private static function cleanLine(string $line): ?string
    {
        $line = trim($line);
        if ($line === '') {
            return null;
        }

        // Strip leading list bullets / quote markers, then markdown emphasis
        // characters (* _ ~ `) anywhere in the line.
        $line = self::replace('/^[\s>\-\*•♪♫·]+/u', '', $line);
        $line = self::replace('/[\*_~`]+/u', '', $line);

        // Drop instrument / section annotations inside Japanese-style brackets:
        //   【 箏・尺八・篠笛・優しい歌声 】  — these aren't lyrics.
        $line = self::replace('/【[^】]*】/u', '', $line);
        $line = self::replace('/〔[^〕]*〕/u', '', $line);
        $line = self::replace('/\[\[[^\]]*\]\]/u', '', $line);

        // Strip emoji / pictographic symbols and the invisible glue that often
        // sticks to them (variation selectors, ZWJ) so nothing leaves behind a
        // bare diacritic when the visible emoji is removed.
        // NOTE(review): U+200C/U+200D are also removed from ordinary text;
        // confirm that is acceptable for scripts that rely on joiners.
        $line = self::replace(
            '/[\x{1F000}-\x{1FFFF}\x{2600}-\x{27BF}\x{2B00}-\x{2BFF}\x{0F3A}-\x{0F3D}\x{FE00}-\x{FE0F}\x{200B}-\x{200F}\x{2060}]/u',
            '',
            $line
        );

        // Collapse internal whitespace.
        $line = trim(self::replace('/\s+/u', ' ', $line));
        if ($line === '') {
            return null;
        }

        // Must contain at least one letter (Unicode), and at least 3 characters
        // after stripping — discards "🌸 平穏 🌸" (header) and "──" separators.
        if (preg_match('/\p{L}/u', $line) !== 1) {
            return null;
        }
        if (mb_strlen($line) < self::MIN_LINE_LENGTH) {
            return null;
        }

        return $line;
    }

    /**
     * preg_replace() wrapper that always returns a string.
     *
     * preg_replace() yields null on a PCRE error (bad UTF-8, backtrack limit);
     * in that case the subject is returned unchanged so one failed clean-up
     * step cannot null out the text.
     */
    private static function replace(string $pattern, string $replacement, string $subject): string
    {
        return preg_replace($pattern, $replacement, $subject) ?? $subject;
    }
}
|