ghassan a4384113c2 Audio songs: one-folder storage, version-aware download/share, GPU-checked renders
Storage structure
- All audio tracks (primary + per-language) now live in one folder per song with
  unique lowercase names ({slug}-{lang}-{id}); no more tracks/ subfolder.
- Generated renders (download video + HLS) moved into the song's local-only cache/
  subfolder, separated from source files (never synced to NAS, safe to wipe).
- tracks:reorganize artisan command (dry-run default) consolidates legacy files,
  updates DB paths, and deletes orphans + empty folders.
- CLAUDE.md documents the canonical layout as a global rule (identical local + NAS).

Version-aware download & share
- Download MP3/Video and Share now act on the version being played; ?track={id}
  is carried through share links and auto-selects audio + title + flag + about +
  OG/meta on open.

GPU + visualizer
- Setting::gpuUsable() runs a cached health probe (nvidia-smi + nvenc smoke test,
  256x144) before sending any encode to the GPU; falls back to CPU otherwise.
- Visualizer "Download Video" bakes in equal-width, cover-coloured, translucent
  frequency bars; loop-filter rebuild makes generation ~25x faster.

Image cropper
- result-callback mode + per-song cover-slide cropper in upload/edit (modal + mobile).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 14:03:43 +03:00

224 lines
7.3 KiB
PHP

<?php
namespace App\Support;
use DOMDocument;
use DOMElement;
use DOMNode;
/**
 * Allowlist-based HTML sanitizer for rich-text descriptions.
 *
 * Descriptions are authored in a self-hosted contenteditable editor and stored
 * as HTML. clean() strips everything not on the allowlist before storage;
 * render() produces safe display HTML (and upgrades legacy plain-text values).
 *
 * Sanitizing works on a parsed DOM: disallowed elements are either dropped
 * with their content (DROP_TAGS) or unwrapped (children kept), every attribute
 * is removed and only vetted ones are re-added, and any node that is neither
 * an element nor plain text is discarded or neutralised.
 */
class HtmlSanitizer
{
    /** Tags allowed in stored description HTML. */
    private const ALLOWED_TAGS = [
        'p', 'br', 'div', 'span',
        'b', 'strong', 'i', 'em', 'u', 's', 'strike',
        'h2', 'h3',
        'ul', 'ol', 'li',
        'blockquote', 'a',
    ];

    /**
     * CSS class values permitted in a class attribute (button-link styling).
     * The filter is applied to the class attribute of every allowed tag.
     */
    private const ALLOWED_CLASSES = [
        'action-btn', 'action-btn-primary', 'action-btn-danger',
        'action-btn-link', 'primary', 'danger', 'icon-only',
    ];

    /** URL schemes permitted in href. */
    private const ALLOWED_SCHEMES = ['http', 'https', 'mailto'];

    /** Values accepted for text-align (style declaration or legacy align=""). */
    private const ALLOWED_ALIGNMENTS = ['left', 'right', 'center', 'justify'];

    /** Tags removed wholesale, including their text content. */
    private const DROP_TAGS = [
        'script', 'style', 'iframe', 'object', 'embed', 'form',
        'input', 'button', 'textarea', 'select', 'option', 'link', 'meta',
        'svg', 'math', 'noscript', 'template',
    ];

    /**
     * Sanitize untrusted HTML down to the allowlist, for storage.
     *
     * @param string|null $html Untrusted HTML fragment (null is treated as empty).
     * @return string Sanitized HTML fragment; '' when the input is blank or
     *                the wrapper element cannot be located after parsing.
     */
    public static function clean(?string $html): string
    {
        $html = trim((string) $html);
        if ($html === '') {
            return '';
        }

        // Wrap so DOMDocument has a single root and a known encoding.
        $wrapped = '<?xml encoding="UTF-8"><div id="__rte_root__">' . $html . '</div>';

        $doc = new DOMDocument();
        $libxmlPrev = libxml_use_internal_errors(true);
        try {
            $doc->loadHTML(
                $wrapped,
                LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            // Always restore the caller's libxml error mode, even if parsing throws.
            libxml_clear_errors();
            libxml_use_internal_errors($libxmlPrev);
        }

        $root = $doc->getElementById('__rte_root__');
        if (!$root) {
            return '';
        }

        self::sanitizeChildren($root);

        // Serialize the wrapper's children only; the wrapper itself is not emitted.
        $out = '';
        foreach (iterator_to_array($root->childNodes, false) as $child) {
            $out .= $doc->saveHTML($child);
        }

        return self::tidy($out);
    }

    /**
     * Produce safe display HTML. Legacy plain-text values (no tags) are
     * escaped and converted to <br>; rich values are run through clean().
     *
     * @param string|null $value Stored description (plain text or HTML).
     * @return string HTML that is safe to output unescaped.
     */
    public static function render(?string $value): string
    {
        $value = (string) $value;
        if (trim($value) === '') {
            return '';
        }

        // strip_tags() leaving the value untouched means it contains no markup.
        if (strip_tags($value) === $value) {
            return nl2br(e($value), false);
        }

        return self::clean($value);
    }

    /**
     * Sanitize every child of $node in place.
     *
     * Elements are sanitized recursively and plain text nodes are kept
     * (saveHTML escapes them). CDATA sections are converted to plain text
     * nodes, because the HTML serializer writes CDATA content verbatim, and
     * every other node type (comments, processing instructions, ...) is removed.
     */
    private static function sanitizeChildren(DOMNode $node): void
    {
        // Snapshot first: the live child list shifts while nodes are removed or unwrapped.
        foreach (iterator_to_array($node->childNodes, false) as $child) {
            if ($child instanceof DOMElement) {
                self::sanitizeElement($child);
                continue;
            }

            if ($child->nodeType === XML_TEXT_NODE) {
                continue;
            }

            if ($child->nodeType === XML_CDATA_SECTION_NODE && $child->ownerDocument !== null) {
                // Re-create as ordinary text so the content is escaped on output.
                $text = $child->ownerDocument->createTextNode((string) $child->nodeValue);
                $node->replaceChild($text, $child);
                continue;
            }

            $node->removeChild($child);
        }
    }

    /**
     * Sanitize a single element in place: drop it, unwrap it, or keep it with
     * only allowlisted attributes, then recurse into its children.
     */
    private static function sanitizeElement(DOMElement $el): void
    {
        $tag = strtolower($el->nodeName);
        $parent = $el->parentNode;

        if (in_array($tag, self::DROP_TAGS, true)) {
            $parent->removeChild($el);
            return;
        }

        if (!in_array($tag, self::ALLOWED_TAGS, true)) {
            // Unwrap: keep sanitized children, drop the disallowed wrapper.
            self::sanitizeChildren($el);
            while ($el->firstChild) {
                $parent->insertBefore($el->firstChild, $el);
            }
            $parent->removeChild($el);
            return;
        }

        // Strip every attribute, then re-add only the allowed ones.
        $attributes = iterator_to_array($el->attributes, false);
        $keep = self::allowedAttributes($tag, $attributes);

        // Remove by node, not by name: name-based removal can fail for
        // prefixed attributes (e.g. xmlns:foo) and a "while length > 0" loop
        // would then never terminate.
        foreach ($attributes as $attr) {
            $el->removeAttributeNode($attr);
        }

        foreach ($keep as $name => $value) {
            $el->setAttribute($name, $value);
        }

        // Any link opening a new tab gets rel protection.
        if ($tag === 'a' && ($keep['target'] ?? '') === '_blank') {
            $el->setAttribute('rel', 'noopener noreferrer');
        }

        self::sanitizeChildren($el);
    }

    /**
     * Work out which attributes survive on an allowed element.
     *
     * @param string          $tag        Lowercased element name.
     * @param list<\DOMAttr>  $attributes Attribute nodes currently on the element.
     * @return array<string, string> Attribute name => sanitized value, in the
     *                               order the source attributes were encountered.
     */
    private static function allowedAttributes(string $tag, array $attributes): array
    {
        $keep = [];
        $align = null;

        foreach ($attributes as $attr) {
            $name = strtolower($attr->nodeName);
            $value = (string) $attr->nodeValue;

            if ($name === 'align') {
                $candidate = strtolower(trim($value));
                if (in_array($candidate, self::ALLOWED_ALIGNMENTS, true)) {
                    $align = $candidate;
                }
            } elseif ($name === 'href' && $tag === 'a') {
                $href = self::safeUrl($value);
                if ($href !== null) {
                    $keep['href'] = $href;
                }
            } elseif ($name === 'class') {
                $tokens = preg_split('/\s+/', trim($value)) ?: [];
                $classes = array_values(array_unique(array_intersect($tokens, self::ALLOWED_CLASSES)));
                if ($classes !== []) {
                    $keep['class'] = implode(' ', $classes);
                }
            } elseif ($name === 'style') {
                $style = self::safeStyle($value);
                if ($style !== '') {
                    $keep['style'] = $style;
                }
            } elseif ($name === 'target' && $tag === 'a') {
                if ($value === '_blank') {
                    $keep['target'] = '_blank';
                }
            }
        }

        // Fold a legacy align="" attribute into the text-align style.
        if ($align !== null && !str_contains($keep['style'] ?? '', 'text-align')) {
            $keep['style'] = trim(($keep['style'] ?? '') . ';text-align:' . $align, ';');
        }

        return $keep;
    }

    /**
     * Validate an href value.
     *
     * @return string|null The trimmed URL when it is relative or uses an
     *                     allowed scheme; null when it is empty or uses any
     *                     other scheme.
     */
    private static function safeUrl(?string $url): ?string
    {
        $url = trim((string) $url);
        if ($url === '') {
            return null;
        }

        // Remove whitespace/control chars that could smuggle a scheme
        // (e.g. "java\tscript:") before inspecting it.
        $stripped = preg_replace('/[\x00-\x20]+/', '', $url);
        if ($stripped === null) {
            // PCRE failure: the scheme cannot be verified, so fail closed.
            return null;
        }

        if (
            preg_match('/^([a-z][a-z0-9+.\-]*):/i', $stripped, $m) === 1
            && !in_array(strtolower($m[1]), self::ALLOWED_SCHEMES, true)
        ) {
            return null;
        }

        // Allow relative URLs and fragments/anchors as-is.
        return $url;
    }

    /**
     * Reduce an inline style to its permitted declarations.
     *
     * Only `text-align` with a value from ALLOWED_ALIGNMENTS is kept.
     *
     * @return string Semicolon-joined declarations, or '' if none survive.
     */
    private static function safeStyle(?string $style): string
    {
        $out = [];
        foreach (explode(';', (string) $style) as $decl) {
            if (!str_contains($decl, ':')) {
                continue;
            }

            [$prop, $val] = explode(':', $decl, 2);
            $prop = strtolower(trim($prop));
            $val = strtolower(trim($val));

            if ($prop === 'text-align' && in_array($val, self::ALLOWED_ALIGNMENTS, true)) {
                $out[] = "text-align:{$val}";
            }
        }

        return implode(';', $out);
    }

    /**
     * Remove attribute-less <p>/<div> elements that contain only whitespace,
     * &nbsp; or <br>, then trim the result.
     */
    private static function tidy(string $html): string
    {
        // DOMDocument can emit empty paragraphs/divs from editor churn.
        $tidied = preg_replace('#<(p|div)>(\s|&nbsp;|<br\s*/?>)*</\1>#i', '', $html);

        // preg_replace() returns null on a PCRE error; the input is already
        // sanitized, so fall back to it rather than discarding the content.
        return trim($tidied ?? $html);
    }
}