Storage structure
- All audio tracks (primary + per-language) now live in one folder per song with
unique lowercase names ({slug}-{lang}-{id}); no more tracks/ subfolder.
- Generated renders (download video + HLS) moved into the song's local-only cache/
subfolder, separated from source files (never synced to NAS, safe to wipe).
- tracks:reorganize artisan command (dry-run default) consolidates legacy files,
updates DB paths, and deletes orphans + empty folders.
- CLAUDE.md documents the canonical layout as a global rule (identical local + NAS).
Version-aware download & share
- Download MP3/Video and Share now act on the version being played; ?track={id}
is carried through share links and auto-selects audio + title + flag + about +
OG/meta on open.
GPU + visualizer
- Setting::gpuUsable() runs a cached health probe (nvidia-smi + nvenc smoke test,
256x144) before sending any encode to the GPU; falls back to CPU otherwise.
- Visualizer "Download Video" bakes in equal-width, cover-coloured, translucent
frequency bars; loop-filter rebuild makes generation ~25x faster.
Image cropper
- result-callback mode + per-song cover-slide cropper in upload/edit (modal + mobile).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
224 lines
7.3 KiB
PHP
224 lines
7.3 KiB
PHP
<?php
|
|
|
|
namespace App\Support;
|
|
|
|
use DOMDocument;
|
|
use DOMElement;
|
|
use DOMNode;
|
|
|
|
/**
 * Allowlist-based HTML sanitizer for rich-text descriptions.
 *
 * Descriptions are authored in a self-hosted contenteditable editor and stored
 * as HTML. clean() strips everything not on the allowlist before storage;
 * render() produces safe display HTML (and upgrades legacy plain-text values).
 *
 * Processing model: the input is parsed with DOMDocument inside a synthetic
 * wrapper <div>, the tree is walked recursively, and only the wrapper's
 * children are serialized back out.
 */
class HtmlSanitizer
{
    /** Tags allowed in stored description HTML. */
    private const ALLOWED_TAGS = [
        'p', 'br', 'div', 'span',
        'b', 'strong', 'i', 'em', 'u', 's', 'strike',
        'h2', 'h3',
        'ul', 'ol', 'li',
        'blockquote', 'a',
    ];

    /**
     * CSS class values permitted in a class attribute (button-link styling).
     *
     * Note: the filter is applied to the class attribute of every allowed
     * element, not only <a>.
     */
    private const ALLOWED_CLASSES = [
        'action-btn', 'action-btn-primary', 'action-btn-danger',
        'action-btn-link', 'primary', 'danger', 'icon-only',
    ];

    /** URL schemes permitted in href. */
    private const ALLOWED_SCHEMES = ['http', 'https', 'mailto'];

    /** Tags removed wholesale, including their text content. */
    private const DROP_TAGS = [
        'script', 'style', 'iframe', 'object', 'embed', 'form',
        'input', 'button', 'textarea', 'select', 'option', 'link', 'meta',
        'svg', 'math', 'noscript', 'template',
    ];

    /** Values accepted for text-align (style declaration or legacy align=""). */
    private const TEXT_ALIGN_VALUES = ['left', 'right', 'center', 'justify'];

    /** id of the synthetic wrapper element used while parsing. */
    private const ROOT_ID = '__rte_root__';

    /**
     * Sanitize untrusted HTML down to the allowlist, for storage.
     *
     * @param string|null $html Raw HTML; null and whitespace-only input yield ''.
     *
     * @return string Sanitized HTML fragment, or '' when nothing remains or the
     *                wrapper element cannot be located after parsing.
     */
    public static function clean(?string $html): string
    {
        $html = trim((string) $html);
        if ($html === '') {
            return '';
        }

        // Wrap so DOMDocument has a single root and a known encoding.
        $wrapped = '<?xml encoding="UTF-8"><div id="' . self::ROOT_ID . '">' . $html . '</div>';

        $doc = new DOMDocument();
        $libxmlPrev = libxml_use_internal_errors(true);
        try {
            $doc->loadHTML(
                $wrapped,
                LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            // Always hand the libxml error mode back to the caller.
            libxml_clear_errors();
            libxml_use_internal_errors($libxmlPrev);
        }

        $root = $doc->getElementById(self::ROOT_ID);
        if (!$root) {
            return '';
        }

        self::sanitizeChildren($root);

        // Serialize the wrapper's children only, so the wrapper itself never leaks out.
        $out = '';
        foreach (iterator_to_array($root->childNodes) as $child) {
            $out .= (string) $doc->saveHTML($child);
        }

        return self::tidy($out);
    }

    /**
     * Produce safe display HTML. Legacy plain-text values (no tags) are
     * escaped and converted to <br>; rich values are run through clean().
     *
     * @param string|null $value Stored description (plain text or HTML).
     *
     * @return string HTML safe to output unescaped; '' for blank input.
     */
    public static function render(?string $value): string
    {
        $value = (string) $value;
        if (trim($value) === '') {
            return '';
        }

        // strip_tags() leaving the value untouched means it contains no markup.
        if (strip_tags($value) === $value) {
            return nl2br(e($value), false);
        }

        return self::clean($value);
    }

    /**
     * Sanitize every child of $node in place.
     *
     * Elements are delegated to sanitizeElement(); comments and processing
     * instructions are removed. Text nodes are kept as-is (saveHTML re-encodes
     * them safely).
     */
    private static function sanitizeChildren(DOMNode $node): void
    {
        // Iterate over a snapshot: the live child list changes as nodes are removed/unwrapped.
        foreach (iterator_to_array($node->childNodes) as $child) {
            if ($child instanceof DOMElement) {
                self::sanitizeElement($child);
                continue;
            }

            if (in_array($child->nodeType, [XML_COMMENT_NODE, XML_PI_NODE], true)) {
                $child->parentNode?->removeChild($child);
            }
        }
    }

    /**
     * Sanitize one element in place: drop it, unwrap it, or strip it down to
     * its allowed attributes and then recurse into its children.
     */
    private static function sanitizeElement(DOMElement $el): void
    {
        $tag = strtolower($el->nodeName);

        if (in_array($tag, self::DROP_TAGS, true)) {
            $el->parentNode?->removeChild($el);

            return;
        }

        if (!in_array($tag, self::ALLOWED_TAGS, true)) {
            // Unwrap: keep sanitized children, drop the disallowed wrapper.
            self::sanitizeChildren($el);
            $parent = $el->parentNode;
            while ($el->firstChild) {
                $parent->insertBefore($el->firstChild, $el);
            }
            $parent->removeChild($el);

            return;
        }

        $keep = self::filterAttributes($el, $tag);

        // Strip every attribute node, then re-add only the vetted ones.
        // Removing by node (over a snapshot) cannot loop forever the way a
        // "remove first attribute by name until empty" loop could.
        foreach (iterator_to_array($el->attributes) as $attr) {
            $el->removeAttributeNode($attr);
        }
        foreach ($keep as $name => $val) {
            $el->setAttribute($name, $val);
        }

        // Any link opening a new tab gets rel protection.
        if ($tag === 'a' && ($keep['target'] ?? '') === '_blank') {
            $el->setAttribute('rel', 'noopener noreferrer');
        }

        self::sanitizeChildren($el);
    }

    /**
     * Compute the attributes an allowed element may keep.
     *
     * Rules: href (on <a>, vetted by safeUrl), class (intersected with
     * ALLOWED_CLASSES), style (reduced by safeStyle), target="_blank" (on <a>),
     * and a legacy align="" attribute folded into a text-align style.
     *
     * @return array<string, string> Attribute name => sanitized value, in the
     *                               order the attributes were encountered.
     */
    private static function filterAttributes(DOMElement $el, string $tag): array
    {
        $keep = [];
        $alignAttr = null;
        $isLink = $tag === 'a';

        foreach (iterator_to_array($el->attributes) as $attr) {
            $name = strtolower($attr->nodeName);
            $val = (string) $attr->nodeValue;

            if ($name === 'align') {
                $candidate = strtolower(trim($val));
                if (in_array($candidate, self::TEXT_ALIGN_VALUES, true)) {
                    $alignAttr = $candidate;
                }
            } elseif ($name === 'href' && $isLink) {
                $href = self::safeUrl($val);
                if ($href !== null) {
                    $keep['href'] = $href;
                }
            } elseif ($name === 'class') {
                $classes = array_values(array_intersect(
                    preg_split('/\s+/', trim($val)) ?: [],
                    self::ALLOWED_CLASSES
                ));
                if ($classes) {
                    $keep['class'] = implode(' ', $classes);
                }
            } elseif ($name === 'style') {
                $style = self::safeStyle($val);
                if ($style !== '') {
                    $keep['style'] = $style;
                }
            } elseif ($name === 'target' && $isLink) {
                if ($val === '_blank') {
                    $keep['target'] = '_blank';
                }
            }
        }

        // Fold a legacy align="" attribute into the text-align style, unless
        // the style attribute already carries a text-align declaration.
        if ($alignAttr !== null && !str_contains($keep['style'] ?? '', 'text-align')) {
            $keep['style'] = trim(($keep['style'] ?? '') . ';text-align:' . $alignAttr, ';');
        }

        return $keep;
    }

    /**
     * Vet an href value.
     *
     * @return string|null The trimmed URL when it is relative/fragment-only or
     *                     uses a scheme in ALLOWED_SCHEMES; null when it is
     *                     empty or uses any other scheme.
     */
    private static function safeUrl(?string $url): ?string
    {
        $url = trim((string) $url);
        if ($url === '') {
            return null;
        }

        // Detect the scheme on a copy with control chars and spaces removed,
        // so they cannot be used to smuggle a scheme past the check.
        $stripped = (string) preg_replace('/[\x00-\x20]+/', '', $url);
        if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $stripped, $m)
            && !in_array(strtolower($m[1]), self::ALLOWED_SCHEMES, true)
        ) {
            return null;
        }

        // Allow relative URLs and fragments/anchors as-is.
        return $url;
    }

    /**
     * Reduce an inline style to its allowed declarations.
     *
     * Only text-align with a value from TEXT_ALIGN_VALUES survives.
     *
     * @return string Normalized declarations joined by ';', or ''.
     */
    private static function safeStyle(?string $style): string
    {
        $out = [];
        foreach (explode(';', (string) $style) as $decl) {
            if (!str_contains($decl, ':')) {
                continue;
            }

            [$prop, $val] = explode(':', $decl, 2);
            $prop = strtolower(trim($prop));
            $val = strtolower(trim($val));

            if ($prop === 'text-align' && in_array($val, self::TEXT_ALIGN_VALUES, true)) {
                $out[] = "text-align:{$val}";
            }
        }

        return implode(';', $out);
    }

    /**
     * Remove attribute-less <p>/<div> elements that contain nothing but
     * whitespace, non-breaking spaces and <br> (editor churn), then trim.
     */
    private static function tidy(string $html): string
    {
        // The non-breaking space is matched both as raw UTF-8 bytes and in
        // its entity forms, since the serializer may emit either.
        $cleaned = preg_replace(
            '~<(p|div)>(?:\s|&nbsp;|&#(?:160|x0*a0);|\xC2\xA0|<br\s*/?>)*</\1>~i',
            '',
            $html
        );

        // preg_replace() returns null on a PCRE error; keep the (already
        // sanitized) markup rather than silently discarding it.
        return trim($cleaned ?? $html);
    }
}
|