(button-link styling). */
    private const ALLOWED_CLASSES = [
        'action-btn',
        'action-btn-primary',
        'action-btn-danger',
        'action-btn-link',
        'primary',
        'danger',
        'icon-only',
    ];

    /** URL schemes permitted in href. */
    private const ALLOWED_SCHEMES = ['http', 'https', 'mailto'];

    /** Tags removed wholesale, including their text content. */
    private const DROP_TAGS = [
        'script',
        'style',
        'iframe',
        'object',
        'embed',
        'form',
        'input',
        'button',
        'textarea',
        'select',
        'option',
        'link',
        'meta',
        'svg',
        'math',
        'noscript',
        'template',
    ];

    /**
     * Sanitize untrusted HTML down to the allowlist, for storage.
     *
     * The input is parsed with DOMDocument inside a synthetic wrapper element,
     * every node below that wrapper is sanitized in place, and the wrapper's
     * children are serialized back to a string and passed through tidy().
     *
     * @param string|null $html Untrusted markup; null and blank input yield ''.
     *
     * @return string Sanitized markup, or '' when there is nothing to keep or
     *                the wrapper element cannot be found after parsing.
     */
    public static function clean(?string $html): string
    {
        $html = trim((string) $html);
        if ($html === '') {
            return '';
        }

        // Wrap so DOMDocument has a single root and a known encoding.
        // NOTE(review): the wrapper markup was missing from the source under
        // review; it is reconstructed here from the getElementById() lookup
        // below (a root element carrying id "__rte_root__") — confirm against
        // version control.
        $wrapped = '<?xml encoding="UTF-8"><div id="__rte_root__">' . $html . '</div>';

        $doc = new DOMDocument();
        $libxmlPrev = libxml_use_internal_errors(true);
        try {
            $doc->loadHTML(
                $wrapped,
                LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            // Always hand the caller's libxml error mode back, even if parsing throws.
            libxml_clear_errors();
            libxml_use_internal_errors($libxmlPrev);
        }

        $root = $doc->getElementById('__rte_root__');
        if (!$root) {
            return '';
        }

        self::sanitizeChildren($root);

        // Serialize the children only, so the synthetic wrapper never leaks out.
        $out = '';
        foreach (iterator_to_array($root->childNodes) as $child) {
            $out .= $doc->saveHTML($child);
        }

        return self::tidy($out);
    }

    /**
     * Produce safe display HTML. Legacy plain-text values (no tags) are
     * escaped and converted to <br> line breaks; rich values are run
     * through clean().
     *
     * @param string|null $value Stored value, either plain text or rich HTML.
     *
     * @return string Display-ready HTML; '' for null or whitespace-only input.
     */
    public static function render(?string $value): string
    {
        $value = (string) $value;
        if (trim($value) === '') {
            return '';
        }

        // strip_tags() leaving the value untouched means it contains no markup.
        if (strip_tags($value) === $value) {
            return nl2br(e($value), false);
        }

        return self::clean($value);
    }

    /**
     * Sanitize every child of $node in place.
     *
     * Elements are delegated to sanitizeElement(); comments and processing
     * instructions are removed; all other node types (text in particular)
     * are left untouched.
     */
    private static function sanitizeChildren(DOMNode $node): void
    {
        // Iterate over a snapshot: the live child list changes as nodes are
        // removed or unwrapped.
        foreach (iterator_to_array($node->childNodes) as $child) {
            if ($child instanceof DOMElement) {
                self::sanitizeElement($child);
            } elseif ($child->nodeType === XML_COMMENT_NODE || $child->nodeType === XML_PI_NODE) {
                // Neither comments nor processing instructions are on the allowlist.
                $child->parentNode->removeChild($child);
            }
            // Text nodes are kept as-is (saveHTML re-encodes them safely).
        }
    }

    /**
     * Sanitize a single element in place.
     *
     * - Tags listed in DROP_TAGS are removed together with their content.
     * - Tags not listed in ALLOWED_TAGS are unwrapped: their sanitized
     *   children take the element's place.
     * - Allowed tags lose every attribute except a validated href and
     *   target="_blank" (both on <a> only), allowlisted class names and a
     *   text-align style. A legacy align attribute is folded into the style.
     */
    private static function sanitizeElement(DOMElement $el): void
    {
        $tag = strtolower($el->nodeName);

        if (in_array($tag, self::DROP_TAGS, true)) {
            $el->parentNode->removeChild($el);

            return;
        }

        if (!in_array($tag, self::ALLOWED_TAGS, true)) {
            // Unwrap: keep sanitized children, drop the disallowed wrapper.
            self::sanitizeChildren($el);
            $parent = $el->parentNode;
            while ($el->firstChild) {
                $parent->insertBefore($el->firstChild, $el);
            }
            $parent->removeChild($el);

            return;
        }

        // Strip every attribute, then re-add only the allowed ones.
        $attributes = iterator_to_array($el->attributes);
        $keep = [];
        $alignAttr = null;

        foreach ($attributes as $attr) {
            $name = strtolower($attr->nodeName);
            $val = $attr->nodeValue;

            if ($name === 'align') {
                $a = strtolower(trim($val));
                if (in_array($a, ['left', 'right', 'center', 'justify'], true)) {
                    $alignAttr = $a;
                }
            } elseif ($name === 'href' && $tag === 'a') {
                $href = self::safeUrl($val);
                if ($href !== null) {
                    $keep['href'] = $href;
                }
            } elseif ($name === 'class') {
                $classes = array_values(array_intersect(
                    preg_split('/\s+/', trim($val)) ?: [],
                    self::ALLOWED_CLASSES
                ));
                if ($classes) {
                    $keep['class'] = implode(' ', $classes);
                }
            } elseif ($name === 'style') {
                $style = self::safeStyle($val);
                if ($style !== '') {
                    $keep['style'] = $style;
                }
            } elseif ($name === 'target' && $tag === 'a') {
                if ($val === '_blank') {
                    $keep['target'] = '_blank';
                }
            }
        }

        // Fold a legacy align="" attribute into the text-align style.
        if ($alignAttr !== null && !str_contains($keep['style'] ?? '', 'text-align')) {
            $keep['style'] = trim(($keep['style'] ?? '') . ';text-align:' . $alignAttr, ';');
        }

        // Remove by node over the snapshot rather than looping on
        // attributes->length and removing by name: if a removal by name ever
        // fails to take effect, that loop would never terminate.
        foreach ($attributes as $attr) {
            $el->removeAttributeNode($attr);
        }

        foreach ($keep as $name => $val) {
            $el->setAttribute($name, $val);
        }

        // Any link opening a new tab gets rel protection.
        if ($tag === 'a' && ($keep['target'] ?? '') === '_blank') {
            $el->setAttribute('rel', 'noopener noreferrer');
        }

        self::sanitizeChildren($el);
    }

    /**
     * Validate an href value.
     *
     * @return string|null The trimmed URL when it is relative or uses a scheme
     *                     from ALLOWED_SCHEMES; null when it is empty or uses
     *                     any other scheme.
     */
    private static function safeUrl(?string $url): ?string
    {
        $url = trim((string) $url);
        if ($url === '') {
            return null;
        }

        // Reject control chars that could smuggle a scheme: the scheme test
        // runs on a copy with all bytes 0x00-0x20 removed.
        $stripped = preg_replace('/[\x00-\x20]+/', '', $url);
        if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $stripped, $m)) {
            if (!in_array(strtolower($m[1]), self::ALLOWED_SCHEMES, true)) {
                return null;
            }
        }

        // Allow relative URLs and fragments/anchors as-is.
        return $url;
    }

    /**
     * Reduce an inline style attribute to its text-align declarations.
     *
     * @return string Zero or more "text-align:<value>" declarations joined by
     *                ';', where <value> is left, right, center or justify;
     *                '' when none qualify.
     */
    private static function safeStyle(?string $style): string
    {
        $out = [];

        foreach (explode(';', (string) $style) as $decl) {
            if (!str_contains($decl, ':')) {
                continue;
            }

            [$prop, $val] = array_map('trim', explode(':', $decl, 2));
            $prop = strtolower($prop);
            $val = strtolower($val);

            if ($prop === 'text-align' && in_array($val, ['left', 'right', 'center', 'justify'], true)) {
                $out[] = "text-align:{$val}";
            }
        }

        return implode(';', $out);
    }

    /**
     * Remove empty, attribute-less paragraphs and divs from serialized HTML
     * and trim the result.
     */
    private static function tidy(string $html): string
    {
        // DOMDocument can emit empty paragraphs/divs from editor churn.
        // NOTE(review): the pattern in the source under review had lost its
        // closing-tag part and therefore deleted opening tags only; it is
        // reconstructed to match a whole empty element whose content is only
        // whitespace, non-breaking spaces or line breaks — confirm against
        // version control.
        $html = preg_replace('~<(p|div)>(?:\s|&nbsp;|&#160;|\xC2\xA0|<br\s*/?>)*</\1>~i', '', $html);

        return trim((string) $html);
    }
}