diff options
| author | Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> | 2024-09-24 10:53:31 +0200 |
|---|---|---|
| committer | Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> | 2024-09-24 10:53:31 +0200 |
| commit | 4459dd7917f4d1c34f40bb68f0e991e9c3d53e4c (patch) | |
| tree | 5c07151ae61276d334e88f6309c30d439a85c12e /lib/classes/Markup.class.php | |
| parent | da0022e5c1abbf9825ae76debaabdff7e8623bb4 (diff) | |
| parent | 97a188592c679890a25c37ab78463add76a52ff7 (diff) | |
Merge branch 'main' into issue-3911issue-3911
Diffstat (limited to 'lib/classes/Markup.class.php')
| -rw-r--r-- | lib/classes/Markup.class.php | 787 |
1 files changed, 0 insertions, 787 deletions
diff --git a/lib/classes/Markup.class.php b/lib/classes/Markup.class.php deleted file mode 100644 index da040fa..0000000 --- a/lib/classes/Markup.class.php +++ /dev/null @@ -1,787 +0,0 @@ -<?php -/** - * Markup.class.php - Handling of Stud.IP- and HTML-markup. - ** - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * @category Stud.IP - * @copyright (c) 2014 Stud.IP e.V. - * @license http://www.gnu.org/licenses/gpl-2.0.html GPL version 2 - * @since File available since Release 3.0 - * @author Robert Costa <rcosta@uos.de> - */ -namespace Studip; - -require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyLinks.php'; -require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyTables.php'; -require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_LinkifyEmail.php'; -require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_TransformLinks.php'; -require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_Unlinkify.php'; - -class Markup -{ - /** - * Apply markup rules and clean the text up. - * - * @param TextFormat $markup Markup rules applied on marked-up text. - * @param string $text Marked-up text on which rules are applied. - * @param boolean $trim Trim text before applying markup rules, if TRUE. - * - * @return string HTML code computed from marked-up text. - */ - public static function apply($markup, $text, $trim) - { - return $markup->format(self::markupToHtml($text, $trim, false)); - } - - // signature for HTML entries - const HTML_MARKER = '<!--HTML-->'; - - // signature for HTML fallback entries - const HTML_MARKER_FALLBACK = '<!-- HTML: Insert text after this line only. -->'; - - // regular expression for detecting HTML signature - const HTML_MARKER_REGEXP = '/^\s*<!--\s*HTML.*?-->/i'; - - /** - * Return `true` if the WYSIWYG editor is enabled for this user. - * @deprecated since Stud.IP 5.5 - * - * @return boolean always returns `true`. - */ - public static function editorEnabled() - { - return true; - } - - /** - * Return `true` for HTML code and `false` for plain text. - * - * HTML code must either match `HTML_MARKER_REGEXP` or begin - * with '<' and end with '>' (leading and trailing whitespace - * is ignored). Everything else is considered to be plain - * text. - * - * @param string $text HTML code or plain text. - * - * @return boolean `true` for HTML code, `false` for plain text. - */ - public static function isHtml($text) - { - return self::hasHtmlMarker($text); - } - - /** - * Return `true` for Stud.IP-HTML and `false` otherwise. - * - * Stud.IP-HTML is HTML that can contain Stud.IP Markup. - * - * Stud.IP-HTML must match Stud.IP 3.2's HTML marker. - * Leading and trailing whitespace is ignored. - * - * Everything else is considered not Stud.IP-HTML. In other - * words, if it's not Stud.IP-HTML it might be everything - * from plain text to binary code. But usually it's either - * Stud.IP markup or plain HTML code, then. - * - * @param string $text Text that is or isn't Stud.IP-HTML. - * - * @return boolean `true` for Stud.IP-HTML - */ - public static function isHtmlFallback($text) - { - $text = trim($text); - - // it's not fallback if the new HTML marker is detected - if (MarkupPrivate\Text\startsWith($text, self::HTML_MARKER)) { - return false; - } - - // it's Stud.IP-HTML if Stud.IP 3.2's HTML marker is detected - if (MarkupPrivate\Text\startsWith($text, self::HTML_MARKER_FALLBACK)) { - return true; - } - - return false; - } - - /** - * Return `true` for HTML code and `false` for plain text. - * - * HTML code must start with a match for `HTML_MARKER_REGEXP`. - * - * @param string $text HTML code or plain text. - * - * @return boolean `true` for HTML code, `false` for plain text. - */ - public static function hasHtmlMarker($text) - { - return preg_match(self::HTML_MARKER_REGEXP, $text); - } - - /** - * Mark a given text as HTML code. - * - * No sanity-checking is done on the given text. It is simply - * marked up so to be identified by Markup::isHtml as HTML - * code. - * - * @param string $text The text to be marked up as HTML code. - * - * @return string The text marked up as HTML code. - */ - public static function markAsHtml($text) - { - // NOTE keep this function in sync with the JavaScript - // function markAsHtml in WyswygHtmlHead.php - if (self::hasHtmlMarker($text) || trim($text) === '') { - return $text; // marker already set, don't set twice - } - return self::HTML_MARKER . $text; - } - - /** - * Apply markup rules after running text through HTML ready. - * - * @param TextFormat $markup Markup rules applied on marked-up text. - * @param string $text Marked-up text on which rules are applied. - * @param boolean $trim Trim text before applying markup rules, if TRUE. - * - * @return string HTML code computed from marked-up text. - */ - private static function markupHtmlReady($markup, $text, $trim) - { - return str_replace("\n", '<br>', self::markupText( - $markup, self::htmlReady(self::unixEOL($text), $trim))); - } - - /** - * Convert line break to Unix format. - * - * @param string $text Text with possibly mixed line breaks (Win, Mac, Unix). - * - * @return string Text with Unix line breaks only. - */ - private static function unixEOL($text) - { - return preg_replace("/\r\n?/", "\n", $text); - } - - /** - * Apply markup rules on plain text. - * - * @param TextFormat $markup Markup rules applied on marked-up text. - * @param string $text Marked-up text on which rules are applied. - * - * @return string HTML code computed from marked-up text. - */ - private static function markupText($markup, $text) - { - return symbol($markup->format($text)); - } - - /** - * Call HTMLPurifier to create safe HTML. - * - * @param string $dirty_html Unsafe or 'uncleaned' HTML code. - * @param boolean $autoformat Apply the AutoFormat rules - * @return string Clean and safe HTML code. - */ - private static function purify($dirty_html, $autoformat = true) - { - $purifier = self::createPurifier($autoformat); - - return $purifier->purify($dirty_html); - } - - /** - * Call HTMLPurifier to filter the HTML code (if the source is detected - * to contain HTML, returns the argument unchanged otherwise). The HTML - * marker is restored afterwards, if it was present. - * - * @param string $dirty_html Unsafe or 'uncleaned' HTML code. - * @return string Clean and safe HTML code. - */ - public static function purifyHtml($html) - { - if ($html instanceof \I18NString) { - $base = self::purifyHtml($html->original()); - $lang = $html->toArray(); - - foreach ($lang as &$value) { - $value = self::purifyHtml($value); - } - - return new \I18NString($base, $lang); - } - - if (self::isHtml($html)) { - $html = self::markAsHtml(self::purify($html)); - } - - return $html; - } - - /** - * Create HTML purifier instance with Stud.IP-specific configuration. - * - * @param boolean $autoformat Apply the AutoFormat rules - * @return \HTMLPurifier A new instance of the HTML purifier. - */ - private static function createPurifier($autoformat) - { - $config = \HTMLPurifier_Config::createDefault(); - $config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']); - $config->set('Core.RemoveInvalidImg', true); - - // restrict allowed HTML tags and attributes - // - // note that changes here should also be reflected in CKEditor's - // settings!! - // - // NOTE The list could be restricted even further by allowing only - // specific values for some attributes and CSS styles, but that is not - // directly supported by HTMLPurifier and would need to be implemented - // with a filter similar to ClassifyLinks. - // - // This is a list of further restrictions that can/should be introduced - // at a later time point maybe, if possible: - // - // - always open external links in a new tab or window - // a[class="link-extern" href="..." target="_blank"] - // - only allow left margin and horizontal text alignment to be set in - // divs (NOTE maybe remove these two features completely?): - // div[style="margin-left:(40|80|...)px; text-align:(center|right|justify)"] - // - img[style] should only allow float:left or float:right - // - only allow text color and background color to be set in a span's - // style attribute (NOTE 'wiki-links' are currently set here due to - // implementation difficulties, but probably this should be - // changed...): - // span[style="color:(#000000|#800000|...); - // background-color:(#000000|#800000|...)" - // class="wiki-link"] - // - tables should always have the class "content" (it should not be - // optional and no other class should be set): - // table[class="content"] - // - table headings should have a column and/or a row scope or no scope - // at all, but nothing else: - // th[scope="(col | row)"] - // - fonts: only Stud.IP-specific fonts should be allowed - // - $config->set('HTML.Allowed', ' - a[class|href|target|rel|name|id] - audio[controls|src|height|width|style] - big - blockquote - br - caption - code[class] - div[class|style] - em - figure[class|style] - figcaption - h1 - h2 - h3 - h4 - h5 - h6 - hr - i - img[alt|src|height|width|class|style] - li - ol[reversed|start|style] - p[style] - pre[class] - span[style|class] - strong - u - ul[style] - s - small - sub - sup - table[class|style] - tbody - td[colspan|rowspan|style] - thead - th[colspan|rowspan|style|scope] - tr - tt - video[controls|src|height|width|style] - '); - - $config->set('Attr.AllowedFrameTargets', ['_blank']); - $config->set('Attr.AllowedRel', ['nofollow']); - $config->set('Attr.EnableID', true); - $config->set('Attr.AllowedClasses', [ - 'author', - 'content', - 'image', - 'image-style-side', - 'image_resized', - 'language-cpp', - 'language-css', - 'language-diff', - 'language-java', - 'language-javascript', - 'language-json', - 'language-php', - 'language-python', - 'language-ruby', - 'language-scss', - 'language-sql', - 'language-xml', - 'link-extern', - 'link-intern', - 'math-tex', - 'table', - 'usercode', - 'wiki-link' - ]); - $config->set('CSS.AllowedFonts', [ - 'serif', - 'sans-serif', - 'monospace', - 'cursive' - ]); - $config->set('CSS.AllowedProperties', [ - 'margin-left', - 'text-align', - 'width', - 'height', - 'color', - 'background-color', // needed by span, td - 'border-color', - 'border-style', - 'float', - 'border' - ]); - $config->set('CSS.MaxImgLength', null); - - if ($autoformat) { - $config->set('AutoFormat.Linkify', true); - $config->set('AutoFormat.Custom', [ - 'ClassifyLinks', - 'ClassifyTables', - 'LinkifyEmail' - ]); - $config->set('AutoFormat.RemoveSpansWithoutAttributes', true); - } else { - $config->set('AutoFormat.Custom', ['TransformLinks']); - } - - // avoid <img src="evil_CSRF_stuff"> - $def = $config->getHTMLDefinition(true); - $img = $def->addBlankElement('img'); - $img->attr_transform_post[] - = new MarkupPrivate\Purifier\AttrTransform_Image_Source(); - - $def->addElement('audio', 'Inline', 'Flow', 'Common', [ - 'src*' => 'URI', - 'width' => 'Length', - 'height' => 'Length', - 'controls' => 'Text', // Bool triggers bug in HTMLPurifier - ]); - - $def->addElement('video', 'Inline', 'Flow', 'Common', [ - 'src*' => 'URI', - 'width' => 'Length', - 'height' => 'Length', - 'controls' => 'Text', // Bool triggers bug in HTMLPurifier - ]); - - $def->addElement('figcaption', 'Inline', 'Flow', 'Common'); - $def->addElement('figure', 'Block', 'Optional: (figcaption, Flow) | (Flow, figcaption) | Flow', 'Common'); - - $def->addAttribute('ol', 'reversed', 'Bool'); - $def->addAttribute('ol', 'style', 'Text'); - $def->addAttribute('ul', 'style', 'Text'); - - return new \HTMLPurifier($config); - } - - /** - * Convert special characters to HTML entities, and clean up. - * - * @param string $text This text's special chars will be converted. - * @param boolean $trim Trim text before applying markup rules, if TRUE. - * @param boolean $br Replace newlines by <br>, if TRUE. - * @param boolean $double_encode Encode existing HTML entities, if TRUE. - * @return string The converted string. - */ - public static function htmlReady( - $text, $trim = true, $br = false, $double_encode = true - ) { - $text = htmlspecialchars($text, ENT_QUOTES, 'utf-8', $double_encode); - - if ($trim) { - $text = trim($text); - } - if ($br) { // fix newlines - $text = nl2br($text, false); - } - return $text; - } - - /** - * Prepare text for wysiwyg (if enabled), otherwise convert special - * characters using htmlReady. - * - * @param string $text The text. - * @param boolean $trim Trim text before applying markup rules, if TRUE. - * @param boolean $br Replace newlines by <br>, if TRUE and wysiwyg editor disabled. - * @param boolean $double_encode Encode existing HTML entities, if TRUE and wysiwyg editor disabled. - * @return string The converted string. - */ - public static function wysiwygReady( - $text, $trim = true, $br = false, $double_encode = true - ) { - if (self::editorEnabled()) { - $text = self::markupToHtml($text, $trim); - } - return self::htmlReady($text, $trim, $br, $double_encode); - } - - /** - * Convert Stud.IP markup (possibly mixed with HTML if fallback mode is - * enabled) to editable HTML. Pure HTML will only run through the purifier. - * - * @param string $text The text. - * @param boolean $trim Trim text before applying markup rules, if TRUE. - * @param boolean $mark Mark result text as HTML, if TRUE. - * @return string The converted string. - */ - public static function markupToHtml($text, $trim = true, $mark = true) - { - if (!trim($text)) { - return $text; - } - if (self::isHtml($text)) { - $is_fallback = self::isHtmlFallback($text); - $text = self::purify($text, false); - - if ($is_fallback) { - $text = self::markupText(new \StudipCoreFormat(), $text); - } - } else { - $text = self::markupHtmlReady(new \StudipCoreFormat(), $text, $trim); - } - - return $mark ? self::markAsHtml($text) : $text; - } - - /** - * Call HTMLPurifier to remove all HTML tags from the string (if the source - * is detected to contain HTML, returns the argument unchanged otherwise). - * - * @param string $html HTML code to filter - * @return string The converted string. - */ - public static function removeHtml($html) - { - if (self::isHtml($html)) { - $config = \HTMLPurifier_Config::createDefault(); - $config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']); - $config->set('HTML.Allowed', 'a[href],img[alt|src],br'); - $config->set('AutoFormat.Custom', ['Unlinkify']); - - $html = str_replace('</li>', '</li><br>', $html); - $html = str_replace('</ol>', '</ol><br>', $html); - $html = str_replace('</ul>', '</ul><br>', $html); - $html = str_replace('</tr>', '</tr><br>', $html); - $html = str_replace('</p>', '</p><br><br>', $html); - $html = str_replace('</div>', '</div><br><br>', $html); - - $purifier = new \HTMLPurifier($config); - $html = $purifier->purify($html); - - // Replace new lines with simple line break; twice because we don't - // want to create unneccessary white space if a <br /> is followed - // by a new line - $html = str_replace('<br />' . PHP_EOL, PHP_EOL, $html); - $html = str_replace('<br />', PHP_EOL, $html); - - $html = \decodeHTML(trim($html)); - } - - return $html; - } -} - -/** - * Members of Studip\MarkupPrivate must not be used outside of this file!! - */ - -namespace Studip\MarkupPrivate\Purifier; - -use Studip\MarkupPrivate\MediaProxy; - -/** - * Remove invalid <img src> attributes. - */ -class AttrTransform_Image_Source extends \HTMLPurifier_AttrTransform -{ - /** - * Implements abstract method of base class. - */ - function transform($attr, $config, $context) - { - try { - $attr['src'] = MediaProxy\getMediaUrl($attr['src']); - } catch (MediaProxy\InvalidInternalLinkException $e) { - // invalid internal link ==> remove <img src> attribute - $GLOBALS['msg'][] = _('Ungültige interne Medienverknüpfung entfernt: ') - . \htmlentities($e->getUrl()); - $attr['src'] = NULL; // remove <img src> attribute - } catch (MediaProxy\ExternalMediaDeniedException $e) { - $GLOBALS['msg'][] = _('Verbotene externe Medienverknüpfung entfernt: ') - . \htmlentities($e->getUrl()); - $attr['src'] = NULL; // remove <img src> attribute - } - return $attr; - } -} - -//// media proxy ////////////////////////////////////////////////////////////// - -namespace Studip\MarkupPrivate\MediaProxy; - -use Studip\MarkupPrivate\Text; - -/** - * Check if media proxy should be used and if so return the respective URL. - * - * @param string $url URL to media file. - * @return mixed URL string to media file (possibly 'proxied') - * or NULL if URL is invalid. - */ -function getMediaUrl($url) { - // even though proxied URLs shouldn't be stored in the database, the - // next line will handle those cases where they're accidentally there - $url = decodeMediaProxyUrl($url); - - // handle internal media links - if (isStudipMediaUrl($url)) { - return transformInternalIdnaLink($url); - } - if (isInternalLink($url)) { - // link is studip-internal, but not to a valid media location - throw new InvalidInternalLinkException($url); - } - - // handle external media links - $external_media = \Config::get()->LOAD_EXTERNAL_MEDIA; - if ($external_media === 'proxy' && - \Seminar_Session::is_current_session_authenticated() - ) { - // media proxy must be accessed by an internal link - return encodeMediaProxyUrl($url); - } - if ($external_media === 'allow') { - return $url; - } - throw new ExternalMediaDeniedException($url); -} - -/** - * Return media proxy URL for an unproxied URL. - * - * @params string $url Unproxied media URL. - * @return string Media proxy URL for accessing the same resource. - */ -function encodeMediaProxyUrl($url) { - return transformInternalIdnaLink( - getMediaProxyUrl() .'?url=' . \urlencode(\idna_link($url))); -} - -/** - * Extract the original URL from a media proxy URL. - * - * @param string $url The media proxy URL. - * return string The original URL. If $url does not point to the media - * proxy then this is the exact same value given by $url. - */ -function decodeMediaProxyUrl($url) { - # TODO make it work for 'url=' at any position in query - $urlpath = removeStudipDomain($url); - $proxypath = removeStudipDomain(getMediaProxyUrl()) . '?url='; - if (Text\startsWith($urlpath, $proxypath)) { - return \urldecode(Text\removePrefix($urlpath, $proxypath)); - } - return $url; -} - -/** - * Return Stud.IP's absolute media proxy URL. - */ -function getMediaProxyUrl() { - return $GLOBALS['ABSOLUTE_URI_STUDIP'] . 'dispatch.php/media_proxy'; -} - -/** - * Test if an URL points to a valid internal Stud.IP media path. - * - * @param string $url Internal Stud.IP URL. - * @returns boolean TRUE for internal media link URLs, FALSE otherwise. - */ -function isStudipMediaUrl($url) { - return isInternalLink($url) && - isStudipMediaUrlPath(getStudipRelativePath($url)); -} - -function isInternalLink($url) { - return is_internal_url(transformInternalIdnaLink($url)); -} - -//// url utilities //////////////////////////////////////////////////////////// - -/** - * Remove domain name from internal URLs. - * - * Remove scheme, domain and authentication information from internal - * Stud.IP URLs. Leave external URLs untouched. - * - * @param string $url URL from which to remove internal domain. - * @returns string URL without internal domain or the exact same - * value as $url for external URLs. - */ -function removeStudipDomain($url) { - if (!isInternalLink($url)) { - return $url; - } - $parsed_url = \parse_url(transformInternalIdnaLink($url)); - $path = isset($parsed_url['path']) ? $parsed_url['path'] : ''; - $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : ''; - $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : ''; - return $path . $query . $fragment; -} - -/** - * Return a URL's path component with the absolute Stud.IP path removed. - * - * NOTE: If the URL is not an internal Stud.IP URL, the path component will - * nevertheless be returned without issuing an error message. - * - * Example: - * >>> getStudipRelativePath('http://localhost:8080' - * . '/studip/sendfile.php?type=0&file_id=ABC123&file_name=nice.jpg') - * 'sendfile.php' - * - * @param string $url The URL from which to return the Stud.IP-relative - * path component. - * returns string Stud.IP-relative path component of $url. - */ -function getStudipRelativePath($url) { - $parsed_url = \parse_url(transformInternalIdnaLink($url)); - $parsed_studip_url = getParsedStudipUrl(); - return Text\removePrefix($parsed_url['path'], $parsed_studip_url['path']); -} - -/** - * Return an associative array containing the Stud.IP URL elements. - * - * see also: http://php.net/manual/en/function.parse-url.php - * - * @returns mixed Same values that PHP's parse_url() returns. - */ -function getParsedStudipUrl() { - return \parse_url($GLOBALS['ABSOLUTE_URI_STUDIP']); -} - -/** - * Test if path is valid for internal Stud.IP media URLs. - * - * @params string $path The path component of an URL. - * return boolean TRUE for valid media paths, FALSE otherwise. - */ -function isStudipMediaUrlPath($path) { - list($path_head) = \explode('/', $path); - $valid_paths = ['sendfile.php', 'download', 'assets', 'pictures']; - return \mb_strpos(\urldecode($path), '../') === false && \in_array($path_head, $valid_paths); -} - -/** - * Return a normalized, internal URL. - * - * @params string $url An internal URL. - * @returns string Normalized internal URL. - */ -function transformInternalIdnaLink($url) { - return \idna_link(\TransformInternalLinks($url)); -} - -//// url exceptions /////////////////////////////////////////////////////////// - -class UrlException extends \Exception -{ - private $url; - - public function __construct($url) { - parent::__construct(); - $this->url = $url; - } - - public function getUrl() - { - return $this->url; - } -} - -class InvalidInternalLinkException extends UrlException -{ -} - -class ExternalMediaDeniedException extends UrlException -{ -} - -//// string utilities ///////////////////////////////////////////////////////// - -namespace Studip\MarkupPrivate\Text; - -/** - * Test if string starts with prefix. - * - * @param string $string Tested string. - * @param string $prefix Prefix of tested string. - * - * @return boolean TRUE if string starts with prefix. - */ -function startsWith($string, $prefix) { - return \mb_substr($string, 0, \mb_strlen($prefix)) === $prefix; -} - -/** - * Test if string ends with suffix. - * - * @param string $string Tested string. - * @param string $suffix Suffix of tested string. - * - * @return boolean TRUE if string ends with suffix. - */ -function endsWith($string, $suffix) { - return \mb_substr($string, - \mb_strlen($suffix)) === $suffix; -} - -/** - * Remove prefix from string. - * - * Does not change the string if it has a different prefix. - * - * @param string $string The string that must start with the prefix. - * @param string $prefix The prefix of the string. - * - * @return string String without prefix. - */ -function removePrefix($string, $prefix) { - return startsWith($string, $prefix) ? \mb_substr($string, \mb_strlen($prefix)) : $string; -} |
