aboutsummaryrefslogtreecommitdiff
path: root/lib/classes/Markup.php
diff options
context:
space:
mode:
Diffstat (limited to 'lib/classes/Markup.php')
-rw-r--r--lib/classes/Markup.php788
1 files changed, 788 insertions, 0 deletions
diff --git a/lib/classes/Markup.php b/lib/classes/Markup.php
new file mode 100644
index 0000000..dc6820f
--- /dev/null
+++ b/lib/classes/Markup.php
@@ -0,0 +1,788 @@
+<?php
+/**
+ * Markup.php - Handling of Stud.IP- and HTML-markup.
+ **
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @category Stud.IP
+ * @copyright (c) 2014 Stud.IP e.V.
+ * @license http://www.gnu.org/licenses/gpl-2.0.html GPL version 2
+ * @since File available since Release 3.0
+ * @author Robert Costa <rcosta@uos.de>
+ */
+namespace Studip;
+
+require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyLinks.php';
+require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyTables.php';
+require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_LinkifyEmail.php';
+require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_TransformLinks.php';
+require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_Unlinkify.php';
+
+class Markup
+{
+    /**
+     * Convert marked-up text to HTML and run a formatter over the result.
+     *
+     * The text is first passed through `markupToHtml()` (without adding the
+     * HTML marker); the outcome is then handed to `$markup->format()`.
+     *
+     * @param TextFormat $markup Markup rules applied on marked-up text.
+     * @param string     $text   Marked-up text on which rules are applied.
+     * @param boolean    $trim   Trim text before applying markup rules, if TRUE.
+     *
+     * @return string HTML code computed from marked-up text.
+     */
+    public static function apply($markup, $text, $trim)
+    {
+        $html = self::markupToHtml($text, $trim, false);
+
+        return $markup->format($html);
+    }
+
+ // Marker that markAsHtml() prepends to a text to flag it as HTML.
+ const HTML_MARKER = '<!--HTML-->';
+
+ // Legacy marker; isHtmlFallback() treats it as Stud.IP 3.2's HTML marker,
+ // i.e. as the signature of HTML that may still contain Stud.IP markup.
+ const HTML_MARKER_FALLBACK = '<!-- HTML: Insert text after this line only. -->';
+
+ // Case-insensitive pattern for a comment starting with "HTML" at the very
+ // beginning of a text (leading whitespace allowed). Both markers above
+ // match this pattern.
+ const HTML_MARKER_REGEXP = '/^\s*<!--\s*HTML.*?-->/i';
+
+ /**
+ * Report whether the WYSIWYG editor is enabled for this user.
+ *
+ * The answer is hard-wired: this method unconditionally returns `true`.
+ *
+ * @deprecated since Stud.IP 5.5
+ *
+ * @return boolean always returns `true`.
+ */
+ public static function editorEnabled()
+ {
+ return true;
+ }
+
+ /**
+ * Tell HTML code apart from plain text.
+ *
+ * A text counts as HTML only if it carries an HTML marker, i.e. if it
+ * starts with a match for `HTML_MARKER_REGEXP` (leading whitespace is
+ * allowed). This method simply delegates to `hasHtmlMarker()`; text that
+ * merely begins with '<' and ends with '>' is NOT treated as HTML.
+ *
+ * @param string $text HTML code or plain text.
+ *
+ * @return mixed Result of `hasHtmlMarker()`: truthy for HTML code,
+ *               falsy for plain text.
+ */
+ public static function isHtml($text)
+ {
+ return self::hasHtmlMarker($text);
+ }
+
+    /**
+     * Tell whether a text is Stud.IP-HTML ("fallback" HTML).
+     *
+     * Stud.IP-HTML is HTML that can contain Stud.IP markup. It is
+     * recognised by Stud.IP 3.2's HTML marker (`HTML_MARKER_FALLBACK`) at
+     * the start of the text; surrounding whitespace is ignored.
+     *
+     * Anything else is not Stud.IP-HTML: it may be text carrying the
+     * current HTML marker, Stud.IP markup, plain text or any other data.
+     *
+     * @param string $text Text that is or isn't Stud.IP-HTML.
+     *
+     * @return boolean `true` for Stud.IP-HTML, `false` otherwise.
+     */
+    public static function isHtmlFallback($text)
+    {
+        $trimmed = trim($text);
+
+        // the current HTML marker rules out fallback mode right away
+        if (MarkupPrivate\Text\startsWith($trimmed, self::HTML_MARKER)) {
+            return false;
+        }
+
+        // otherwise only Stud.IP 3.2's marker identifies Stud.IP-HTML
+        return MarkupPrivate\Text\startsWith($trimmed, self::HTML_MARKER_FALLBACK);
+    }
+
+    /**
+     * Return `true` for HTML code and `false` for plain text.
+     *
+     * HTML code must start with a match for `HTML_MARKER_REGEXP`.
+     *
+     * @param string $text HTML code or plain text.
+     *
+     * @return boolean `true` for HTML code, `false` for plain text (also
+     *                 `false` if the pattern match itself fails).
+     */
+    public static function hasHtmlMarker($text)
+    {
+        // preg_match() yields 1, 0 or FALSE; compare explicitly so that
+        // callers get the documented boolean instead of an integer
+        return preg_match(self::HTML_MARKER_REGEXP, $text) === 1;
+    }
+
+    /**
+     * Flag a text as HTML code by prepending `HTML_MARKER`.
+     *
+     * The text itself is not checked or sanitised in any way. Text that
+     * already carries an HTML marker, and text that is empty or consists
+     * of whitespace only, is returned unchanged.
+     *
+     * @param string $text The text to be marked up as HTML code.
+     *
+     * @return string The text marked up as HTML code.
+     */
+    public static function markAsHtml($text)
+    {
+        // NOTE keep this function in sync with the JavaScript
+        // function markAsHtml in WyswygHtmlHead.php
+        if (self::hasHtmlMarker($text)) {
+            return $text; // marker already set, don't set twice
+        }
+        if (trim($text) === '') {
+            return $text; // nothing to mark
+        }
+
+        return self::HTML_MARKER . $text;
+    }
+
+    /**
+     * Escape plain text, apply markup rules and convert newlines to <br>.
+     *
+     * Steps, in order: normalise line breaks to Unix style, escape HTML
+     * special characters (optionally trimming), apply the markup rules and
+     * finally replace every remaining newline by '<br>'.
+     *
+     * @param TextFormat $markup Markup rules applied on marked-up text.
+     * @param string     $text   Marked-up text on which rules are applied.
+     * @param boolean    $trim   Trim text before applying markup rules, if TRUE.
+     *
+     * @return string HTML code computed from marked-up text.
+     */
+    private static function markupHtmlReady($markup, $text, $trim)
+    {
+        $normalized = self::unixEOL($text);
+        $escaped    = self::htmlReady($normalized, $trim);
+        $formatted  = self::markupText($markup, $escaped);
+
+        return str_replace("\n", '<br>', $formatted);
+    }
+
+    /**
+     * Normalise all line breaks to Unix format.
+     *
+     * Both "\r\n" (Windows) and a lone "\r" (old Mac) become "\n".
+     *
+     * @param string $text Text with possibly mixed line breaks (Win, Mac, Unix).
+     *
+     * @return string Text with Unix line breaks only.
+     */
+    private static function unixEOL($text)
+    {
+        // a carriage return, optionally followed by a line feed
+        $foreign_line_break = "/\r\n?/";
+
+        return preg_replace($foreign_line_break, "\n", $text);
+    }
+
+    /**
+     * Apply markup rules to a text and post-process it with `symbol()`.
+     *
+     * @param TextFormat $markup Markup rules applied on marked-up text.
+     * @param string     $text   Marked-up text on which rules are applied.
+     *
+     * @return string HTML code computed from marked-up text.
+     */
+    private static function markupText($markup, $text)
+    {
+        $formatted = $markup->format($text);
+
+        return symbol($formatted);
+    }
+
+    /**
+     * Run HTML code through a freshly configured HTMLPurifier.
+     *
+     * @param string  $dirty_html Unsafe or 'uncleaned' HTML code.
+     * @param boolean $autoformat Apply the AutoFormat rules
+     * @return string Clean and safe HTML code.
+     */
+    private static function purify($dirty_html, $autoformat = true)
+    {
+        return self::createPurifier($autoformat)->purify($dirty_html);
+    }
+
+    /**
+     * Filter HTML code with HTMLPurifier.
+     *
+     * Only input that is detected as HTML (see `isHtml()`) is purified, and
+     * the HTML marker is put back afterwards; any other input is returned
+     * unchanged. For an `\I18NString`, the base text and every entry of
+     * `toArray()` are purified individually and a new `\I18NString` is
+     * returned.
+     *
+     * @param string|\I18NString $html Unsafe or 'uncleaned' HTML code.
+     * @return string|\I18NString Clean and safe HTML code.
+     */
+    public static function purifyHtml($html)
+    {
+        if ($html instanceof \I18NString) {
+            $base = self::purifyHtml($html->original());
+            $lang = $html->toArray();
+
+            foreach ($lang as $key => $value) {
+                $lang[$key] = self::purifyHtml($value);
+            }
+
+            return new \I18NString($base, $lang);
+        }
+
+        if (!self::isHtml($html)) {
+            return $html;
+        }
+
+        return self::markAsHtml(self::purify($html));
+    }
+
+ /**
+ * Create HTML purifier instance with Stud.IP-specific configuration.
+ *
+ * The configuration whitelists tags, attributes, CSS classes, fonts and
+ * CSS properties, registers the custom injectors, hooks the <img src>
+ * check into the HTML definition and declares the additional elements
+ * audio, video, figure and figcaption.
+ *
+ * @param boolean $autoformat Apply the AutoFormat rules
+ * @return \HTMLPurifier A new instance of the HTML purifier.
+ */
+ private static function createPurifier($autoformat)
+ {
+ $config = \HTMLPurifier_Config::createDefault();
+ $config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']);
+ $config->set('Core.RemoveInvalidImg', true);
+
+ // restrict allowed HTML tags and attributes
+ //
+ // note that changes here should also be reflected in CKEditor's
+ // settings!!
+ //
+ // NOTE The list could be restricted even further by allowing only
+ // specific values for some attributes and CSS styles, but that is not
+ // directly supported by HTMLPurifier and would need to be implemented
+ // with a filter similar to ClassifyLinks.
+ //
+ // This is a list of further restrictions that can/should be introduced
+ // at a later time point maybe, if possible:
+ //
+ // - always open external links in a new tab or window
+ // a[class="link-extern" href="..." target="_blank"]
+ // - only allow left margin and horizontal text alignment to be set in
+ // divs (NOTE maybe remove these two features completely?):
+ // div[style="margin-left:(40|80|...)px; text-align:(center|right|justify)"]
+ // - img[style] should only allow float:left or float:right
+ // - only allow text color and background color to be set in a span's
+ // style attribute (NOTE 'wiki-links' are currently set here due to
+ // implementation difficulties, but probably this should be
+ // changed...):
+ // span[style="color:(#000000|#800000|...);
+ // background-color:(#000000|#800000|...)"
+ // class="wiki-link"]
+ // - tables should always have the class "content" (it should not be
+ // optional and no other class should be set):
+ // table[class="content"]
+ // - table headings should have a column and/or a row scope or no scope
+ // at all, but nothing else:
+ // th[scope="(col | row)"]
+ // - fonts: only Stud.IP-specific fonts should be allowed
+ //
+ $config->set('HTML.Allowed', '
+ a[class|href|target|rel|name|id]
+ audio[controls|src|height|width|style]
+ big
+ blockquote
+ br
+ caption
+ code[class]
+ div[class|style]
+ em
+ figure[class|style]
+ figcaption
+ h1
+ h2
+ h3
+ h4
+ h5
+ h6
+ hr
+ i
+ img[alt|src|height|width|class|style]
+ li
+ ol[reversed|start|style]
+ p[style]
+ pre[class]
+ span[style|class]
+ strong
+ u
+ ul[style]
+ s
+ small
+ sub
+ sup
+ table[class|style]
+ tbody
+ td[colspan|rowspan|style]
+ thead
+ th[colspan|rowspan|style|scope]
+ tr
+ tt
+ video[controls|src|height|width|style]
+ ');
+
+ // whitelists for link targets, rel values, CSS classes, fonts and
+ // CSS properties; values not listed here are not configured as allowed
+ $config->set('Attr.AllowedFrameTargets', ['_blank']);
+ $config->set('Attr.AllowedRel', ['nofollow']);
+ $config->set('Attr.EnableID', true);
+ $config->set('Attr.AllowedClasses', [
+ 'author',
+ 'content',
+ 'image',
+ 'image-style-side',
+ 'image_resized',
+ 'language-cpp',
+ 'language-css',
+ 'language-diff',
+ 'language-java',
+ 'language-javascript',
+ 'language-json',
+ 'language-php',
+ 'language-python',
+ 'language-ruby',
+ 'language-scss',
+ 'language-sql',
+ 'language-xml',
+ 'link-extern',
+ 'link-intern',
+ 'math-tex',
+ 'table',
+ 'usercode',
+ 'wiki-link'
+ ]);
+ $config->set('CSS.AllowedFonts', [
+ 'serif',
+ 'sans-serif',
+ 'monospace',
+ 'cursive'
+ ]);
+ $config->set('CSS.AllowedProperties', [
+ 'margin-left',
+ 'text-align',
+ 'width',
+ 'height',
+ 'color',
+ 'background-color', // needed by span, td
+ 'border-color',
+ 'border-style',
+ 'float',
+ 'border',
+ 'vertical-align'
+ ]);
+ $config->set('CSS.MaxImgLength', null);
+
+ // with auto-formatting, the Linkify option and the link/table/e-mail
+ // injectors are enabled and attribute-less spans are removed;
+ // without it, only the TransformLinks injector is registered
+ if ($autoformat) {
+ $config->set('AutoFormat.Linkify', true);
+ $config->set('AutoFormat.Custom', [
+ 'ClassifyLinks',
+ 'ClassifyTables',
+ 'LinkifyEmail'
+ ]);
+ $config->set('AutoFormat.RemoveSpansWithoutAttributes', true);
+ } else {
+ $config->set('AutoFormat.Custom', ['TransformLinks']);
+ }
+
+ // avoid <img src="evil_CSRF_stuff">
+ // (every <img> passes through AttrTransform_Image_Source, which
+ // rewrites or removes the src attribute)
+ $def = $config->getHTMLDefinition(true);
+ $img = $def->addBlankElement('img');
+ $img->attr_transform_post[]
+ = new MarkupPrivate\Purifier\AttrTransform_Image_Source();
+
+ // declare the media elements <audio> and <video>
+ // NOTE(review): the trailing '*' in 'src*' presumably marks the
+ // attribute as required - confirm against the HTMLPurifier docs
+ $def->addElement('audio', 'Inline', 'Flow', 'Common', [
+ 'src*' => 'URI',
+ 'width' => 'Length',
+ 'height' => 'Length',
+ 'controls' => 'Text', // Bool triggers bug in HTMLPurifier
+ ]);
+
+ $def->addElement('video', 'Inline', 'Flow', 'Common', [
+ 'src*' => 'URI',
+ 'width' => 'Length',
+ 'height' => 'Length',
+ 'controls' => 'Text', // Bool triggers bug in HTMLPurifier
+ ]);
+
+ $def->addElement('figcaption', 'Inline', 'Flow', 'Common');
+ $def->addElement('figure', 'Block', 'Optional: (figcaption, Flow) | (Flow, figcaption) | Flow', 'Common');
+
+ $def->addAttribute('ol', 'reversed', 'Bool');
+ // NOTE(review): 'style' is declared as free 'Text' for lists here;
+ // presumably this bypasses the CSS.AllowedProperties filtering for
+ // <ol>/<ul> - confirm that this is intended
+ $def->addAttribute('ol', 'style', 'Text');
+ $def->addAttribute('ul', 'style', 'Text');
+
+ return new \HTMLPurifier($config);
+ }
+
+    /**
+     * Escape HTML special characters and optionally tidy up the result.
+     *
+     * Escaping is done with `htmlspecialchars()` using `ENT_QUOTES` and
+     * UTF-8. Trimming and newline conversion are applied to the escaped
+     * text, in that order.
+     *
+     * @param string  $text          This text's special chars will be converted.
+     * @param boolean $trim          Trim the escaped text, if TRUE.
+     * @param boolean $br            Insert <br> before newlines, if TRUE.
+     * @param boolean $double_encode Encode existing HTML entities, if TRUE.
+     * @return string The converted string.
+     */
+    public static function htmlReady(
+        $text, $trim = true, $br = false, $double_encode = true
+    ) {
+        $escaped = htmlspecialchars($text, ENT_QUOTES, 'utf-8', $double_encode);
+        $escaped = $trim ? trim($escaped) : $escaped;
+
+        // nl2br() in non-XHTML mode emits '<br>' and keeps the newline
+        return $br ? nl2br($escaped, false) : $escaped;
+    }
+
+    /**
+     * Prepare text for the WYSIWYG editor.
+     *
+     * If the editor is enabled (see `editorEnabled()`), the text is first
+     * converted to HTML with `markupToHtml()`. In either case the result is
+     * then escaped with `htmlReady()`.
+     *
+     * @param string  $text          The text.
+     * @param boolean $trim          Trim text before applying markup rules, if TRUE.
+     * @param boolean $br            Passed on to htmlReady(): replace newlines by <br>, if TRUE.
+     * @param boolean $double_encode Passed on to htmlReady(): encode existing HTML entities, if TRUE.
+     * @return string The converted string.
+     */
+    public static function wysiwygReady(
+        $text, $trim = true, $br = false, $double_encode = true
+    ) {
+        $editable = self::editorEnabled()
+            ? self::markupToHtml($text, $trim)
+            : $text;
+
+        return self::htmlReady($editable, $trim, $br, $double_encode);
+    }
+
+    /**
+     * Convert Stud.IP markup (possibly mixed with HTML if fallback mode is
+     * enabled) to editable HTML. Pure HTML will only run through the purifier.
+     *
+     * Text that is empty or consists of whitespace only is returned
+     * unchanged and unmarked. Every other text - including the text "0" -
+     * is converted.
+     *
+     * @param string  $text The text.
+     * @param boolean $trim Trim text before applying markup rules, if TRUE.
+     * @param boolean $mark Mark result text as HTML, if TRUE.
+     * @return string The converted string.
+     */
+    public static function markupToHtml($text, $trim = true, $mark = true)
+    {
+        // compare against '' explicitly: a truthiness check would treat the
+        // text "0" as empty and return it unconverted and unmarked
+        if (trim($text) === '') {
+            return $text;
+        }
+
+        if (self::isHtml($text)) {
+            // detect fallback mode on the unpurified text (presumably the
+            // purifier drops the marker comment - confirm)
+            $is_fallback = self::isHtmlFallback($text);
+            $text = self::purify($text, false);
+
+            if ($is_fallback) {
+                // Stud.IP-HTML may still contain Stud.IP markup
+                $text = self::markupText(new \StudipCoreFormat(), $text);
+            }
+        } else {
+            $text = self::markupHtmlReady(new \StudipCoreFormat(), $text, $trim);
+        }
+
+        return $mark ? self::markAsHtml($text) : $text;
+    }
+
+    /**
+     * Reduce HTML code to text by running it through HTMLPurifier.
+     *
+     * Input that is not detected as HTML (see `isHtml()`) is returned
+     * unchanged. For HTML input, line breaks are inserted after closing
+     * list, table row, paragraph and div tags, the purifier is run with
+     * only a[href], img[alt|src] and br allowed plus the Unlinkify
+     * injector, and the remaining '<br />' tags are turned into newlines
+     * before the result is trimmed and passed to `decodeHTML()`.
+     *
+     * @param string $html HTML code to filter
+     * @return string The converted string.
+     */
+    public static function removeHtml($html)
+    {
+        if (!self::isHtml($html)) {
+            return $html;
+        }
+
+        $config = \HTMLPurifier_Config::createDefault();
+        $config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']);
+        $config->set('HTML.Allowed', 'a[href],img[alt|src],br');
+        $config->set('AutoFormat.Custom', ['Unlinkify']);
+
+        // closing tag => same tag followed by the line breaks that keep
+        // the visual separation once the tag itself is stripped
+        $line_breaks = [
+            '</li>'  => '</li><br>',
+            '</ol>'  => '</ol><br>',
+            '</ul>'  => '</ul><br>',
+            '</tr>'  => '</tr><br>',
+            '</p>'   => '</p><br><br>',
+            '</div>' => '</div><br><br>',
+        ];
+        foreach ($line_breaks as $closing_tag => $with_breaks) {
+            $html = str_replace($closing_tag, $with_breaks, $html);
+        }
+
+        $purifier = new \HTMLPurifier($config);
+        $plain = $purifier->purify($html);
+
+        // Replace line break tags with simple new lines; in two passes
+        // because we don't want to create unnecessary white space if a
+        // <br /> is followed by a new line
+        $plain = str_replace('<br />' . PHP_EOL, PHP_EOL, $plain);
+        $plain = str_replace('<br />', PHP_EOL, $plain);
+
+        return \decodeHTML(trim($plain));
+    }
+}
+
+/**
+ * Members of Studip\MarkupPrivate must not be used outside of this file!!
+ */
+
+namespace Studip\MarkupPrivate\Purifier;
+
+use Studip\MarkupPrivate\MediaProxy;
+
+/**
+ * Check and rewrite <img src> attributes.
+ *
+ * The src value is replaced by the URL returned from
+ * MediaProxy\getMediaUrl(). If that function rejects the URL, the src
+ * attribute is set to NULL and a message is appended to $GLOBALS['msg'].
+ */
+class AttrTransform_Image_Source extends \HTMLPurifier_AttrTransform
+{
+    /**
+     * Implements abstract method of base class.
+     *
+     * @param array $attr    Attributes of the <img> element.
+     * @param mixed $config  Purifier configuration (unused here).
+     * @param mixed $context Purifier context (unused here).
+     * @return array The attributes with a checked 'src' entry.
+     */
+    public function transform($attr, $config, $context)
+    {
+        // an <img> without src has nothing to check; reading the missing
+        // key would raise an "undefined array key" warning and pass NULL
+        // on to the media URL handling
+        if (!isset($attr['src'])) {
+            return $attr;
+        }
+
+        try {
+            $attr['src'] = MediaProxy\getMediaUrl($attr['src']);
+        } catch (MediaProxy\InvalidInternalLinkException $e) {
+            // invalid internal link ==> remove <img src> attribute
+            $GLOBALS['msg'][] = _('Ungültige interne Medienverknüpfung entfernt: ')
+                . \htmlentities($e->getUrl());
+            $attr['src'] = null; // remove <img src> attribute
+        } catch (MediaProxy\ExternalMediaDeniedException $e) {
+            // external media not permitted ==> remove <img src> attribute
+            $GLOBALS['msg'][] = _('Verbotene externe Medienverknüpfung entfernt: ')
+                . \htmlentities($e->getUrl());
+            $attr['src'] = null; // remove <img src> attribute
+        }
+
+        return $attr;
+    }
+}
+
+//// media proxy //////////////////////////////////////////////////////////////
+
+namespace Studip\MarkupPrivate\MediaProxy;
+
+use Studip\MarkupPrivate\Text;
+
+/**
+ * Decide how a media URL may be used and return the URL to embed.
+ *
+ * Internal links to valid media locations are returned in normalized
+ * form. External links are returned as they are, wrapped in a media
+ * proxy URL, or rejected - depending on the LOAD_EXTERNAL_MEDIA setting.
+ *
+ * @param string $url URL to media file.
+ * @return string URL to media file (possibly 'proxied').
+ * @throws InvalidInternalLinkException If the URL is Stud.IP-internal but
+ *                                      not a valid media location.
+ * @throws ExternalMediaDeniedException If the external URL may neither be
+ *                                      loaded directly nor be proxied.
+ */
+function getMediaUrl($url) {
+    // even though proxied URLs shouldn't be stored in the database, the
+    // next line will handle those cases where they're accidentally there
+    $url = decodeMediaProxyUrl($url);
+
+    // handle internal media links
+    if (isStudipMediaUrl($url)) {
+        return transformInternalIdnaLink($url);
+    }
+    if (isInternalLink($url)) {
+        // link is studip-internal, but not to a valid media location
+        throw new InvalidInternalLinkException($url);
+    }
+
+    // handle external media links
+    $policy = \Config::get()->LOAD_EXTERNAL_MEDIA;
+    if ($policy === 'allow') {
+        return $url;
+    }
+
+    $use_proxy = $policy === 'proxy'
+        && \Seminar_Session::is_current_session_authenticated();
+    if ($use_proxy) {
+        // media proxy must be accessed by an internal link
+        return encodeMediaProxyUrl($url);
+    }
+
+    throw new ExternalMediaDeniedException($url);
+}
+
+/**
+ * Build the media proxy URL for an unproxied URL.
+ *
+ * The original URL is passed to the proxy URL-encoded in the 'url' query
+ * parameter.
+ *
+ * @param string $url Unproxied media URL.
+ * @return string Media proxy URL for accessing the same resource.
+ */
+function encodeMediaProxyUrl($url) {
+    $proxy_base = getMediaProxyUrl();
+    $target     = \urlencode(\idna_link($url));
+
+    return transformInternalIdnaLink($proxy_base . '?url=' . $target);
+}
+
+/**
+ * Recover the original URL from a media proxy URL.
+ *
+ * @param string $url The media proxy URL.
+ * @return string The original URL. If $url does not point to the media
+ *                proxy then this is the exact same value given by $url.
+ */
+function decodeMediaProxyUrl($url) {
+    # TODO make it work for 'url=' at any position in query
+    $url_path     = removeStudipDomain($url);
+    $proxy_prefix = removeStudipDomain(getMediaProxyUrl()) . '?url=';
+
+    if (!Text\startsWith($url_path, $proxy_prefix)) {
+        return $url;
+    }
+
+    return \urldecode(Text\removePrefix($url_path, $proxy_prefix));
+}
+
+/**
+ * Return Stud.IP's absolute media proxy URL.
+ *
+ * @return string $GLOBALS['ABSOLUTE_URI_STUDIP'] followed by the media
+ *                proxy's dispatch path.
+ */
+function getMediaProxyUrl() {
+    $base_uri = $GLOBALS['ABSOLUTE_URI_STUDIP'];
+
+    return $base_uri . 'dispatch.php/media_proxy';
+}
+
+/**
+ * Test if a URL points to a valid internal Stud.IP media path.
+ *
+ * The URL must be an internal link and its Stud.IP-relative path must be
+ * accepted by isStudipMediaUrlPath().
+ *
+ * @param string $url Internal Stud.IP URL.
+ * @return boolean TRUE for internal media link URLs, FALSE otherwise.
+ */
+function isStudipMediaUrl($url) {
+    if (!isInternalLink($url)) {
+        return false;
+    }
+
+    return isStudipMediaUrlPath(getStudipRelativePath($url));
+}
+
+/**
+ * Test if a URL is a Stud.IP-internal link.
+ *
+ * The URL is normalized with transformInternalIdnaLink() before it is
+ * handed to is_internal_url().
+ *
+ * @param string $url The URL to test.
+ * @return mixed Result of is_internal_url() for the normalized URL.
+ */
+function isInternalLink($url) {
+    $normalized = transformInternalIdnaLink($url);
+
+    return is_internal_url($normalized);
+}
+
+//// url utilities ////////////////////////////////////////////////////////////
+
+/**
+ * Strip the domain part from internal URLs.
+ *
+ * For internal Stud.IP URLs only path, query and fragment are kept;
+ * scheme, domain and authentication information are dropped. External
+ * URLs are left untouched.
+ *
+ * @param string $url URL from which to remove internal domain.
+ * @return string URL without internal domain or the exact same
+ *                value as $url for external URLs.
+ */
+function removeStudipDomain($url) {
+    if (!isInternalLink($url)) {
+        return $url;
+    }
+
+    $parts = \parse_url(transformInternalIdnaLink($url));
+
+    // reassemble only the components that are actually present
+    $relative = isset($parts['path']) ? $parts['path'] : '';
+    if (isset($parts['query'])) {
+        $relative .= '?' . $parts['query'];
+    }
+    if (isset($parts['fragment'])) {
+        $relative .= '#' . $parts['fragment'];
+    }
+
+    return $relative;
+}
+
+/**
+ * Return a URL's path component with the absolute Stud.IP path removed.
+ *
+ * NOTE: If the URL is not an internal Stud.IP URL, the path component will
+ * nevertheless be returned without issuing an error message.
+ *
+ * A URL without a path component (or one that cannot be parsed at all)
+ * yields an empty string.
+ *
+ * Example:
+ * >>> getStudipRelativePath('http://localhost:8080'
+ * . '/studip/sendfile.php?type=0&file_id=ABC123&file_name=nice.jpg')
+ * 'sendfile.php'
+ *
+ * @param string $url The URL from which to return the Stud.IP-relative
+ *                    path component.
+ * @return string Stud.IP-relative path component of $url.
+ */
+function getStudipRelativePath($url) {
+    $parsed_url = \parse_url(transformInternalIdnaLink($url));
+    $parsed_studip_url = getParsedStudipUrl();
+
+    // parse_url() omits the 'path' key for URLs without a path and returns
+    // FALSE for seriously malformed URLs; treat both as an empty path
+    // instead of reading an undefined array key
+    $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
+    $studip_path = isset($parsed_studip_url['path'])
+        ? $parsed_studip_url['path']
+        : '';
+
+    return Text\removePrefix($path, $studip_path);
+}
+
+/**
+ * Split the Stud.IP base URL into its components.
+ *
+ * The URL is read from $GLOBALS['ABSOLUTE_URI_STUDIP'].
+ *
+ * see also: http://php.net/manual/en/function.parse-url.php
+ *
+ * @return mixed Same values that PHP's parse_url() returns.
+ */
+function getParsedStudipUrl() {
+    $studip_uri = $GLOBALS['ABSOLUTE_URI_STUDIP'];
+
+    return \parse_url($studip_uri);
+}
+
+/**
+ * Test if path is valid for internal Stud.IP media URLs.
+ *
+ * A path is valid if its URL-decoded form contains no '../' and its first
+ * segment is one of the known media locations.
+ *
+ * @param string $path The path component of a URL.
+ * @return boolean TRUE for valid media paths, FALSE otherwise.
+ */
+function isStudipMediaUrlPath($path) {
+    $segments = \explode('/', $path);
+    $first_segment = $segments[0];
+    $media_locations = ['sendfile.php', 'download', 'assets', 'pictures'];
+
+    // reject attempts to climb out of the media location
+    if (\mb_strpos(\urldecode($path), '../') !== false) {
+        return false;
+    }
+
+    return \in_array($first_segment, $media_locations);
+}
+
+/**
+ * Return a normalized, internal URL.
+ *
+ * The URL is passed through TransformInternalLinks() and then through
+ * idna_link().
+ *
+ * @param string $url An internal URL.
+ * @return string Normalized internal URL.
+ */
+function transformInternalIdnaLink($url) {
+    $internal = \TransformInternalLinks($url);
+
+    return \idna_link($internal);
+}
+
+//// url exceptions ///////////////////////////////////////////////////////////
+
+/**
+ * Base class for exceptions that concern a specific URL.
+ *
+ * The exception carries no message; the URL is available via getUrl().
+ */
+class UrlException extends \Exception
+{
+    /** @var mixed The URL this exception is about. */
+    private $url;
+
+    /**
+     * @param string $url The URL this exception is about.
+     */
+    public function __construct($url)
+    {
+        $this->url = $url;
+        parent::__construct();
+    }
+
+    /**
+     * @return string The URL passed to the constructor.
+     */
+    public function getUrl()
+    {
+        return $this->url;
+    }
+}
+
+/**
+ * Thrown by getMediaUrl() for a URL that is Stud.IP-internal but does not
+ * point to a valid media location.
+ */
+class InvalidInternalLinkException extends UrlException
+{
+}
+
+/**
+ * Thrown by getMediaUrl() for an external URL when the LOAD_EXTERNAL_MEDIA
+ * setting neither allows loading it directly nor proxying it for the
+ * current session.
+ */
+class ExternalMediaDeniedException extends UrlException
+{
+}
+
+//// string utilities /////////////////////////////////////////////////////////
+
+namespace Studip\MarkupPrivate\Text;
+
+/**
+ * Test if string starts with prefix.
+ *
+ * The comparison is done on multibyte characters; an empty prefix matches
+ * every string.
+ *
+ * @param string $string Tested string.
+ * @param string $prefix Prefix of tested string.
+ *
+ * @return boolean TRUE if string starts with prefix.
+ */
+function startsWith($string, $prefix) {
+    $prefix_length = \mb_strlen($prefix);
+    $head = \mb_substr($string, 0, $prefix_length);
+
+    return $head === $prefix;
+}
+
+/**
+ * Test if string ends with suffix.
+ *
+ * The comparison is done on multibyte characters. Like startsWith(), an
+ * empty suffix matches every string.
+ *
+ * @param string $string Tested string.
+ * @param string $suffix Suffix of tested string.
+ *
+ * @return boolean TRUE if string ends with suffix.
+ */
+function endsWith($string, $suffix) {
+    $suffix_length = \mb_strlen($suffix);
+
+    // a start offset of -0 is 0 and would select the whole string rather
+    // than an empty tail, so the empty suffix needs its own branch
+    if ($suffix_length === 0) {
+        return true;
+    }
+
+    return \mb_substr($string, -$suffix_length) === $suffix;
+}
+
+/**
+ * Cut a prefix off the start of a string.
+ *
+ * A string that does not start with the given prefix is returned
+ * unchanged.
+ *
+ * @param string $string The string that must start with the prefix.
+ * @param string $prefix The prefix of the string.
+ *
+ * @return string String without prefix.
+ */
+function removePrefix($string, $prefix) {
+    if (!startsWith($string, $prefix)) {
+        return $string;
+    }
+
+    return \mb_substr($string, \mb_strlen($prefix));
+}