*/
namespace Studip;
require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyLinks.php';
require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_ClassifyTables.php';
require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_LinkifyEmail.php';
require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_TransformLinks.php';
require_once __DIR__ . '/htmlpurifier/HTMLPurifier_Injector_Unlinkify.php';
class Markup
{
/**
 * Convert marked-up text to HTML and run the given rules over the result.
 *
 * The text is first passed through markupToHtml() without adding the
 * HTML marker; the outcome is then handed to the given formatter.
 *
 * @param TextFormat $markup Markup rules applied to the converted text.
 * @param string     $text   Marked-up text on which rules are applied.
 * @param boolean    $trim   Trim text before applying markup rules, if TRUE.
 *
 * @return string HTML code computed from marked-up text.
 */
public static function apply($markup, $text, $trim)
{
    $html = self::markupToHtml($text, $trim, false);

    return $markup->format($html);
}
// Signature for HTML entries. It is an HTML comment so that it stays
// invisible when the text is rendered. It must not be empty: an empty
// marker would turn markAsHtml() into a no-op.
const HTML_MARKER = '<!--HTML-->';
// Signature for HTML fallback entries (HTML that may still contain
// Stud.IP markup, see isHtmlFallback()).
const HTML_MARKER_FALLBACK = '<!-- HTML: Insert text after this line only. -->';
// Regular expression for detecting an HTML signature at the start of a
// text (leading whitespace allowed, case-insensitive). It matches both
// HTML_MARKER and HTML_MARKER_FALLBACK, but not arbitrary text.
const HTML_MARKER_REGEXP = '/^\s*<!--\s*HTML.*?-->/i';
/**
 * Tell whether the WYSIWYG editor is active for the current user.
 *
 * The editor counts as enabled when the global `WYSIWYG` configuration
 * entry is set and the configuration of `$GLOBALS['user']` does not have
 * `WYSIWYG_DISABLED` set.
 *
 * @return boolean `true` if the editor is enabled.
 */
public static function editorEnabled()
{
    // globally switched off: the user's setting is not even consulted
    if (!\Config::get()->WYSIWYG) {
        return false;
    }

    $user_config = $GLOBALS['user']->cfg;

    return !$user_config->WYSIWYG_DISABLED;
}
/**
 * Tell HTML code apart from plain text.
 *
 * A text is considered HTML code only if it carries the HTML marker,
 * i.e. if hasHtmlMarker() accepts it. Everything else is considered
 * to be plain text.
 *
 * @param string $text HTML code or plain text.
 *
 * @return mixed The result of hasHtmlMarker(), passed through unchanged:
 *               truthy for HTML code, falsy for plain text.
 */
public static function isHtml($text)
{
    $marked = self::hasHtmlMarker($text);

    return $marked;
}
/**
 * Tell whether a text is Stud.IP-HTML.
 *
 * Stud.IP-HTML is HTML that can contain Stud.IP markup. It is
 * recognised by Stud.IP 3.2's HTML marker (`HTML_MARKER_FALLBACK`)
 * at the start of the text; leading and trailing whitespace is
 * ignored.
 *
 * A text starting with the new marker (`HTML_MARKER`) is never
 * fallback HTML. Anything that carries neither marker is not
 * Stud.IP-HTML either; it might be plain text, Stud.IP markup,
 * plain HTML code or something else entirely.
 *
 * @param string $text Text that is or isn't Stud.IP-HTML.
 *
 * @return boolean `true` for Stud.IP-HTML, `false` otherwise.
 */
public static function isHtmlFallback($text)
{
    $trimmed = trim($text);

    // The new marker takes precedence; only if it is absent is the
    // Stud.IP 3.2 marker looked for.
    return !MarkupPrivate\Text\startsWith($trimmed, self::HTML_MARKER)
        && MarkupPrivate\Text\startsWith($trimmed, self::HTML_MARKER_FALLBACK);
}
/**
 * Return `true` for HTML code and `false` for plain text.
 *
 * HTML code must start with a match for `HTML_MARKER_REGEXP`.
 *
 * @param string $text HTML code or plain text.
 *
 * @return boolean `true` for HTML code, `false` for plain text.
 */
public static function hasHtmlMarker($text)
{
    // preg_match() yields 1 (match), 0 (no match) or false (error).
    // Compare against 1 so that callers really get the documented
    // boolean and a failed match attempt counts as "no marker".
    return preg_match(self::HTML_MARKER_REGEXP, $text) === 1;
}
/**
 * Flag a text as HTML code by prepending `HTML_MARKER`.
 *
 * The text itself is not checked or cleaned in any way. A text that
 * already carries a marker, or that is empty after trimming, is
 * returned unchanged.
 *
 * @param string $text The text to be marked up as HTML code.
 *
 * @return string The text marked up as HTML code.
 */
public static function markAsHtml($text)
{
    // NOTE keep this function in sync with the JavaScript
    // function markAsHtml in WyswygHtmlHead.php
    $already_marked = self::hasHtmlMarker($text);

    if (!$already_marked && trim($text) !== '') {
        return self::HTML_MARKER . $text;
    }

    // marker already set (don't set twice) or nothing to mark
    return $text;
}
/**
 * Apply markup rules after running text through htmlReady().
 *
 * Processing order: line breaks are normalised to Unix format, special
 * characters are converted to HTML entities, the markup rules are
 * applied, and finally every remaining newline is turned into an HTML
 * line break.
 *
 * @param TextFormat $markup Markup rules applied on marked-up text.
 * @param string     $text   Marked-up text on which rules are applied.
 * @param boolean    $trim   Trim text before applying markup rules, if TRUE.
 *
 * @return string HTML code computed from marked-up text.
 */
private static function markupHtmlReady($markup, $text, $trim)
{
    $escaped = self::htmlReady(self::unixEOL($text), $trim);
    $formatted = self::markupText($markup, $escaped);

    // Replacing "\n" by a literal newline would be a no-op; emit an HTML
    // line break so the line structure of the text survives in the HTML.
    return str_replace("\n", '<br>', $formatted);
}
/**
 * Normalise line breaks to Unix format.
 *
 * Every "\r\n" pair and every lone "\r" is replaced by a single "\n".
 *
 * @param string $text Text with possibly mixed line breaks (Win, Mac, Unix).
 *
 * @return string Text with Unix line breaks only.
 */
private static function unixEOL($text)
{
    $foreign_eol = "/\r\n?/";
    $unix_eol = "\n";

    return preg_replace($foreign_eol, $unix_eol, $text);
}
/**
 * Run the markup rules over a text and post-process the result with
 * symbol().
 *
 * @param TextFormat $markup Markup rules applied on marked-up text.
 * @param string     $text   Marked-up text on which rules are applied.
 *
 * @return string HTML code computed from marked-up text.
 */
private static function markupText($markup, $text)
{
    $formatted = $markup->format($text);

    return symbol($formatted);
}
/**
 * Run HTML code through a freshly configured HTMLPurifier.
 *
 * @param string  $dirty_html Unsafe or 'uncleaned' HTML code.
 * @param boolean $autoformat Apply the AutoFormat rules
 * @return string Clean and safe HTML code.
 */
private static function purify($dirty_html, $autoformat = true)
{
    return self::createPurifier($autoformat)->purify($dirty_html);
}
/**
 * Call HTMLPurifier to filter the HTML code (if the source is detected
 * to contain HTML, returns the argument unchanged otherwise). The HTML
 * marker is restored afterwards, if it was present.
 *
 * An `I18NString` is handled by purifying its original text and each
 * entry of its `toArray()` result separately; a new `I18NString` built
 * from the purified values is returned.
 *
 * @param string|\I18NString $html Unsafe or 'uncleaned' HTML code.
 * @return string|\I18NString Clean and safe HTML code.
 */
public static function purifyHtml($html)
{
    if ($html instanceof \I18NString) {
        $base = self::purifyHtml($html->original());

        // Build a fresh array instead of rewriting entries by reference,
        // so no dangling reference ends up in the array passed on below.
        $lang = [];
        foreach ($html->toArray() as $key => $value) {
            $lang[$key] = self::purifyHtml($value);
        }

        return new \I18NString($base, $lang);
    }

    if (self::isHtml($html)) {
        $html = self::markAsHtml(self::purify($html));
    }

    return $html;
}
/**
 * Create HTML purifier instance with Stud.IP-specific configuration.
 *
 * The configuration restricts the allowed elements, attributes, CSS
 * classes, fonts and CSS properties, registers the custom injectors
 * selected by `$autoformat`, and extends the HTML definition with the
 * `audio`, `video`, `figure` and `figcaption` elements.
 *
 * @param boolean $autoformat Apply the AutoFormat rules (linkify plus the
 *                            ClassifyLinks, ClassifyTables and LinkifyEmail
 *                            injectors) if TRUE; otherwise only the
 *                            TransformLinks injector is registered.
 * @return \HTMLPurifier A new instance of the HTML purifier.
 */
private static function createPurifier($autoformat)
{
$config = \HTMLPurifier_Config::createDefault();
// keep HTMLPurifier's serializer cache in the configured temp directory
$config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']);
$config->set('Core.RemoveInvalidImg', true);
// restrict allowed HTML tags and attributes
//
// note that changes here should also be reflected in CKEditor's
// settings!!
//
// NOTE The list could be restricted even further by allowing only
// specific values for some attributes and CSS styles, but that is not
// directly supported by HTMLPurifier and would need to be implemented
// with a filter similar to ClassifyLinks.
//
// This is a list of further restrictions that can/should be introduced
// at a later time point maybe, if possible:
//
// - always open external links in a new tab or window
// a[class="link-extern" href="..." target="_blank"]
// - only allow left margin and horizontal text alignment to be set in
// divs (NOTE maybe remove these two features completely?):
// div[style="margin-left:(40|80|...)px; text-align:(center|right|justify)"]
// - img[style] should only allow float:left or float:right
// - only allow text color and background color to be set in a span's
// style attribute (NOTE 'wiki-links' are currently set here due to
// implementation difficulties, but probably this should be
// changed...):
// span[style="color:(#000000|#800000|...);
// background-color:(#000000|#800000|...)"
// class="wiki-link"]
// - tables should always have the class "content" (it should not be
// optional and no other class should be set):
// table[class="content"]
// - table headings should have a column and/or a row scope or no scope
// at all, but nothing else:
// th[scope="(col | row)"]
// - fonts: only Stud.IP-specific fonts should be allowed
//
$config->set('HTML.Allowed', '
a[class|href|target|rel|name|id]
audio[controls|src|height|width|style]
big
blockquote
br
caption
code[class]
div[class|style]
em
figure[class|style]
figcaption
h1
h2
h3
h4
h5
h6
hr
i
img[alt|src|height|width|class|style]
li
ol
p[style]
pre[class]
span[style|class]
strong
u
ul
s
small
sub
sup
table[class|style]
tbody
td[colspan|rowspan|style]
thead
th[colspan|rowspan|style|scope]
tr
tt
video[controls|src|height|width|style]
');
// links may only target a new window/tab and may only carry rel="nofollow"
$config->set('Attr.AllowedFrameTargets', ['_blank']);
$config->set('Attr.AllowedRel', ['nofollow']);
// id attributes are allowed (a[id] is part of HTML.Allowed above)
$config->set('Attr.EnableID', true);
// whitelist of CSS class names; any other class is dropped
$config->set('Attr.AllowedClasses', [
'author',
'content',
'image',
'image-style-side',
'image_resized',
'language-cpp',
'language-css',
'language-diff',
'language-java',
'language-javascript',
'language-json',
'language-php',
'language-python',
'language-ruby',
'language-scss',
'language-sql',
'language-xml',
'link-extern',
'link-intern',
'math-tex',
'table',
'usercode',
'wiki-link'
]);
// only generic font families may be used in style attributes
$config->set('CSS.AllowedFonts', [
'serif',
'sans-serif',
'monospace',
'cursive'
]);
// whitelist of CSS properties allowed in style attributes
$config->set('CSS.AllowedProperties', [
'margin-left',
'text-align',
'width',
'height',
'color',
'background-color', // needed by span, td
'border-color',
'border-style',
'float',
'border'
]);
// null disables the upper bound on image width/height set via CSS
$config->set('CSS.MaxImgLength', null);
if ($autoformat) {
// turn plain URLs into links and run the custom injectors that are
// loaded by the require_once statements at the top of this file
$config->set('AutoFormat.Linkify', true);
$config->set('AutoFormat.Custom', [
'ClassifyLinks',
'ClassifyTables',
'LinkifyEmail'
]);
$config->set('AutoFormat.RemoveSpansWithoutAttributes', true);
} else {
// without auto-formatting only the TransformLinks injector is applied
$config->set('AutoFormat.Custom', ['TransformLinks']);
}
// Attach a custom attribute transformation to every img element; it runs
// after HTMLPurifier's own attribute validation (attr_transform_post).
// NOTE(review): the transform class is defined elsewhere; presumably it
// guards against unwanted image sources - confirm against its definition.
$def = $config->getHTMLDefinition(true);
$img = $def->addBlankElement('img');
$img->attr_transform_post[]
= new MarkupPrivate\Purifier\AttrTransform_Image_Source();
// register elements that the default definition in use here does not
// provide; the trailing '*' in 'src*' marks the attribute as required
$def->addElement('audio', 'Inline', 'Flow', 'Common', [
'src*' => 'URI',
'width' => 'Length',
'height' => 'Length',
'controls' => 'Text', // Bool triggers bug in HTMLPurifier
]);
$def->addElement('video', 'Inline', 'Flow', 'Common', [
'src*' => 'URI',
'width' => 'Length',
'height' => 'Length',
'controls' => 'Text', // Bool triggers bug in HTMLPurifier
]);
$def->addElement('figcaption', 'Inline', 'Flow', 'Common');
// a figure may contain flow content with an optional figcaption placed
// either first or last
$def->addElement('figure', 'Block', 'Optional: (figcaption, Flow) | (Flow, figcaption) | Flow', 'Common');
return new \HTMLPurifier($config);
}
/**
 * Convert special characters to HTML entities, and clean up.
 *
 * Both double and single quotes are converted. Byte sequences that are
 * not valid UTF-8 are replaced by U+FFFD instead of making the whole
 * result collapse to an empty string.
 *
 * @param string  $text          This text's special chars will be converted.
 * @param boolean $trim          Trim text after converting special chars, if TRUE.
 * @param boolean $br            Insert `<br>` before each newline, if TRUE.
 * @param boolean $double_encode Encode existing HTML entities, if TRUE.
 * @return string The converted string.
 */
public static function htmlReady(
    $text, $trim = true, $br = false, $double_encode = true
) {
    // Without ENT_SUBSTITUTE a single invalid UTF-8 byte makes
    // htmlspecialchars() return '' and silently drop the entire text.
    $text = htmlspecialchars(
        $text, ENT_QUOTES | ENT_SUBSTITUTE, 'utf-8', $double_encode
    );
    if ($trim) {
        $text = trim($text);
    }
    if ($br) { // fix newlines
        $text = nl2br($text, false);
    }
    return $text;
}
/**
 * Prepare a text for output into the editor.
 *
 * If the WYSIWYG editor is enabled, the text is first converted with
 * markupToHtml(). In either case the text is then passed through
 * htmlReady() with the given options.
 *
 * @param string  $text          The text.
 * @param boolean $trim          Trim text before applying markup rules, if TRUE.
 * @param boolean $br            Insert `<br>` before each newline, if TRUE.
 * @param boolean $double_encode Encode existing HTML entities, if TRUE.
 * @return string The converted string.
 */
public static function wysiwygReady(
    $text, $trim = true, $br = false, $double_encode = true
) {
    $prepared = self::editorEnabled()
        ? self::markupToHtml($text, $trim)
        : $text;

    return self::htmlReady($prepared, $trim, $br, $double_encode);
}
/**
 * Convert Stud.IP markup (possibly mixed with HTML if fallback mode is
 * enabled) to editable HTML. Pure HTML will only run through the purifier.
 *
 * A text that is empty after trimming is returned unchanged.
 *
 * @param string  $text The text.
 * @param boolean $trim Trim text before applying markup rules, if TRUE.
 * @param boolean $mark Mark result text as HTML, if TRUE.
 * @return string The converted string.
 */
public static function markupToHtml($text, $trim = true, $mark = true)
{
    // Compare against the empty string (as markAsHtml() does): a plain
    // truthiness test would also treat the text "0" as empty.
    if (trim($text) === '') {
        return $text;
    }
    if (self::isHtml($text)) {
        // detect the fallback marker before purifying the text
        $is_fallback = self::isHtmlFallback($text);
        $text = self::purify($text, false);
        if ($is_fallback) {
            // fallback HTML may still contain Stud.IP markup
            $text = self::markupText(new \StudipCoreFormat(), $text);
        }
    } else {
        $text = self::markupHtmlReady(new \StudipCoreFormat(), $text, $trim);
    }
    return $mark ? self::markAsHtml($text) : $text;
}
/**
* Call HTMLPurifier to remove all HTML tags from the string (if the source
* is detected to contain HTML, returns the argument unchanged otherwise).
*
* @param string $html HTML code to filter
* @return string The converted string.
*/
public static function removeHtml($html)
{
if (self::isHtml($html)) {
$config = \HTMLPurifier_Config::createDefault();
$config->set('Cache.SerializerPath', $GLOBALS['TMP_PATH']);
$config->set('HTML.Allowed', 'a[href],img[alt|src],br');
$config->set('AutoFormat.Custom', ['Unlinkify']);
$html = str_replace('', '
', $html);
$html = str_replace('', '
', $html);
$html = str_replace('', '
', $html);
$html = str_replace('', '
', $html);
$html = str_replace('