Wikia code/includes/Sanitizer.php
< Wikia code | includes
This page is obsolete. It is being retained for archival purposes. It may document extensions or features that are obsolete and/or no longer supported. Do not rely on the information here being up-to-date. The information shown below refers to the now unmaintained 1.16 MediaWiki release. The current stable release number is 1.43.0.
--- D:\Programming\SVN\mediawiki\branches\REL1_16\phase3\includes\Sanitizer.php 2011-07-18 22:31:28.153320300 +0100
+++ D:\Programming\SVN\wikia\trunk\includes\Sanitizer.php 2011-08-17 15:28:46.516601600 +0100
@@ -40,7 +40,8 @@
* Allows some... latitude.
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
*/
-$attrib = '[A-Za-z0-9]';
+$attrib_first = '[:A-Z_a-z]';
+$attrib = '[:A-Z_a-z-.0-9]';
$space = '[\x09\x0a\x0d\x20]';
define( 'MW_ATTRIBS_REGEX',
"/(?:^|$space)((?:xml:|xmlns:)?$attrib+)
@@ -353,7 +354,7 @@
* @return string
*/
static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
- global $wgUseTidy;
+ global $wgUseTidy, $wgRTEParserEnabled;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
@@ -367,7 +368,7 @@
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
- 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr'
+ 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr', 'q', 'acronym'
);
$htmlsingle = array(
'br', 'hr', 'li', 'dt', 'dd'
@@ -376,17 +377,18 @@
'br', 'hr'
);
$htmlnest = array( # Tags that can be nested--??
- 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
+ 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'q',
'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
);
$tabletags = array( # Can only appear inside table, we will close them
'td', 'th', 'tr',
+ 'thead', 'tbody', 'tfoot',
);
$htmllist = array( # Tags used by list
- 'ul','ol',
+ 'ul', 'ol', 'dl'
);
$listtags = array( # Tags that can appear in a list
- 'li',
+ 'li', 'dt', 'dd'
);
$htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) );
@@ -506,6 +508,16 @@
if ( !$badtag ) {
$rest = str_replace( '>', '&gt;', $rest );
$close = ( $brace == '/>' && !$slash ) ? ' /' : '';
+
+ # RTE (Rich Text Editor) - begin
+ # @author: Inez Korczyński
+ if(!empty($wgRTEParserEnabled)) {
+ if(!$slash && strpos($newparams, 'data-rte-meta') === false) {
+ $newparams = ' data-rte-washtml="1"' . $newparams;
+ }
+ }
+ # RTE - end
+
$text .= "<$slash$t$newparams$close>$rest";
continue;
}
@@ -529,6 +541,16 @@
}
$newparams = Sanitizer::fixTagAttributes( $params, $t );
$rest = str_replace( '>', '&gt;', $rest );
+
+ # RTE (Rich Text Editor) - begin
+ # @author: Inez Korczyński
+ if(!empty($wgRTEParserEnabled)) {
+ if(!$slash) {
+ $newparams = ' data-rte-washtml="1"' . $newparams;
+ }
+ }
+ # RTE - end
+
$text .= "<$slash$t$newparams$brace$rest";
} else {
$text .= '&lt;' . str_replace( '>', '&gt;', $x);
@@ -616,6 +638,8 @@
* @param $whitelist Array: list of allowed attribute names
* @return Array
*
+ * data-* attribute support added by christian@wikia-inc.com
+ *
* @todo Check for legal values where the DTD limits things.
* @todo Check for unique id attribute :P
*/
@@ -847,8 +871,31 @@
$encAttribute = htmlspecialchars( $attribute );
$encValue = Sanitizer::safeEncodeAttribute( $value );
+ # RTE (Rich Text Editor) - begin
+ # @author: Inez Korczyński, macbre
+ global $wgRTEParserEnabled;
+ if(!empty($wgRTEParserEnabled) && $encAttribute == 'style') {
+ // BugId:2462 - remove apostrophes from style attribute
+ $encValue = str_replace('&#039;', '', $encValue);
+
+ $attribs[] = "data-rte-style=\"$encValue\"";
+ }
+ # RTE - end
+
$attribs[] = "$encAttribute=\"$encValue\"";
}
+
+ # RTE (Rich Text Editor) - begin
+ # @author: Inez Korczyński
+ global $wgRTEParserEnabled;
+ if(!empty($wgRTEParserEnabled)) {
+ if(strpos($text, "\x7f") !== false) {
+ RTE::$edgeCases[] = 'COMPLEX.08';
+ }
+ $attribs[] = RTEParser::encodeAttributesStr($text);
+ }
+ # RTE - end
+
return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
}
@@ -1322,7 +1369,7 @@
# 7.5.4
'div' => $block,
'center' => $common, # deprecated
- 'span' => $block, # ??
+ 'span' => $block, //$block, # ??
# 7.5.5
'h1' => $block,
@@ -1342,6 +1389,8 @@
'em' => $common,
'strong' => $common,
'cite' => $common,
+ 'abbr' => $common,
+ 'acronym' => $common,
# dfn
'code' => $common,
# samp