Wikia code/includes/parser/Preprocessor DOM.php

--- D:\Programming\SVN\mediawiki\branches\REL1_16\phase3\includes\parser\Preprocessor_DOM.php	2011-07-18 22:30:54.902343800 +0100
+++ D:\Programming\SVN\wikia\trunk\includes\parser\Preprocessor_DOM.php	2011-08-17 15:28:13.351562500 +0100
@@ -69,6 +69,21 @@
 		
 		$xml = false;
 		$cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+
+		global $wgCategorySelectEnabled;
+		if(!empty($wgCategorySelectEnabled)) {
+			$cacheable = false;
+		}
+
+		# RTE (Rich Text Editor) - begin
+		# @author: Inez Korczyński
+		# Disable preprocessor cache for RTE mode; both RTE globals must be imported or the check below never sees them
+		global $wgRTETemplateParams, $wgRTEParserEnabled;
+		if(!empty($wgRTETemplateParams) || !empty($wgRTEParserEnabled)) {
+			$cacheable = false;
+		}
+		# RTE - end
+
 		if ( $cacheable ) {
 			wfProfileIn( __METHOD__.'-cacheable' );
 
@@ -119,6 +134,7 @@
 	}
 	
 	function preprocessToXml( $text, $flags = 0 ) {
+		global $wgCategorySelectEnabled;
 		wfProfileIn( __METHOD__ );
 		$rules = array(
 			'{' => array(
@@ -154,6 +170,33 @@
 			$ignoredElements = array( 'includeonly' );
 			$xmlishElements[] = 'includeonly';
 		}
+
+		# RTE (Rich Text Editor) - begin
+		# @author: Inez Korczyński
+		global $wgRTEParserEnabled, $wgRTETemplateParams;
+		if(!empty($wgRTEParserEnabled)) {
+			$rules['['] = array(
+				'end' => ']',
+				'names' => array( 1=> 'external', 2 => null ),
+				'min' => 1,
+				'max' => 2
+			);
+			$RTE_flags = $flags;
+			$ignoredTags = $ignoredElements = array();
+		}
+		if(!empty($wgRTETemplateParams)) {
+			$rules['{']['min'] = 3;
+			$rules['{']['names'] = array(3 => 'tplarg');
+		}
+		# RTE - end
+
+		//CategorySelect
+		if (!empty($wgCategorySelectEnabled)) {
+			$ignoredTags = $ignoredElements = array();
+			$xmlishElements[] = 'noinclude';
+			$xmlishElements[] = 'onlyinclude';
+		}
+
 		$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 
 		// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
@@ -174,6 +217,12 @@
 		$noMoreGT = false;         # True if there are no more greater-than (>) signs right of $i
 		$findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
 		$fakeLineStart = true;     # Do a line-start run without outputting an LF character
+		$openAt = $closeAt = array(); # CategorySelect
+
+		# RTE (Rich Text Editor) - begin
+		# @author: Inez Korczyński
+		$openAt = $closeAt = array();
+		# RTE - end
 
 		while ( true ) {
 			//$this->memCheck();
@@ -389,6 +438,15 @@
 					// Note that the attr element contains the whitespace between name and attribute,
 					// this is necessary for precise reconstruction during pre-save transform.
 					'<attr>' . htmlspecialchars( $attr ) . '</attr>';
+
+				# RTE (Rich Text Editor) - begin
+				# @author: Inez Korczyński
+				if(!empty($wgRTEParserEnabled)) {
+					$accum .= '<inner>' . RTEMarker::generate(RTEMarker::EXT_WIKITEXT, RTEData::put('wikitext', substr( $text, $tagStartPos, $i - $tagStartPos ))) . '</inner>';
+					$inner = null;
+				}
+				# RTE - end
+
 				if ( $inner !== null ) {
 					$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
 				}
@@ -505,7 +563,19 @@
 					# Add literal brace(s)
 					$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
 				}
+
+				# Wysiwyg
+				if($flags == 0 && ($wgCategorySelectEnabled) && $count == 2 && $curChar == "{") {
+					$openAt[] = $i;
+				}
+
 				$i += $count;
+				# RTE (Rich Text Editor) - begin
+				# @author: Inez Korczyński
+				if(!empty($wgRTEParserEnabled) && $RTE_flags === 0 && ($count == 2 || $count == 3) && $curChar == "{") {
+					$openAt[] = $i;
+				}
+				# RTE - end
 			}
 
 			elseif ( $found == 'close' ) {
@@ -540,9 +610,22 @@
 					continue;
 				}
 				$name = $rule['names'][$matchingCount];
-				if ( $name === null ) {
+				# RTE (Rich Text Editor) - begin
+				# @author: Inez Korczyński
+				if ( $name === null || $name == 'external' || (!empty($wgRTEParserEnabled) && $name == 'external')) {
 					// No element, just literal text
 					$element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
+
+					if(!empty($wgRTEParserEnabled)) {
+						if($name === null) {
+							$dataIdx = RTEData::put('wikitext', $element);
+							$element = '[[' . RTEMarker::generate(RTEMarker::INTERNAL_WIKITEXT, $dataIdx) . substr($element, 2);
+						} else {
+							$dataIdx = RTEData::put('wikitext', $element);
+							$element .= RTEMarker::generate(RTEMarker::EXTERNAL_WIKITEXT, $dataIdx);
+						}
+					}
+				# RTE - end
 				} else {
 					# Create XML element
 					# Note: $parts is already XML, does not need to be encoded further
@@ -558,8 +641,22 @@
 						$attr = '';
 					}
 
+					# RTE (Rich Text Editor) - begin
+					# @author: Inez Korczyński
+					if(!empty($wgRTEParserEnabled) && $RTE_flags === 0 && ($count == 2 || $count == 3) && $curChar == '}') {
+						$closeAt[] = $i;
+						if(count($closeAt) == count($openAt)) {
+							$openIdx = $openAt[0];
+							$closeIdx = $closeAt[count($closeAt)-1];
+							$openAt = $closeAt = array();
+							$attr .= ' _rte_wikitextidx="'.RTEData::put('wikitext', substr($text, $openIdx-$count, $closeIdx-$openIdx+2*$count)).'"';
+						}
+					}
+					# RTE - end
+
 					$element = "<$name$attr>";
 					$element .= "<title>$title</title>";
+
 					$argIndex = 1;
 					foreach ( $parts as $partIndex => $part ) {
 						if ( isset( $part->eqpos ) ) {
@@ -899,7 +996,16 @@
 		$iteratorStack = array( false, $root );
 		$indexStack = array( 0, 0 );
 
+		$RTEext_1 = false;
+		$RTEext_2 = false;
+
 		while ( count( $iteratorStack ) > 1 ) {
+
+			if($RTEext_1) {
+				$RTEext_1 = false;
+				$RTEext_2 = true;
+			}
+
 			$level = count( $outStack ) - 1;
 			$iteratorNode =& $iteratorStack[ $level ];
 			$out =& $outStack[$level];
@@ -944,8 +1050,21 @@
 				$newIterator = $contextNode;
 			} elseif ( $contextNode instanceof DOMNode ) {
 				if ( $contextNode->nodeType == XML_TEXT_NODE ) {
+
+					# RTE (Rich Text Editor) - begin
+					global $wgRTEParserEnabled;
+					if(!empty($wgRTEParserEnabled)) {
+						if($RTEext_2) {
+							if(strpos($contextNode->nodeValue, 'table') !== false) {
+								RTE::$edgeCases[] = 'COMPLEX.11';
+							}
+						}
+					}
+					# RTE - end
+
 					$out .= $contextNode->nodeValue;
 				} elseif ( $contextNode->nodeName == 'template' ) {
+
 					# Double-brace expansion
 					$xpath = new DOMXPath( $contextNode->ownerDocument );
 					$titles = $xpath->query( 'title', $contextNode );
@@ -951,6 +1070,18 @@
 					$titles = $xpath->query( 'title', $contextNode );
 					$title = $titles->item( 0 );
 					$parts = $xpath->query( 'part', $contextNode );
+
+					# RTE (Rich Text Editor) - begin
+					# @author: Inez Korczyński
+					global $wgRTEParserEnabled;
+					if(!empty($wgRTEParserEnabled)) {
+						$dataIdx = RTEData::put('placeholder', array(
+							'type' => 'double-brackets',
+							'wikitextIdx' => $contextNode->getAttribute('_rte_wikitextidx'),
+							'lineStart' => $contextNode->getAttribute('lineStart'),
+							'title' => $title->textContent));
+						$out .= RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx);
+					} else {
 					if ( $flags & self::NO_TEMPLATES ) {
 						$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
 					} else {
@@ -966,12 +1097,28 @@
 							$out .= $ret['text'];
 						}
 					}
+					}
+					# RTE - end
+
 				} elseif ( $contextNode->nodeName == 'tplarg' ) {
 					# Triple-brace expansion
 					$xpath = new DOMXPath( $contextNode->ownerDocument );
 					$titles = $xpath->query( 'title', $contextNode );
 					$title = $titles->item( 0 );
 					$parts = $xpath->query( 'part', $contextNode );
+
+					# RTE (Rich Text Editor) - begin
+					# @author: Wladyslaw Bodzek
+					global $wgRTEParserEnabled;
+					if ( !empty($wgRTEParserEnabled) ) {
+						//var_dump($contextNode->getAttribute('_rte_wikitextidx'));
+						$dataIdx = RTEData::put('placeholder', array(
+							'type' => 'tplarg',
+							'wikitextIdx' => $contextNode->getAttribute('_rte_wikitextidx'),
+							'lineStart' => $contextNode->getAttribute('lineStart'),
+							'title' => $title->textContent));
+						$out .= RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx);
+					} else {
 					if ( $flags & self::NO_ARGS ) {
 						$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
 					} else {
@@ -985,6 +1132,8 @@
 							$out .= $ret['text'];
 						}
 					}
+					}
+					# RTE - end
 				} elseif ( $contextNode->nodeName == 'comment' ) {
 					# HTML-style comment
 					# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
@@ -992,8 +1141,32 @@
 						|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
 						|| ( $flags & self::STRIP_COMMENTS ) )
 					{
+						# RTE (Rich Text Editor) - begin
+						# @author: Inez Korczyński
+						global $wgRTEParserEnabled;
+						if(!empty($wgRTEParserEnabled)) {
+							if(strlen($out) === 0 || substr($out, -1) == "\n") {
+								if(substr($contextNode->textContent, -1) == "\n") {
+									$add = "\n";
+									$text = substr($contextNode->textContent, 0, -1);
+								} else {
+									$add = "";
+									$text = $contextNode->textContent;
+								}
+								$dataIdx = RTEData::put('placeholder', array(
+									'type' => 'comment',
+									'wikitext' => $text));
+								$out .= RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx) . $add;
+							} else {
+								RTE::$edgeCases[] = 'COMMENT';
+								$out .= '';
+							}
+						} else {
 						$out .= '';
 					}
+						# RTE - end
+
+					}
 					# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
 					# Not in RECOVER_COMMENTS mode (extractSections) though
 					elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
@@ -1027,6 +1200,7 @@
 						'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
 					);
 					$out .= $this->parser->extensionSubstitution( $params, $this );
+					$RTEext_1 = true;
 				} elseif ( $contextNode->nodeName == 'h' ) {
 					# Heading
 					$s = $this->expand( $contextNode->childNodes, $flags );
@@ -1074,6 +1248,7 @@
 					$level--;
 				}
 			}
+			$RTEext_2 = false;
 		}
 		--$expansionDepth;
 		wfProfileOut( __METHOD__ );