/**
 * @package   MediaWiki2DokuWiki
 * @author    Andrei Nicholson
 * @copyright Copyright (C) 2011-2013 Andrei Nicholson
 * @link      https://github.com/tetsuo13/MediaWiki-to-DokuWiki-Importer
 */

/**
 * Convert syntaxes.
 *
 * Regular expressions originally by Johannes Buchner, with later changes by
 * Frederik Tilkin.
 *
 * @author Andrei Nicholson
 * @author Johannes Buchner
 * @author Frederik Tilkin
 * @since  2012-05-07
 */
class MediaWiki2DokuWiki_MediaWiki_SyntaxConverter
{
    /** Original MediaWiki record. */
    private $record = '';

    /** Stored code blocks to prevent further conversions. */
    private $codeBlock = array();

    /** What string should never occur in user content? */
    private $placeholder = '';

    /**
     * Constructor.
     *
     * @param string $record MediaWiki record.
     */
    public function __construct($record)
    {
        $this->placeholder = '@@' . __CLASS__ . '_';
        $this->record = $record;
    }

    /**
     * Convert page syntax from MediaWiki to DokuWiki.
     *
     * Code blocks are pulled out first and put back last so that none of the
     * other conversions touch their contents.
     *
     * @return string DokuWiki page.
     * @author Johannes Buchner
     * @author Frederik Tilkin
     */
    public function convert()
    {
        $record = $this->convertCodeBlocks($this->record);
        $record = $this->convertHeadings($record);
        $record = $this->convertList($record);
        $record = $this->convertUrlText($record);
        $record = $this->convertLink($record);
        $record = $this->convertDoubleSlash($record);
        $record = $this->convertBoldItalic($record);
        $record = $this->convertTalks($record);
        $record = $this->convertImagesFiles($record);

        if (count($this->codeBlock) > 0) {
            $record = $this->replaceStoredCodeBlocks($record);
        }

        return $record;
    }

    /**
     * Double forward slashes are not italic. There is no double slash syntax
     * rule in MediaWiki. This conversion must happen before the conversion of
     * italic markup.
     *
     * A double slash that directly follows a colon (as in "http://") is left
     * alone.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertDoubleSlash($record)
    {
        return preg_replace(
            '/([^:])\/\//m',
            '\1<nowiki>//</nowiki>',
            $record
        );
    }

    /**
     * Code blocks.
     *
     * Every PRE element is swapped for a CODE element that holds only a
     * placeholder; the real content is restored by replaceStoredCodeBlocks().
     *
     * @param string $record
     *
     * @return string
     */
    private function convertCodeBlocks($record)
    {
        // Change the double slashes that have been wrapped in NOWIKI inside a
        // link [] BACK to normal. Applied twice on purpose, to cover links
        // such as [http://addres.com http://address.com] (quick and dirty).
        $nowikiInLink = '/([\[][^\[]*)(<nowiki>)(\/\/+)(<\/nowiki>)([^\]]*)/';
        $record = preg_replace($nowikiInLink, '\1\3\5', $record);
        $record = preg_replace($nowikiInLink, '\1\3\5', $record);

        // A callback hands the PRE content over verbatim (quotes included).
        $record = preg_replace_callback(
            '@<pre>(.*?)?</pre>@s',
            array($this, 'storeCodeBlockFromMatch'),
            $record
        );

        // Merge code blocks that are separated only by a blank line.
        return preg_replace('@</code>\n[ \t]*\n<code>@', '', $record);
    }

    /**
     * Callback for preg_replace_callback(): store the captured PRE content.
     *
     * @param array $match Regular expression match; index 1 is the content.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlockFromMatch($match)
    {
        return $this->storeCodeBlock(isset($match[1]) ? $match[1] : '');
    }

    /**
     * Replace content in PRE tag with placeholder. This is done so no more
     * conversions are performed with the contents. The last thing this class
     * will do is replace those placeholders with their original content.
     *
     * @param string $code Contents of PRE tag.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlock($code)
    {
        $this->codeBlock[] = $code;

        $replace = $this->placeholder . (count($this->codeBlock) - 1) . '@@';

        return "<code>$replace</code>";
    }

    /**
     * Replace PRE tag placeholders back with their original content.
     *
     * @param string $record Converted record.
     *
     * @return string Record with placeholders removed.
     */
    private function replaceStoredCodeBlocks($record)
    {
        foreach ($this->codeBlock as $index => $code) {
            $record = str_replace(
                $this->placeholder . $index . '@@',
                $code,
                $record
            );
        }

        return $record;
    }

    /**
     * Convert images and files.
     *
     * @param string $record Converted record.
     *
     * @return string
     */
    private function convertImagesFiles($record)
    {
        $numMatches = preg_match_all(
            '/\[\[(Image|File):(.*?)\]\]/',
            $record,
            $matches
        );

        if ($numMatches === 0 || $numMatches === false) {
            return $record;
        }

        for ($i = 0; $i < $numMatches; $i++) {
            $converted = $this->convertImage($matches[2][$i]);

            // Replace the full tag, [[File:example.jpg|options|caption]],
            // with the DokuWiki equivalent.
            $record = str_replace($matches[0][$i], $converted, $record);
        }

        return $record;
    }

    /**
     * Process a MediaWiki image tag.
     *
     * DokuWiki encodes alignment as whitespace inside the braces: a leading
     * space aligns right, a trailing space aligns left, both center. The
     * size suffix belongs directly after the filename, before any trailing
     * space.
     *
     * @param string $detail Filename and options, ie.
     *                       example.jpg|options|caption.
     *
     * @return string DokuWiki version of tag.
     */
    private function convertImage($detail)
    {
        $parts = explode('|', $detail);
        $numParts = count($parts);

        // Image link.
        if ($numParts === 2 && substr($parts[1], 0, 5) === 'link=') {
            return '[[' . substr($parts[1], 5) . '|{{wiki:' . $parts[0] . '}}]]';
        }

        // Bare filename without any option.
        if ($numParts === 1) {
            return '{{wiki:' . $detail . '}}';
        }

        $imageFilename = array_shift($parts);
        $padBefore = '';
        $padAfter = '';
        $imageSize = '';
        $caption = '';

        foreach ($parts as $part) {
            if ($part === 'left') {
                $padAfter = ' ';
                continue;
            }

            if ($part === 'right') {
                $padBefore = ' ';
                continue;
            }

            if ($part === 'center') {
                $padBefore = $padAfter = ' ';
                continue;
            }

            if (substr($part, -2) === 'px') {
                // Either "<width>px" or "<width>x<height>px".
                if (preg_match('/((\d+)x)?(\d+)px/', $part, $size)) {
                    $imageSize = ($size[1] === '')
                        ? $size[3]
                        : $size[2] . 'x' . $size[3];
                }
                continue;
            }

            // Anything unrecognised is taken as the caption; the last one
            // wins.
            $caption = $part;
        }

        $converted = '{{' . $padBefore . 'wiki:' . $imageFilename;

        if ($imageSize !== '') {
            $converted .= '?' . $imageSize;
        }

        $converted .= $padAfter;

        if ($caption !== '') {
            $converted .= '|' . $caption;
        }

        return $converted . '}}';
    }

    /**
     * Convert talks.
     *
     * NOTE(review): the first pattern has no "m" modifier, so it only matches
     * at the very start of the record -- confirm whether per-line matching
     * was intended.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertTalks($record)
    {
        $patterns = array(
            '/^[ ]*:/'  => '>',
            '/>:/'      => '>>',
            '/>>:/'     => '>>>',
            '/>>>:/'    => '>>>>',
            '/>>>>:/'   => '>>>>>',
            '/>>>>>:/'  => '>>>>>>',
            '/>>>>>>:/' => '>>>>>>>'
        );

        return preg_replace(
            array_keys($patterns),
            array_values($patterns),
            $record
        );
    }

    /**
     * Convert bold and italic.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertBoldItalic($record)
    {
        $patterns = array(
            "/'''''(.*)'''''/" => '//**\1**//',
            "/'''/"            => '**',
            "/''/"             => '//',

            // Changes by Reiner Rottmann: - fixed erroneous interpretation
            // of combined bold and italic text.
            '@\*\*//@'         => '//**'
        );

        return preg_replace(
            array_keys($patterns),
            array_values($patterns),
            $record
        );
    }

    /**
     * Convert [link] => [[link]].
     *
     * @param string $record
     *
     * @return string
     */
    private function convertLink($record)
    {
        return preg_replace(
            '/([^[]|^)(\[[^]]*\])([^]]|$)/',
            '\1[\2]\3',
            $record
        );
    }

    /**
     * Convert [url text] => [url|text].
     *
     * @param string $record
     *
     * @return string
     */
    private function convertUrlText($record)
    {
        return preg_replace(
            '/([^[]|^)(\[[^] ]*) ([^]]*\])([^]]|$)/',
            '\1\2|\3\4',
            $record
        );
    }

    /**
     * Convert lists.
     *
     * DokuWiki list items are indented by two spaces per nesting level and
     * use "*" for unordered and "-" for ordered items. Up to four levels are
     * converted.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertList($record)
    {
        $patterns = array(
            '/^\* /m'    => '  * ',
            '/^\*{2} /m' => '    * ',
            '/^\*{3} /m' => '      * ',
            '/^\*{4} /m' => '        * ',
            '/^# /m'     => '  - ',
            '/^#{2} /m'  => '    - ',
            '/^#{3} /m'  => '      - ',
            '/^#{4} /m'  => '        - '
        );

        return preg_replace(
            array_keys($patterns),
            array_values($patterns),
            $record
        );
    }

    /**
     * Convert headings. Syntax between MediaWiki and DokuWiki is completely
     * opposite, so a heading that has just been rewritten could be caught
     * again by one of the later, shorter patterns. To prevent that, every
     * replacement is prefixed with a temporary marker (so the line no longer
     * starts with an equal mark) which is stripped once all patterns ran.
     *
     * The marker is specific to headings: it must not be the bare code block
     * placeholder prefix, otherwise stripping it would also destroy the
     * placeholders created by storeCodeBlock().
     *
     * @param string $record
     *
     * @return string
     */
    private function convertHeadings($record)
    {
        $marker = $this->placeholder . 'heading@@';

        $levels = array(
            '/^======(.+)======\s*$/m' => '==\1==',
            '/^=====(.+)=====\s*$/m'   => '==\1==',
            '/^====(.+)====\s*$/m'     => '==\1==',
            '/^===(.+)===\s*$/m'       => '===\1===',
            '/^==(.+)==\s*$/m'         => '====\1====',
            '/^=(.+)=\s*$/m'           => '=====\1====='
        );

        $replacements = array();
        foreach ($levels as $replacement) {
            $replacements[] = $marker . $replacement;
        }

        $convertedRecord = preg_replace(
            array_keys($levels),
            $replacements,
            $record
        );

        // No headings were found (or the regular expression engine failed).
        if ($convertedRecord === null || $convertedRecord === $record) {
            return $record;
        }

        // Strip out the temporary markers.
        return str_replace($marker, '', $convertedRecord);
    }
}