<?php
/**
 * MediaWiki2DokuWiki importer.
 * Copyright (C) 2011-2013  Andrei Nicholson
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * @package   MediaWiki2DokuWiki
 * @author    Andrei Nicholson
 * @copyright Copyright (C) 2011-2013 Andrei Nicholson
 * @link      https://github.com/tetsuo13/MediaWiki-to-DokuWiki-Importer
 */

/**
 * Convert syntaxes.
 *
 * Regular expressions originally by Johannes Buchner
 * <buchner.johannes [at] gmx.at>.
 *
 * Changes by Frederik Tilkin:
 *
 * <ul>
 * <li>uses sed instead of perl</li>
 * <li>resolved some bugs ('''''IMPORTANT!!!''''' becomes //**IMPORTANT!!!** //,
 *     // becomes <nowiki>//</nowiki> if it is not in a CODE block)</li>
 * <li>added functionality (multiple lines starting with a space become CODE
 *     blocks)</li>
 * </ul>
 *
 * @author Andrei Nicholson
 * @author Johannes Buchner
 * @author Frederik Tilkin
 * @since  2012-05-07
 */
class MediaWiki2DokuWiki_MediaWiki_SyntaxConverter
{
    /** Original MediaWiki record. */
    private $record = '';

    /** Stored code blocks to prevent further conversions. */
    private $codeBlock = array();

    /**
     * Prefix of every temporary marker inserted into the record. It is
     * built from the class name so that it should never occur in user
     * content.
     */
    private $placeholder = '';

    /**
     * Constructor.
     *
     * @param string $record MediaWiki record.
     */
    public function __construct($record)
    {
        $this->placeholder = '@@' . __CLASS__ . '_';
        $this->record = $record;
    }

    /**
     * Convert page syntax from MediaWiki to DokuWiki.
     *
     * PRE blocks are swapped for placeholders first and restored last so
     * that none of the intermediate conversions touch their content.
     *
     * @return string DokuWiki page.
     * @author Johannes Buchner <buchner.johannes [at] gmx.at>
     * @author Frederik Tilkin
     */
    public function convert()
    {
        // Start from a clean slate so convert() can be called repeatedly.
        $this->codeBlock = array();

        $record = $this->convertCodeBlocks($this->record);
        $record = $this->convertHeadings($record);
        $record = $this->convertList($record);
        $record = $this->convertUrlText($record);
        $record = $this->convertLink($record);
        $record = $this->convertDoubleSlash($record);
        $record = $this->convertBoldItalic($record);
        $record = $this->convertTalks($record);
        $record = $this->convertImagesFiles($record);

        if (count($this->codeBlock) > 0) {
            $record = $this->replaceStoredCodeBlocks($record);
        }

        return $record;
    }

    /**
     * Run a list of regular expression replacements, in order, over the
     * record.
     *
     * @param array  $patterns Map of PCRE pattern to replacement string.
     * @param string $record   Text to convert.
     *
     * @return string Converted text, or the unchanged input when PCRE
     *                reports an error.
     */
    private function applyPatterns(array $patterns, $record)
    {
        $converted = preg_replace(
            array_keys($patterns),
            array_values($patterns),
            $record
        );

        // preg_replace() returns NULL on failure (e.g. backtrack limit hit).
        // Keep the input instead of passing NULL to the next conversion.
        return ($converted === null) ? $record : $converted;
    }

    /**
     * Double forward slashes are not italic. There is no double slash syntax
     * rule in MediaWiki. This conversion must happen before the conversion of
     * italic markup.
     *
     * A double slash directly preceded by a colon (as in "http://") is left
     * alone.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertDoubleSlash($record)
    {
        return $this->applyPatterns(
            array('/([^:])\/\//m' => '\1<nowiki>//</nowiki>'),
            $record
        );
    }

    /**
     * Code blocks. Every PRE element is replaced by a CODE element holding a
     * placeholder; the real content is restored at the end of convert().
     *
     * @param string $record
     *
     * @return string
     */
    private function convertCodeBlocks($record)
    {
        // Change the ones that have been replaced in a link [] BACK to
        // normal (do it twice in case
        // [http://addres.com http://address.com] ) [quick and dirty]
        //
        // The two passes are explicit: listing the same pattern twice as an
        // array key silently collapses into a single entry.
        //
        // NOTE(review): this runs before convertDoubleSlash() inserts any
        // <nowiki> tags, so it only affects tags already present in the
        // MediaWiki source -- confirm the intended position in the pipeline.
        $nowikiInLink = '/([\[][^\[]*)(<nowiki>)(\/\/+)(<\/nowiki>)([^\]]*)/';

        for ($pass = 0; $pass < 2; $pass++) {
            $record = $this->applyPatterns(
                array($nowikiInLink => '\1\3\5'),
                $record
            );
        }

        // A callback replaces the former "e" modifier, which no longer
        // exists in PHP 7+ and which escaped quotes in the captured text.
        $stored = preg_replace_callback(
            '@<pre>(.*?)?</pre>@s',
            array($this, 'storeCodeBlockMatch'),
            $record
        );

        if ($stored !== null) {
            $record = $stored;
        }

        // Merge CODE blocks that are only separated by an empty line.
        return $this->applyPatterns(
            array('@</code>\n[ \t]*\n<code>@' => ''),
            $record
        );
    }

    /**
     * Callback adapter for preg_replace_callback().
     *
     * @param array $matches Match data; index 1 is the content of the PRE
     *                       tag.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlockMatch(array $matches)
    {
        return $this->storeCodeBlock(isset($matches[1]) ? $matches[1] : '');
    }

    /**
     * Replace content in PRE tag with placeholder. This is done so no more
     * conversions are performed with the contents. The last thing this class
     * will do is replace those placeholders with their original content.
     *
     * @param string $code Contents of PRE tag.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlock($code)
    {
        $this->codeBlock[] = $code;

        $replace = $this->placeholder . (count($this->codeBlock) - 1) . '@@';

        return "<code>$replace</code>";
    }

    /**
     * Replace PRE tag placeholders back with their original content.
     *
     * @param string $record Converted record.
     *
     * @return string Record with placeholders removed.
     */
    private function replaceStoredCodeBlocks($record)
    {
        foreach ($this->codeBlock as $index => $code) {
            $record = str_replace(
                $this->placeholder . $index . '@@',
                $code,
                $record
            );
        }

        return $record;
    }

    /**
     * Convert images and files.
     *
     * @param string $record Converted record.
     *
     * @return string
     */
    private function convertImagesFiles($record)
    {
        $numMatches = preg_match_all(
            '/\[\[(Image|File):(.*?)\]\]/',
            $record,
            $matches
        );

        if ($numMatches === 0 || $numMatches === false) {
            return $record;
        }

        foreach ($matches[0] as $i => $fullTag) {
            // Replace the full tag, [[File:example.jpg|options|caption]],
            // with the DokuWiki equivalent.
            $record = str_replace(
                $fullTag,
                $this->convertImage($matches[2][$i]),
                $record
            );
        }

        return $record;
    }

    /**
     * Process a MediaWiki image tag.
     *
     * Recognised options are "left", "right", "center", a size ending in
     * "px" and "link=" (the latter only when it is the sole option). Any
     * other option is used as the caption; the last one wins.
     *
     * @param string $detail Filename and options, ie.
     *                       example.jpg|options|caption.
     *
     * @return string DokuWiki version of tag.
     */
    private function convertImage($detail)
    {
        $parts = explode('|', $detail);

        // Image link.
        if (count($parts) == 2 && substr($parts[1], 0, 5) == 'link=') {
            return '[[' . substr($parts[1], 5) . '|{{wiki:' . $parts[0] . '}}]]';
        }

        $imageFilename = array_shift($parts);

        // DokuWiki aligns by whitespace inside the braces: a leading space
        // pushes the image to the right, a trailing space pushes it to the
        // left, and both together center it.
        $padBefore = '';
        $padAfter = '';
        $imageSize = '';
        $caption = '';

        foreach ($parts as $part) {
            if ($part === 'left') {
                $padAfter = ' ';
                continue;
            }

            if ($part === 'right') {
                $padBefore = ' ';
                continue;
            }

            if ($part === 'center') {
                $padBefore = $padAfter = ' ';
                continue;
            }

            if (substr($part, -2) === 'px') {
                if (preg_match('/((\d+)x)?(\d+)px/', $part, $size) === 1) {
                    $imageSize = ($size[1] === '')
                        ? $size[3]
                        : $size[2] . 'x' . $size[3];
                }

                continue;
            }

            $caption = $part;
        }

        $converted = '{{' . $padBefore . 'wiki:' . $imageFilename;

        if ($imageSize !== '') {
            $converted .= '?' . $imageSize;
        }

        // The trailing alignment space has to come after the size
        // parameter, directly before the caption separator or closing
        // braces.
        $converted .= $padAfter;

        if ($caption !== '') {
            $converted .= '|' . $caption;
        }

        return $converted . '}}';
    }

    /**
     * Convert talks. Leading colons, which indent a reply in MediaWiki,
     * become DokuWiki quote markers, one ">" per colon for up to seven
     * levels.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertTalks($record)
    {
        $patterns = array(
            // Multi-line mode so every line of the record is examined, not
            // only the first one.
            '/^[ ]*:/m' => '>',
            '/>:/'      => '>>',
            '/>>:/'     => '>>>',
            '/>>>:/'    => '>>>>',
            '/>>>>:/'   => '>>>>>',
            '/>>>>>:/'  => '>>>>>>',
            '/>>>>>>:/' => '>>>>>>>'
        );

        return $this->applyPatterns($patterns, $record);
    }

    /**
     * Convert bold and italic.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertBoldItalic($record)
    {
        $patterns = array(
            "/'''''(.*)'''''/" => '//**\1**//',
            "/'''/"            => '**',
            "/''/"             => '//',

            // Changes by Reiner Rottmann: - fixed erroneous interpretation
            // of combined bold and italic text.
            // NOTE(review): this also rewrites the closing "**//" produced
            // by the first rule above -- confirm that is intended.
            '@\*\*//@'         => '//**'
        );

        return $this->applyPatterns($patterns, $record);
    }

    /**
     * Convert [link] => [[link]].
     *
     * @param string $record
     *
     * @return string
     */
    private function convertLink($record)
    {
        return $this->applyPatterns(
            array('/([^[]|^)(\[[^]]*\])([^]]|$)/' => '\1[\2]\3'),
            $record
        );
    }

    /**
     * Convert [url text] => [url|text].
     *
     * @param string $record
     *
     * @return string
     */
    private function convertUrlText($record)
    {
        return $this->applyPatterns(
            array('/([^[]|^)(\[[^] ]*) ([^]]*\])([^]]|$)/' => '\1\2|\3\4'),
            $record
        );
    }

    /**
     * Convert lists, up to four levels deep. DokuWiki list items are
     * indented by two spaces per nesting level.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertList($record)
    {
        $patterns = array(
            '/^\* /m'    => '  * ',
            '/^\*{2} /m' => '    * ',
            '/^\*{3} /m' => '      * ',
            '/^\*{4} /m' => '        * ',
            '/^# /m'     => '  - ',
            '/^#{2} /m'  => '    - ',
            '/^#{3} /m'  => '      - ',
            '/^#{4} /m'  => '        - '
        );

        return $this->applyPatterns($patterns, $record);
    }

    /**
     * Convert headings. Syntax between MediaWiki and DokuWiki is opposite:
     * more equal marks mean a smaller heading in MediaWiki but a larger one
     * in DokuWiki. This creates a problem since an already replaced heading
     * would be caught again by a later search string with fewer marks.
     *
     * To avoid that, every replaced heading is temporarily prefixed with a
     * marker so it no longer starts with an equal mark; the markers are
     * stripped once all patterns have run.
     *
     * NOTE(review): the level mapping below is kept exactly as found --
     * confirm it is the intended one.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertHeadings($record)
    {
        // Dedicated marker. It must differ from the code block placeholders
        // ("<prefix><number>@@"): stripping the bare prefix would destroy
        // them and the stored PRE content could never be restored.
        $marker = $this->placeholder . 'heading@@';

        $patterns = array(
            '/^======(.+)======\s*$/m' => '==\1==',
            '/^=====(.+)=====\s*$/m'   => '==\1==',
            '/^====(.+)====\s*$/m'     => '==\1==',
            '/^===(.+)===\s*$/m'       => '===\1===',
            '/^==(.+)==\s*$/m'         => '====\1====',
            '/^=(.+)=\s*$/m'           => '=====\1====='
        );

        // Insert the marker into each replacement so that a converted
        // heading won't be caught in a search later.
        $marked = array();

        foreach ($patterns as $pattern => $replacement) {
            $marked[$pattern] = $marker . $replacement;
        }

        $convertedRecord = $this->applyPatterns($marked, $record);

        // No headings were found.
        if ($convertedRecord === $record) {
            return $record;
        }

        // Strip out the markers.
        return str_replace($marker, '', $convertedRecord);
    }
}