1<?php
2/**
3 * MediaWiki2DokuWiki importer.
4 * Copyright (C) 2011-2013  Andrei Nicholson
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 * @package   MediaWiki2DokuWiki
20 * @author    Andrei Nicholson
21 * @copyright Copyright (C) 2011-2013 Andrei Nicholson
22 * @link      https://github.com/tetsuo13/MediaWiki-to-DokuWiki-Importer
23 */
24
25/**
26 * Convert syntaxes.
27 *
28 * Regular expressions originally by Johannes Buchner
29 * <buchner.johannes [at] gmx.at>.
30 *
31 * Changes by Frederik Tilkin:
32 *
33 * <ul>
34 * <li>uses sed instead of perl</li>
35 * <li>resolved some bugs ('''''IMPORTANT!!!''''' becomes //**IMPORTANT!!!** //,
36 *     // becomes <nowiki>//</nowiki> if it is not in a CODE block)</li>
37 * <li>added functionality (multiple lines starting with a space become CODE
38 *     blocks)</li>
39 * </ul>
40 *
41 * @author Andrei Nicholson
42 * @author Johannes Buchner
43 * @author Frederik Tilkin
44 * @since  2012-05-07
45 */
class MediaWiki2DokuWiki_MediaWiki_SyntaxConverter
{
    /** Original MediaWiki record. */
    private $record = '';

    /** Stored code blocks to prevent further conversions. */
    private $codeBlock = array();

    /** What string should never occur in user content? */
    private $placeholder = '';

    /**
     * Constructor.
     *
     * @param string $record MediaWiki record.
     */
    public function __construct($record)
    {
        $this->placeholder = '@@' . __CLASS__ . '_';

        // Every conversion step works on a string; normalise once here.
        $this->record = (string) $record;
    }

    /**
     * Convert page syntax from MediaWiki to DokuWiki.
     *
     * The steps run in a fixed order: PRE blocks are stashed first so that
     * none of the later rules touch their content, and they are put back as
     * the very last step. Double slashes are escaped before italic markup
     * is produced, otherwise the generated "//" would be escaped as well.
     *
     * @return string DokuWiki page.
     * @author Johannes Buchner <buchner.johannes [at] gmx.at>
     * @author Frederik Tilkin
     */
    public function convert()
    {
        // Start from a clean slate so calling convert() more than once does
        // not keep piling up the code blocks of earlier runs.
        $this->codeBlock = array();

        $record = $this->convertCodeBlocks($this->record);
        $record = $this->convertHeadings($record);
        $record = $this->convertList($record);
        $record = $this->convertUrlText($record);
        $record = $this->convertLink($record);
        $record = $this->convertDoubleSlash($record);
        $record = $this->convertBoldItalic($record);
        $record = $this->convertTalks($record);
        $record = $this->convertImagesFiles($record);

        if (count($this->codeBlock) > 0) {
            $record = $this->replaceStoredCodeBlocks($record);
        }

        return $record;
    }

    /**
     * Run a list of regular expression replacements, in order.
     *
     * @param array  $patterns Map of PCRE pattern => replacement string.
     * @param string $record   Text to convert.
     *
     * @return string Converted text, or $record unchanged when PCRE reports
     *                an error (preg_replace() returns null in that case and
     *                passing null on would wipe the page for all later
     *                steps).
     */
    private function applyPatterns(array $patterns, $record)
    {
        $converted = preg_replace(
            array_keys($patterns),
            array_values($patterns),
            $record
        );

        return ($converted === null) ? $record : $converted;
    }

    /**
     * Double forward slashes are not italic. There is no double slash syntax
     * rule in MediaWiki. This conversion must happen before the conversion of
     * italic markup.
     *
     * A double slash directly after a colon (as in "http://") is left alone,
     * and so is one at the very start of the record because the pattern
     * needs a preceding character.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertDoubleSlash($record)
    {
        return $this->applyPatterns(
            array('/([^:])\/\//m' => '\1<nowiki>//</nowiki>'),
            $record
        );
    }

    /**
     * Code blocks.
     *
     * Replaces every PRE element with a CODE element holding a placeholder
     * (see storeCodeBlock()) and joins CODE elements that are separated only
     * by a blank line.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertCodeBlocks($record)
    {
        // Change the ones that have been replaced in a link [] BACK to
        // normal. One pass only unwraps the last <nowiki> inside a link, so
        // it is done twice in case of
        // [http://addres.com http://address.com] [quick and dirty]. The two
        // passes must be separate calls: identical array keys would collapse
        // into a single entry.
        // NOTE(review): convert() calls this method before
        // convertDoubleSlash(), so only <nowiki> markers already present in
        // the source can match here -- confirm whether it should run later.
        $nowikiInLink = array(
            '/([\[][^\[]*)(<nowiki>)(\/\/+)(<\/nowiki>)([^\]]*)/' => '\1\3\5'
        );
        for ($pass = 0; $pass < 2; $pass++) {
            $record = $this->applyPatterns($nowikiInLink, $record);
        }

        // A callback receives the raw match. The former /e modifier is gone
        // since PHP 7 and used to add backslashes to quotes in the code.
        $stashed = preg_replace_callback(
            '@<pre>(.*?)</pre>@s',
            array($this, 'storeCodeBlockMatch'),
            $record
        );
        if ($stashed !== null) {
            $record = $stashed;
        }

        return $this->applyPatterns(
            array('@</code>\n[ \t]*\n<code>@' => ''),
            $record
        );
    }

    /**
     * preg_replace_callback() adapter for storeCodeBlock().
     *
     * @param array $matches Match of the PRE pattern; index 1 holds the
     *                       content between the tags.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlockMatch(array $matches)
    {
        return $this->storeCodeBlock(isset($matches[1]) ? $matches[1] : '');
    }

    /**
     * Replace content in PRE tag with placeholder. This is done so no more
     * conversions are performed with the contents. The last thing this class
     * will do is replace those placeholders with their original content.
     *
     * @param string $code Contents of PRE tag.
     *
     * @return string CODE tag with placeholder in content.
     */
    private function storeCodeBlock($code)
    {
        $this->codeBlock[] = $code;

        // The placeholder carries the index of the block just stored.
        $replace = $this->placeholder . (count($this->codeBlock) - 1) . '@@';

        return "<code>$replace</code>";
    }

    /**
     * Replace PRE tag placeholders back with their original content.
     *
     * All placeholders are swapped in a single pass, so text that was just
     * restored is never scanned for further placeholders.
     *
     * @param string $record Converted record.
     *
     * @return string Record with placeholders removed.
     */
    private function replaceStoredCodeBlocks($record)
    {
        // strtr() with an empty map returns false on PHP versions before 8.
        if (count($this->codeBlock) === 0) {
            return $record;
        }

        $restore = array();
        foreach ($this->codeBlock as $index => $code) {
            $restore[$this->placeholder . $index . '@@'] = $code;
        }

        return strtr($record, $restore);
    }

    /**
     * Convert images and files.
     *
     * Every [[Image:...]] or [[File:...]] tag that sits on a single line is
     * replaced with the result of convertImage().
     *
     * @param string $record Converted record.
     *
     * @return string
     */
    private function convertImagesFiles($record)
    {
        // Replace the full tag, [[File:example.jpg|options|caption]],
        // with the DokuWiki equivalent.
        $converted = preg_replace_callback(
            '/\[\[(Image|File):(.*?)\]\]/',
            array($this, 'convertImageMatch'),
            $record
        );

        return ($converted === null) ? $record : $converted;
    }

    /**
     * preg_replace_callback() adapter for convertImage().
     *
     * @param array $matches Match of the image/file pattern; index 2 holds
     *                       everything after "Image:" or "File:".
     *
     * @return string DokuWiki version of tag.
     */
    private function convertImageMatch(array $matches)
    {
        return $this->convertImage($matches[2]);
    }

    /**
     * Process a MediaWiki image tag.
     *
     * Recognised options are "left", "right", "center" and a pixel size
     * ("200px" or "100x50px"). Any other option is taken as the caption; when
     * there are several, the last one wins. A tag whose only option is
     * "link=target" becomes a DokuWiki link wrapped around the image.
     *
     * @param string $detail Filename and options, ie.
     *                       example.jpg|options|caption.
     *
     * @return string DokuWiki version of tag.
     */
    private function convertImage($detail)
    {
        $options = explode('|', $detail);
        $imageFilename = array_shift($options);

        // Image link.
        if (count($options) == 1 && substr($options[0], 0, 5) === 'link=') {
            return '[[' . substr($options[0], 5)
                . '|{{wiki:' . $imageFilename . '}}]]';
        }

        $leadingSpace = '';
        $trailingSpace = '';
        $imageSize = '';
        $caption = '';

        foreach ($options as $option) {
            // DokuWiki aligns by padding: a space on the right aligns the
            // image to the left, a space on the left aligns it to the right
            // and a space on both sides centers it.
            if ($option === 'left') {
                $trailingSpace = ' ';
            } elseif ($option === 'right') {
                $leadingSpace = ' ';
            } elseif ($option === 'center') {
                $leadingSpace = $trailingSpace = ' ';
            } elseif (substr($option, -2) === 'px') {
                if (preg_match('/((\d+)x)?(\d+)px/', $option, $size) === 1) {
                    // Group 1 is empty when only one dimension is given.
                    $imageSize = ($size[1] === '')
                        ? $size[3]
                        : $size[2] . 'x' . $size[3];
                }
            } else {
                $caption = $option;
            }
        }

        $converted = '{{' . $leadingSpace . 'wiki:' . $imageFilename;

        if ($imageSize != '') {
            $converted .= '?' . $imageSize;
        }

        // The right hand padding has to follow the size parameter: DokuWiki
        // looks for it at the very end of the part in front of the caption.
        $converted .= $trailingSpace;

        if ($caption != '') {
            $converted .= '|' . $caption;
        }

        return $converted . '}}';
    }

    /**
     * Convert talks.
     *
     * A run of colons at the start of a line (optionally preceded by
     * spaces) becomes the same number of ">" characters. Colons anywhere
     * else on a line are left alone.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertTalks($record)
    {
        // The m modifier makes ^ match at every line start, not only at the
        // start of the record.
        $converted = preg_replace_callback(
            '/^[ ]*(:+)/m',
            array($this, 'convertTalkIndent'),
            $record
        );

        return ($converted === null) ? $record : $converted;
    }

    /**
     * preg_replace_callback() adapter for convertTalks().
     *
     * @param array $matches Match of the talk pattern; index 1 holds the run
     *                       of colons.
     *
     * @return string One ">" per colon.
     */
    private function convertTalkIndent(array $matches)
    {
        return str_repeat('>', strlen($matches[1]));
    }

    /**
     * Convert bold and italic.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertBoldItalic($record)
    {
        // Text wrapped in five quotes on both sides is marked with temporary
        // tokens first. Emitting "//**...**//" right away would let the
        // "**//" swap below turn the closing marker into "//**".
        $open  = $this->placeholder . 'BI_OPEN@@';
        $close = $this->placeholder . 'BI_CLOSE@@';

        $patterns = array(
            // Lazy, so two bold italic spans on one line stay separate.
            "/'''''(.*?)'''''/" => $open . '\1' . $close,
            "/'''/"             => '**',
            "/''/"              => '//',

            // Changes by Reiner Rottmann: - fixed erroneous interpretation
            // of combined bold and italic text.
            // NOTE(review): this swaps every "**//", including one that
            // closes a span (''a '''b''''' ends up as "//a **b//**") --
            // confirm whether it should be limited to opening markers.
            '@\*\*//@'          => '//**'
        );

        $converted = $this->applyPatterns($patterns, $record);

        return str_replace(
            array($open, $close),
            array('//**', '**//'),
            $converted
        );
    }

    /**
     * Convert [link] => [[link]].
     *
     * Text that is already wrapped in double brackets does not match.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertLink($record)
    {
        return $this->applyPatterns(
            array('/([^[]|^)(\[[^]]*\])([^]]|$)/' => '\1[\2]\3'),
            $record
        );
    }

    /**
     * Convert [url text] => [url|text].
     *
     * Only the first space inside the brackets is turned into the separator.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertUrlText($record)
    {
        return $this->applyPatterns(
            array('/([^[]|^)(\[[^] ]*) ([^]]*\])([^]]|$)/' => '\1\2|\3\4'),
            $record
        );
    }

    /**
     * Convert lists.
     *
     * Handles up to four levels of "*" (unordered) and "#" (ordered) markers
     * at the start of a line; each level is indented by two more spaces.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertList($record)
    {
        $patterns = array(
            '/^\* /m'    => '  * ',
            '/^\*{2} /m' => '    * ',
            '/^\*{3} /m' => '      * ',
            '/^\*{4} /m' => '        * ',
            '/^# /m'     => '  - ',
            '/^#{2} /m'  => '    - ',
            '/^#{3} /m'  => '      - ',
            '/^#{4} /m'  => '        - '
        );

        return $this->applyPatterns($patterns, $record);
    }

    /**
     * Convert headings. Syntax between MediaWiki and DokuWiki is completely
     * opposite: the largest heading in MediaWiki is two equal marks while in
     * DokuWiki it's six equal marks. This creates a problem since the first
     * replaced string of two marks will be caught by the last search string
     * also of two marks, resulting in eight total equal marks.
     *
     * To avoid that, every replacement is prefixed with the placeholder so
     * a converted line no longer starts with an equal mark; the prefix is
     * removed again once all patterns have run.
     *
     * @param string $record
     *
     * @return string
     */
    private function convertHeadings($record)
    {
        $patterns = array(
            '/^======(.+)======\s*$/m' => '==\1==',
            '/^=====(.+)=====\s*$/m'   => '==\1==',
            '/^====(.+)====\s*$/m'     => '==\1==',
            '/^===(.+)===\s*$/m'       => '===\1===',
            '/^==(.+)==\s*$/m'         => '====\1====',
            '/^=(.+)=\s*$/m'           => '=====\1====='
        );

        // Insert a unique string to the replacement so that it won't be
        // caught in a search later.
        $marked = array();
        foreach ($patterns as $pattern => $replacement) {
            $marked[$pattern] = $this->placeholder . $replacement;
        }

        $convertedRecord = $this->applyPatterns($marked, $record);

        // No headings were found. Returning early keeps the str_replace()
        // below away from records that were not changed at all.
        if ($convertedRecord === $record) {
            return $record;
        }

        // Strip out the unique strings.
        return str_replace($this->placeholder, '', $convertedRecord);
    }
}
448
449