1<?php
2/**
3 * Interwiki Formatting Plugin (Syntax Component)
4 *
5 * Description: The Interwiki Formatting Plugin automatically produces formatted titles for interwiki links.
6 *
7 * Syntax: Default Dokuwiki interwiki link syntax. See: https://www.dokuwiki.org/wiki:syntax#interwiki
8 *
9 * @license    The MIT License (https://opensource.org/licenses/MIT)
10 * @author     Jay Jeckel
11 *
12 * Copyright (c) 2016 Jay Jeckel
13 * Licensed under the MIT license: https://opensource.org/licenses/MIT
14 * Permission is granted to use, copy, modify, and distribute the work.
15 * Full license information available in the project LICENSE file.
16*/
17
// Abort immediately unless the DOKU_INC constant has been defined by the including code.
defined('DOKU_INC') || die();
19
/**
 * Syntax component that generates a readable title for interwiki links
 * that were written without a manually defined title.
 *
 * The title is derived from the link target: the query string and a known
 * file extension can be stripped, configured separator characters become
 * spaces, anchors and slashes are swapped for configured replacement text,
 * spaces are inserted before capital letters, and words can optionally be
 * capitalized (with configurable exceptions).
 */
class syntax_plugin_interwikiformatting extends DokuWiki_Syntax_Plugin
{
    // Pattern to match interwiki links that do not include a manually defined title.
    const PATTERN_INTERWIKI_LINK = '\[\[[a-zA-Z0-9\.]+?>[^\|\]]+?\]\]';

    // Pattern to match capital letters that are not preceded by a word boundary, start of string, or another capital letter
    // (unless they are followed by a lowercase letter) and are not followed by a capital letter or end of string.
    // This is a slightly modified version of the regex found in the accepted answer to a stackoverflow question:
    // https://stackoverflow.com/questions/8611617/how-can-i-add-a-space-in-string-at-capital-letters-but-keep-continuous-capitals
    const PATTERN_INSERT_SPACES = '(?<!\b|^)((?<![[:upper:]])[[:upper:]]|[[:upper:]](?!([[:upper:]]|$)))';

    /**
     * Return the plugin information parsed from the plugin.info.txt file next to this file.
     */
    public function getInfo() { return confToHash(dirname(__FILE__) . '/plugin.info.txt'); }

    /**
     * Return the syntax mode type.
     *
     * NOTE: the spelling 'substition' is deliberate; it is the identifier DokuWiki
     * uses for this mode type and must not be "corrected".
     */
    public function getType() { return 'substition'; }

    /**
     * Return the sort order. Must come before the [[link]] pattern is handled at 290.
     */
    public function getSort() { return 289; }

    /**
     * Register the title-less interwiki link pattern for the given mode.
     *
     * @param string $mode Name of the mode the pattern is connected to.
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern(self::PATTERN_INTERWIKI_LINK, $mode, 'plugin_interwikiformatting');
    }

    /**
     * Handle a matched title-less interwiki link.
     *
     * All the work is done here: an 'interwikilink' call is added directly to the
     * handler, so in every case an interwiki link is created and null is returned.
     * Links whose shortcut is listed in the 'ignored_shortcuts' setting are emitted
     * with a null title, exactly as an unformatted interwiki link would be.
     *
     * @param string       $match   The matched text, including the enclosing [[ and ]].
     * @param mixed        $state   Lexer state (not used).
     * @param mixed        $pos     Position value forwarded unchanged to the handler call.
     * @param Doku_Handler $handler Handler that receives the interwikilink call.
     * @return null
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        // Get text inside the [[]] enclosing characters.
        $match = substr($match, 2, -2);
        // Split the matched text into its shortcut and target; the pattern guarantees a '>'.
        list($shortcut, $target) = explode('>', $match, 2);
        // The link is always emitted with a lower-cased shortcut.
        $wiki = strtolower($shortcut);

        // Compare against the ignore list in lower case as well, since that is the form
        // of the shortcut that is emitted. The strict flag prevents numeric-looking
        // shortcuts (e.g. '10' and '1e1') from being treated as equal.
        $ignored_shortcuts = array_map('strtolower', $this->splitSetting($this->getConf('ignored_shortcuts')));
        if (in_array($wiki, $ignored_shortcuts, true))
        {
            // No formatting wanted: create the interwiki link as it would be created normally.
            $this->addInterwikiLink($handler, array($match, null, $wiki, $target), $pos);
            return null;
        }

        // Create the interwiki link with the constructed title.
        $this->addInterwikiLink($handler, array($match, $this->formatTitle($target), $wiki, $target), $pos);
        return null;
    }

    /**
     * Does nothing, as all work is done in the handle() method; it only has to be
     * defined to avoid a 'not implemented' error. Simply returns true.
     *
     * @param string        $format   Output format (not used).
     * @param Doku_Renderer $renderer Renderer instance (not used).
     * @param mixed         $data     Data returned by handle() (not used).
     * @return bool Always true.
     */
    public function render($format, Doku_Renderer $renderer, $data) { return true; }

    /**
     * Build the display title for an interwiki link target from the plugin settings.
     *
     * @param string $target The part of the link after the '>' separator.
     * @return string The formatted title; may be an empty string.
     */
    private function formatTitle($target)
    {
        // Read the formatting settings.
        $strippable_extensions = $this->splitSetting($this->getConf('strippable_extensions'));
        $strip_query = ($this->getConf('strip_query') == 1);
        $word_separators = $this->splitSetting($this->getConf('word_separators'));
        $anchor_replacement = (string)$this->getConf('anchor_replacement');
        $slash_replacement = (string)$this->getConf('slash_replacement');
        $capitalize_words = ($this->getConf('capitalize_words') == 1);
        $capitalize_exceptions = $this->splitSetting($this->getConf('capitalize_exceptions'));

        // Merge all the replacement text into one map so it can be applied in a single pass.
        $text_replacements = array();
        foreach ($word_separators as $sep) { $text_replacements[$sep] = ' '; }
        $text_replacements['#'] = $anchor_replacement;
        $text_replacements['/'] = $slash_replacement;

        // Start from the target with whitespace, anchor, and slash trimmed from both ends.
        $title = trim($target, "\r\n\t\f\v /#");

        // If stripping the query part is enabled, keep only what precedes the last question mark.
        if ($strip_query)
        {
            $index = strrpos($title, '?');
            if ($index !== false) { $title = substr($title, 0, $index); }
        }

        // Remove the first strippable extension that matches at the end of the title.
        foreach ($strippable_extensions as $ext)
        {
            $count = 0;
            // The extension is configuration text, so quote it to keep it a literal in the regex.
            $title = preg_replace('/\.' . preg_quote($ext, '/') . '$/i', '', $title, 1, $count);
            if ($count > 0) { break; }
        }

        // Perform all the basic text replacement. strtr() never rescans text it has already
        // inserted, so one replacement cannot be altered by another (for example an anchor
        // replacement that itself contains a slash).
        $title = strtr($title, $text_replacements);
        // Insert a space before capital letters. See the pattern's comment for its details.
        $title = preg_replace('/' . self::PATTERN_INSERT_SPACES . '/', ' $1', $title);
        // Replace each non-space whitespace character with a space.
        $title = preg_replace('/[\r\n\t\f\v]+?/', ' ', $title);
        // Convert multiple spaces into a single space.
        $title = preg_replace('/[ ]{2,}/', ' ', $title);
        // Trim whitespace, anchor, and slash from the beginning and end of the title.
        $title = trim($title, "\r\n\t\f\v /#");

        if ($capitalize_words)
        {
            // Capitalize the first letter of each word.
            $title = ucwords($title);

            // Only non-empty replacement texts may take part in the lookarounds below: an
            // empty alternative always matches, which would stop every exception from applying.
            $boundaries = array();
            foreach (array($slash_replacement, $anchor_replacement) as $replacement)
            {
                if ($replacement !== '') { $boundaries[] = preg_quote($replacement, '/'); }
            }
            $not_after = implode('|', array_merge(array('^'), $boundaries));
            $not_before = implode('|', array_merge($boundaries, array('$')));

            foreach ($capitalize_exceptions as $exception)
            {
                // Pattern is case insensitive and matches the exception as long as it is not the first/last word
                // of the title and is not the first word before or after an anchor/slash replacement.
                $pattern = '/(?<!' . $not_after . ')\b' . preg_quote($exception, '/') . '\b(?!' . $not_before . ')/i';
                // Replace matches with their exception text. No enforcement of all lowercase is done on the
                // replacement text; its exact form is left up to the admin's choice in the configuration setting.
                // For example, if an exception is defined as 'bIRd', then that is how it will be displayed.
                $title = preg_replace($pattern, $exception, $title);
            }
        }

        return $title;
    }

    /**
     * Split a space separated setting value into its entries, dropping empty ones.
     *
     * An empty setting or doubled spaces would otherwise produce empty entries.
     *
     * @param mixed $value Raw setting value.
     * @return array List of non-empty strings.
     */
    private function splitSetting($value)
    {
        $entries = array();
        foreach (explode(' ', (string)$value) as $entry)
        {
            if ($entry !== '') { $entries[] = $entry; }
        }
        return $entries;
    }

    /**
     * Add an 'interwikilink' call to the handler.
     *
     * Uses the handler's addCall() method when it exists and falls back to _addCall()
     * otherwise, so the plugin works with handlers that provide either name.
     *
     * @param Doku_Handler $handler Handler that receives the call.
     * @param array        $args    Call arguments: match, title, lower-cased shortcut, target.
     * @param mixed        $pos     Position value forwarded unchanged to the handler.
     */
    private function addInterwikiLink(Doku_Handler $handler, array $args, $pos)
    {
        if (method_exists($handler, 'addCall'))
        {
            $handler->addCall('interwikilink', $args, $pos);
        }
        else
        {
            $handler->_addCall('interwikilink', $args, $pos);
        }
    }
}
132
133//Setup VIM: ex: et ts=4 enc=utf-8 :
134?>