<?php
/**
 * Interwiki Formatting Plugin (Syntax Component)
 *
 * Description: The Interwiki Formatting Plugin automatically produces formatted titles for interwiki links.
 *
 * Syntax: Default Dokuwiki interwiki link syntax. See: https://www.dokuwiki.org/wiki:syntax#interwiki
 *
 * @license    The MIT License (https://opensource.org/licenses/MIT)
 * @author     Jay Jeckel
 *
 * Copyright (c) 2016 Jay Jeckel
 * Licensed under the MIT license: https://opensource.org/licenses/MIT
 * Permission is granted to use, copy, modify, and distribute the work.
 * Full license information available in the project LICENSE file.
*/
 
// Stop immediately unless the DOKU_INC constant has been defined, so this file does nothing when
// it is requested directly (presumably DOKU_INC is defined by the DokuWiki bootstrap -- confirm).
if(!defined('DOKU_INC')) { die(); }

class syntax_plugin_interwikiformatting extends DokuWiki_Syntax_Plugin
{
    // Pattern to match interwiki links that do not include a manually defined title.
    const PATTERN_INTERWIKI_LINK = '\[\[[a-zA-Z0-9\.]+?>[^\|\]]+?\]\]';

    // Pattern to match capital letters that are not preceded by a word boundary, start of string, or another capital letter
    // (unless they are followed by a lowercase letter) and are not followed by a capital letter or end of string.
    // This is a slightly modified version of the regex found in the accepted answer to a stackoverflow question:
    // https://stackoverflow.com/questions/8611617/how-can-i-add-a-space-in-string-at-capital-letters-but-keep-continuous-capitals
    const PATTERN_INSERT_SPACES = '(?<!\b|^)((?<![[:upper:]])[[:upper:]]|[[:upper:]](?!([[:upper:]]|$)))';

    // Returns the plugin information parsed from the plugin.info.txt file that sits next to this script.
    public function getInfo() { return confToHash(dirname(__FILE__) . '/plugin.info.txt'); }

    // Returns the syntax mode type of this plugin.
    // NOTE: 'substition' is the spelling DokuWiki itself uses for this mode type; do not "correct" it.
    public function getType() { return 'substition'; }

    // Returns the sort order of this plugin.
    public function getSort() { return 289; }// Must come before the [[link]] pattern is handled at 290.

    // Registers the interwiki link pattern as a special pattern for the given lexer mode.
    public function connectTo($mode) { $this->Lexer->addSpecialPattern(self::PATTERN_INTERWIKI_LINK, $mode, 'plugin_interwikiformatting'); }

    // All our work is done here in the handle method because we hand the link straight to the handler's
    // built-in call list. In all cases an interwiki link is created and null is returned.
    //
    // $match   - The matched text, i.e. a complete [[shortcut>target]] link without a manual title.
    // $state   - The lexer state (not used).
    // $pos     - The position of the match, passed on unchanged to the handler.
    // $handler - The handler that receives the 'interwikilink' call.
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        // Get text inside the [[]] enclosing characters.
        $match = substr($match, 2, -2);
        // Explode the matched text into its shortcut and target. The link pattern guarantees a '>' is present.
        list($shortcut, $target) = explode('>', $match, 2);

        // Shortcuts listed in the 'ignored_shortcuts' setting get a normal link without a generated title.
        // The comparison is strict so numeric-looking strings such as '10' and '1e1' are not treated as equal.
        $ignored_shortcuts = $this->splitSetting($this->getConf('ignored_shortcuts'));
        $title = in_array($shortcut, $ignored_shortcuts, true) ? null : $this->formatTitle($target);

        // Create the interwiki link, with the constructed title unless the shortcut is ignored.
        $this->addInterwikiLink($handler, $match, $title, $shortcut, $target, $pos);
        return null;
    }

    // Function isn't used as all work is done in the handle() method, but it has to be defined to avoid a 'not implemented' error.
    // Simply returns true. As of now, the return value of this method is not used by Dokuwiki, so if that changes in a future
    // update, then this method may need some changes.
    public function render($format, Doku_Renderer $renderer, $data) { return true; }

    // Splits a space separated configuration value into a list of its non-empty entries.
    // Empty entries (from an empty setting or repeated spaces) are dropped so they can never act as an
    // empty extension, separator, or exception further down the line.
    private function splitSetting($value)
    {
        return preg_split('/ /', (string) $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    // Adds an 'interwikilink' call to the handler. The shortcut is lowercased for the call.
    // Uses addCall() when the handler provides it and falls back to the older _addCall() otherwise.
    // NOTE(review): addCall() is understood to be the replacement for the deprecated _addCall() in newer
    // DokuWiki releases -- confirm against the targeted DokuWiki versions.
    private function addInterwikiLink(Doku_Handler $handler, $match, $title, $shortcut, $target, $pos)
    {
        $arguments = array($match, $title, strtolower($shortcut), $target);
        if (method_exists($handler, 'addCall')) { $handler->addCall('interwikilink', $arguments, $pos); }
        else { $handler->_addCall('interwikilink', $arguments, $pos); }
    }

    // Builds the formatted title for the given interwiki target according to the configuration settings.
    // Returns the title as a string.
    private function formatTitle($target)
    {
        // Characters trimmed from both ends of the title: whitespace, slash, and anchor.
        $trim_characters = "\r\n\t\f\v /#";

        // Define the formatting variables for the configuration settings.
        $strippable_extensions = $this->splitSetting($this->getConf('strippable_extensions'));
        $strip_query = ($this->getConf('strip_query') == 1);
        $word_separators = $this->splitSetting($this->getConf('word_separators'));
        $anchor_replacement = (string) $this->getConf('anchor_replacement');
        $slash_replacement = (string) $this->getConf('slash_replacement');
        $capitalize_words = ($this->getConf('capitalize_words') == 1);

        // Merge all the replacement text into one map to avoid replacements replacing other replacements.
        $text_replacements = array();
        foreach ($word_separators as $separator) { $text_replacements[$separator] = ' '; }
        $text_replacements['#'] = $anchor_replacement;
        $text_replacements['/'] = $slash_replacement;

        $title = trim($target, $trim_characters);
        // If stripping the query part is enabled, keep only what comes before the last question mark.
        if ($strip_query)
        {
            $index = strrpos($title, '?');
            if ($index !== false) { $title = substr($title, 0, $index); }
        }
        // Iterate through the strippable extensions and remove the first one that matches.
        foreach ($strippable_extensions as $extension)
        {
            $count = 0;
            // The extension is quoted so it is matched literally (case insensitive) at the end of the title.
            $title = preg_replace('/\.' . preg_quote($extension, '/') . '$/i', '', $title, 1, $count);
            if ($count == 1) { break; }
        }
        // Perform all the basic text replacement. strtr() works in a single pass over the title, so text
        // inserted by one replacement is never picked up again by another one.
        $title = strtr($title, $text_replacements);
        // Insert a space before capital letters. See the pattern's comment for its details.
        $title = preg_replace('/' . self::PATTERN_INSERT_SPACES . '/', ' $1', $title);
        // Replace any non-space whitespace with a space.
        $title = preg_replace('/[\r\n\t\f\v]+?/', ' ', $title);
        // Convert multiple spaces into a single space.
        $title = preg_replace('/[ ]{2,}/', ' ', $title);
        // Trim whitespace, anchor, and slash from the beginning and end of the title.
        $title = trim($title, $trim_characters);

        if ($capitalize_words)
        {
            $title = $this->capitalizeTitle($title, $slash_replacement, $anchor_replacement);
        }
        return $title;
    }

    // Capitalizes the first letter of each word in the title and then applies the configured
    // capitalization exceptions. Returns the resulting title.
    private function capitalizeTitle($title, $slash_replacement, $anchor_replacement)
    {
        $title = ucwords($title);

        // Collect the quoted replacement texts that mark a boundary an exception must not touch.
        // Empty replacements are left out: an empty alternative always matches, which would make the
        // negative assertions below fail everywhere and silently disable every exception.
        $boundaries = array();
        foreach (array($slash_replacement, $anchor_replacement) as $replacement)
        {
            if ($replacement !== '') { $boundaries[] = preg_quote($replacement, '/'); }
        }
        $not_before = implode('|', array_merge(array('^'), $boundaries));
        $not_after = implode('|', array_merge($boundaries, array('$')));

        foreach ($this->splitSetting($this->getConf('capitalize_exceptions')) as $exception)
        {
            // Pattern is case insensitive and matches the exception as long as it is not the first/last word of the title
            // and is not the first word before or after an anchor/slash replacement.
            $pattern = '/(?<!' . $not_before . ')\b' . preg_quote($exception, '/') . '\b(?!' . $not_after . ')/i';
            // Replace matches with their exception text. No enforcement of all lowercase is done on the replacement text
            // and its exact form is left up to the admin's choice in the configuration setting.
            // For example, if for some reason an exception is defined as 'bIRd', then that is how it will be displayed.
            $title = preg_replace($pattern, $exception, $title);
        }
        return $title;
    }
}
 
//Setup VIM: ex: et ts=4 enc=utf-8 :
?>