1<?php
2
3use ComboStrap\LogUtility;
4use ComboStrap\PageRules;
5use ComboStrap\PluginUtility;
6use ComboStrap\Sqlite;
7use ComboStrap\Page;
8use ComboStrap\UrlManagerBestEndPage;
9use ComboStrap\UrlUtility;
10
// Stop immediately when DOKU_INC is not defined (presumably the file was not loaded through DokuWiki)
if (!defined('DOKU_INC')) die();
// Define the plugin directory from DOKU_INC when it has not been defined yet
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
13// Needed for the page lookup
14//require_once(DOKU_INC . 'inc/fulltext.php');
15// Needed to get the redirection manager
16// require_once(DOKU_PLUGIN . 'action.php');
17
18require_once(__DIR__ . '/../ComboStrap/PageRules.php');
19require_once(__DIR__ . '/../ComboStrap/Page.php');
20require_once(__DIR__ . '/../ComboStrap/UrlUtility.php');
21require_once(__DIR__ . '/../ComboStrap/Sqlite.php');
22require_once(__DIR__ . '/../ComboStrap/UrlManagerBestEndPage.php');
23require_once(__DIR__ . '/urlmessage.php');
24
/**
 * Class action_plugin_combo_urlmanager
 *
 * The actual URL manager
 *
 *
 */
32class action_plugin_combo_urlmanager extends DokuWiki_Action_Plugin
33{
34
    // Name of the configuration that enables the url manager (read in register)
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";

    // The redirect type (the method stored in the redirection log)
    // Http is logged by httpRedirect, Id is logged by performIdRedirect
    const REDIRECT_HTTP = 'Http';
    const REDIRECT_ID = 'Id';

    // Where the target id value comes from
    // (passed as target origin to the redirect functions and stored as type in the redirection log)
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';


    // The constant parameters
    // The values compared with the 'ActionReaderFirst', 'ActionReaderSecond'
    // and 'ActionReaderThird' configurations in _handle404
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    // Also used as configuration name (a value of 1 sends a writer to the edit mode)
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    // Passed to LogUtility::msg as canonical
    const CANONICAL = 'url/manager';


    /**
     * The page rules, created in _handle404 once the sqlite database is available
     * @var PageRules
     */
    private $pageRules;
69
70
71    function __construct()
72    {
73        // enable direct access to language strings
74        // ie $this->lang
75        $this->setupLocale();
76
77    }
78
79
80    function register(Doku_Event_Handler $controller)
81    {
82
83        if(PluginUtility::getConfValue(self::URL_MANAGER_ENABLE_CONF,1)) {
84            /* This will call the function _handle404 */
85            $controller->register_hook('DOKUWIKI_STARTED',
86                'AFTER',
87                $this,
88                '_handle404',
89                array());
90        }
91
92    }
93
94    /**
95     * Verify if there is a 404
96     * Inspiration comes from <a href="https://github.com/splitbrain/dokuwiki-plugin-notfound/blob/master/action.php">Not Found Plugin</a>
97     * @param $event Doku_Event
98     * @param $param
99     * @return false|void
100     * @throws Exception
101     */
102    function _handle404(&$event, $param)
103    {
104
105        global $ID;
106
107        /**
108         * Without SQLite, this module does not work further
109         */
110        $sqlite = Sqlite::getSqlite();
111        if ($sqlite == null) {
112            return;
113        } else {
114            $this->pageRules = new PageRules();
115        }
116
117        /**
118         * If the page exists
119         * return
120         */
121        $targetPage = Page::createPageFromId($ID);
122        if ($targetPage->exists()) {
123            action_plugin_combo_urlmessage::unsetNotification();
124            $targetPage->processAndPersistInDb();
125            return false;
126        }
127
128
129        global $ACT;
130        if ($ACT != 'show') return;
131
132
133        // Global variable needed in the process
134        global $conf;
135
136        /**
137         * Page Id is a Canonical ?
138         */
139        $targetPage = Page::createPageFromCanonical($ID);
140        if ($targetPage->exists()) {
141            $this->performIdRedirect($targetPage->getId(), self::TARGET_ORIGIN_CANONICAL);
142            return;
143        }
144
145        // If there is a redirection defined in the page rules
146        $result = $this->processingPageRules();
147        if ($result) {
148            // A redirection has occurred
149            // finish the process
150            return;
151        }
152
153        /**
154         *
155         * There was no redirection found, redirect to edit mode if writer
156         *
157         */
158        if ($this->userCanWrite() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
159
160            $this->gotToEditMode($event);
161            // Stop here
162            return;
163
164        }
165
166        /*
167         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
168         */
169        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
170            return;
171        }
172
173        // We are reader and their is no redirection set, we apply the algorithm
174        $readerAlgorithms = array();
175        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
176        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
177        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
178
179        $i = 0;
180        while (isset($readerAlgorithms[$i])) {
181
182            switch ($readerAlgorithms[$i]) {
183
184                case self::NOTHING:
185                    return;
186                    break;
187
188                case self::GO_TO_BEST_END_PAGE_NAME:
189
190                    list($page, $method) = UrlManagerBestEndPage::process($ID);
191                    if ($page != null) {
192                        if ($method == self::REDIRECT_HTTP) {
193                            $this->httpRedirect($page, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
194                        } else {
195                            $this->performIdRedirect($targetPage->getId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
196                        }
197                        return;
198                    }
199                    break;
200
201                case self::GO_TO_NS_START_PAGE:
202
203                    // Start page with the conf['start'] parameter
204                    $startPage = getNS($ID) . ':' . $conf['start'];
205                    if (page_exists($startPage)) {
206                        $this->httpRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
207                        return;
208                    }
209
210                    // Start page with the same name than the namespace
211                    $startPage = getNS($ID) . ':' . curNS($ID);
212                    if (page_exists($startPage)) {
213                        $this->httpRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
214                        return;
215                    }
216                    break;
217
218                case self::GO_TO_BEST_PAGE_NAME:
219
220                    $bestPageId = null;
221
222                    $bestPage = $this->getBestPage($ID);
223                    $bestPageId = $bestPage['id'];
224                    $scorePageName = $bestPage['score'];
225
226                    // Get Score from a Namespace
227                    $bestNamespace = $this->scoreBestNamespace($ID);
228                    $bestNamespaceId = $bestNamespace['namespace'];
229                    $namespaceScore = $bestNamespace['score'];
230
231                    // Compare the two score
232                    if ($scorePageName > 0 or $namespaceScore > 0) {
233                        if ($scorePageName > $namespaceScore) {
234                            $this->httpRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
235                        } else {
236                            $this->httpRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
237                        }
238                        return;
239                    }
240                    break;
241
242                case self::GO_TO_BEST_NAMESPACE:
243
244                    $scoreNamespace = $this->scoreBestNamespace($ID);
245                    $bestNamespaceId = $scoreNamespace['namespace'];
246                    $score = $scoreNamespace['score'];
247
248                    if ($score > 0) {
249                        $this->httpRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
250                        return;
251                    }
252                    break;
253
254                case self::GO_TO_SEARCH_ENGINE:
255
256                    $this->redirectToSearchEngine();
257
258                    return;
259                    break;
260
261                // End Switch Action
262            }
263
264            $i++;
265            // End While Action
266        }
267        // End if not connected
268
269        return;
270
271    }
272
273
274    /**
275     * getBestNamespace
276     * Return a list with 'BestNamespaceId Score'
277     * @param $id
278     * @return array
279     */
280    private
281    function scoreBestNamespace($id)
282    {
283
284        global $conf;
285
286        // Parameters
287        $pageNameSpace = getNS($id);
288
289        // If the page has an existing namespace start page take it, other search other namespace
290        $startPageNameSpace = $pageNameSpace . ":";
291        $dateAt = '';
292        // $startPageNameSpace will get a full path (ie with start or the namespace
293        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
294        if (page_exists($startPageNameSpace)) {
295            $nameSpaces = array($startPageNameSpace);
296        } else {
297            $nameSpaces = ft_pageLookup($conf['start']);
298        }
299
300        // Parameters and search the best namespace
301        $pathNames = explode(':', $pageNameSpace);
302        $bestNbWordFound = 0;
303        $bestNamespaceId = '';
304        foreach ($nameSpaces as $nameSpace) {
305
306            $nbWordFound = 0;
307            foreach ($pathNames as $pathName) {
308                if (strlen($pathName) > 2) {
309                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
310                }
311            }
312            if ($nbWordFound > $bestNbWordFound) {
313                // Take only the smallest namespace
314                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
315                    $bestNbWordFound = $nbWordFound;
316                    $bestNamespaceId = $nameSpace;
317                }
318            }
319        }
320
321        $startPageFactor = $this->getConf('WeightFactorForStartPage');
322        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
323        if ($bestNbWordFound > 0) {
324            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
325        } else {
326            $bestNamespaceScore = 0;
327        }
328
329
330        return array(
331            'namespace' => $bestNamespaceId,
332            'score' => $bestNamespaceScore
333        );
334
335    }
336
337    /**
338     * @param $event
339     */
340    private
341    function gotToEditMode(&$event)
342    {
343        global $ID;
344        global $conf;
345
346
347        global $ACT;
348        $ACT = 'edit';
349
350        // If this is a side bar no message.
351        // There is always other page with the same name
352        $pageName = noNS($ID);
353        if ($pageName != $conf['sidebar']) {
354
355            action_plugin_combo_urlmessage::notify($ID, self::GO_TO_EDIT_MODE);
356
357        }
358
359
360    }
361
362    /**
363     * Return if the user has the right/permission to create/write an article
364     * @return bool
365     */
366    private
367    function userCanWrite()
368    {
369        global $ID;
370
371        if ($_SERVER['REMOTE_USER']) {
372            $perm = auth_quickaclcheck($ID);
373        } else {
374            $perm = auth_aclcheck($ID, '', null);
375        }
376
377        if ($perm >= AUTH_EDIT) {
378            return true;
379        } else {
380            return false;
381        }
382    }
383
384    /**
385     * Redirect to an internal page ie:
386     *   * on the same domain
387     *   * no HTTP redirect
388     *   * id rewrite
389     * @param string $targetPageId - target page id or an URL
390     * @param string $targetOriginId - the source of the target (redirect)
391     * @return bool - return true if the user has the permission and that the redirect was done
392     * @throws Exception
393     */
394    private
395    function performIdRedirect($targetPageId, $targetOriginId)
396    {
397        /**
398         * Because we set the ID globally for the ID redirect
399         * we make sure that this is not a {@link Page}
400         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
401         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
402         */
403        if (is_object($targetPageId)) {
404            $class = get_class($targetPageId);
405            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
406        }
407
408        if (is_object($targetOriginId)) {
409            $class = get_class($targetOriginId);
410            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
411        }
412
413        //If the user have right to see the target page
414        if ($_SERVER['REMOTE_USER']) {
415            $perm = auth_quickaclcheck($targetPageId);
416        } else {
417            $perm = auth_aclcheck($targetPageId, '', null);
418        }
419        if ($perm <= AUTH_NONE) {
420            return false;
421        }
422
423        // Change the id
424        global $ID;
425        global $INFO;
426        $sourceId = $ID;
427        $ID = $targetPageId;
428        // Change the info id for the sidebar
429        $INFO['id'] = $targetPageId;
430        /**
431         * otherwise there is:
432         *   * a meta robot = noindex,follow
433         * See {@link tpl_metaheaders()}
434         */
435        $INFO['exists'] = true;
436
437        /**
438         * Not compatible with
439         * https://www.dokuwiki.org/config:send404 is enabled
440         *
441         * This check happens before that dokuwiki is started
442         * and send an header in doku.php
443         *
444         * We send a warning
445         */
446        global $conf;
447        if ($conf['send404'] == true) {
448            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager",LogUtility::LVL_MSG_ERROR, self::CANONICAL);
449        }
450
451        // Redirection
452        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_ID);
453
454        return true;
455
456    }
457
458    /**
459     * An HTTP Redirect to an internal page, no external resources
460     * @param string $target - a dokuwiki id or an url
461     * @param $targetOrigin - the origin of the target (the algorithm used to get the target origin)
462     * @param bool $permanent - true for a permanent redirection otherwise false
463     */
464    private
465    function httpRedirect($target, $targetOrigin, $permanent = false)
466    {
467
468        global $ID;
469
470        // No message can be shown because this is an external URL
471
472        // Log the redirections
473        $this->logRedirection($ID, $target, $targetOrigin, self::REDIRECT_HTTP);
474
475        // Notify
476        action_plugin_combo_urlmessage::notify($ID, $targetOrigin);
477
478        // An url ?
479        if (UrlUtility::isValidURL($target)) {
480
481            $targetUrl = $target;
482
483        } else {
484
485            // Explode the page ID and the anchor (#)
486            $link = explode('#', $target, 2);
487
488            // Query String to pass the message
489            $urlParams = array(
490                action_plugin_combo_urlmessage::ORIGIN_PAGE => $ID,
491                action_plugin_combo_urlmessage::ORIGIN_TYPE => $targetOrigin
492            );
493
494            $targetUrl = wl($link[0], $urlParams, true, '&');
495            if ($link[1]) {
496                $targetUrl .= '#' . rawurlencode($link[1]);
497            }
498
499        }
500
501        /*
502         * The send_redirect function below send a 302
503         */
504        if ($permanent) {
505            // Not sure
506            header('HTTP/1.1 301 Moved Permanently');
507        }
508        send_redirect($targetUrl);
509
510
511        if (defined('DOKU_UNITTEST')) return; // no exits during unit tests
512        exit();
513
514    }
515
516    /**
517     * @param $id
518     * @return array
519     */
520    private
521    function getBestPage($id)
522    {
523
524        // The return parameters
525        $bestPageId = null;
526        $scorePageName = null;
527
528        // Get Score from a page
529        $pageName = noNS($id);
530        $pagesWithSameName = ft_pageLookup($pageName);
531        if (count($pagesWithSameName) > 0) {
532
533            // Search same namespace in the page found than in the Id page asked.
534            $bestNbWordFound = 0;
535
536
537            $wordsInPageSourceId = explode(':', $id);
538            foreach ($pagesWithSameName as $targetPageId => $title) {
539
540                // Nb of word found in the target page id
541                // that are in the source page id
542                $nbWordFound = 0;
543                foreach ($wordsInPageSourceId as $word) {
544                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
545                }
546
547                if ($bestPageId == null) {
548
549                    $bestNbWordFound = $nbWordFound;
550                    $bestPageId = $targetPageId;
551
552                } else {
553
554                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
555
556                        $bestNbWordFound = $nbWordFound;
557                        $bestPageId = $targetPageId;
558
559                    }
560
561                }
562
563            }
564            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
565            return array(
566                'id' => $bestPageId,
567                'score' => $scorePageName);
568        }
569        return array(
570            'id' => $bestPageId,
571            'score' => $scorePageName
572        );
573
574    }
575
576
577    /**
578     * Redirect to the search engine
579     */
580    private
581    function redirectToSearchEngine()
582    {
583
584        global $ID;
585
586        $replacementPart = array(':', '_', '-');
587        $query = str_replace($replacementPart, ' ', $ID);
588
589        $urlParams = array(
590            "do" => "search",
591            "q" => $query
592        );
593
594        $url = wl($ID, $urlParams, true, '&');
595
596        $this->httpRedirect($url, self::TARGET_ORIGIN_SEARCH_ENGINE);
597
598    }
599
600
601    /**
602     *
603     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
604     *   * For a SQlite database, it will add a row into the log
605     *
606     * @param string $sourcePageId
607     * @param $targetPageId
608     * @param $algorithmic
609     * @param $method - http or rewrite
610     */
611    function logRedirection($sourcePageId, $targetPageId, $algorithmic, $method)
612    {
613
614        $row = array(
615            "TIMESTAMP" => date("c"),
616            "SOURCE" => $sourcePageId,
617            "TARGET" => $targetPageId,
618            "REFERRER" => $_SERVER['HTTP_REFERER'],
619            "TYPE" => $algorithmic,
620            "METHOD" => $method
621        );
622        $sqlite = Sqlite::getSqlite();
623        $res = $sqlite->storeEntry('redirections_log', $row);
624
625        if (!$res) {
626            LogUtility::msg("An error occurred");
627        }
628
629    }
630
631    /**
632     * This function check if there is a redirection declared
633     * in the redirection table
634     * @return bool - true if a rewrite or redirection occurs
635     * @throws Exception
636     */
637    private function processingPageRules()
638    {
639        global $ID;
640
641        $calculatedTarget = null;
642        $ruleMatcher = null; // Used in a warning message if the target page does not exist
643        // Known redirection in the table
644        // Get the page from redirection data
645        $rules = $this->pageRules->getRules();
646        foreach ($rules as $rule) {
647
648            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
649            $ruleTarget = $rule[PageRules::TARGET_NAME];
650
651            // Glob to Rexgexp
652            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/';
653
654            // Match ?
655            // https://www.php.net/manual/en/function.preg-match.php
656            if (preg_match($regexpPattern, $ID, $matches)) {
657                $calculatedTarget = $ruleTarget;
658                foreach ($matches as $key => $match) {
659                    if ($key == 0) {
660                        continue;
661                    } else {
662                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
663                    }
664                }
665                break;
666            }
667        }
668
669        if ($calculatedTarget == null) {
670            return false;
671        }
672
673        // If this is an external redirect (other domain)
674        if (UrlUtility::isValidURL($calculatedTarget)) {
675
676            $this->httpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, true);
677            return true;
678
679        }
680
681        // If the page exist
682        if (page_exists($calculatedTarget)) {
683
684            // This is DokuWiki Id and should always be lowercase
685            // The page rule may have change that
686            $calculatedTarget = strtolower($calculatedTarget);
687            $this->performIdRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES);
688            return true;
689
690        } else {
691
692            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existent page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
693            return false;
694
695        }
696
697    }
698
699
700}
701