xref: /plugin/combo/action/router.php (revision ad79af66a70046d40e27ff4cc82d28834afaf49b)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\DokuwikiId;
6use ComboStrap\ExceptionBadArgument;
7use ComboStrap\ExceptionBadSyntax;
8use ComboStrap\ExceptionCompile;
9use ComboStrap\ExceptionSqliteNotAvailable;
10use ComboStrap\ExecutionContext;
11use ComboStrap\FileSystems;
12use ComboStrap\HttpResponse;
13use ComboStrap\HttpResponseStatus;
14use ComboStrap\Identity;
15use ComboStrap\LogUtility;
16use ComboStrap\MarkupPath;
17use ComboStrap\Meta\Field\AliasType;
18use ComboStrap\Mime;
19use ComboStrap\PageId;
20use ComboStrap\PageRules;
21use ComboStrap\PageUrlPath;
22use ComboStrap\PageUrlType;
23use ComboStrap\RouterBestEndPage;
24use ComboStrap\Site;
25use ComboStrap\SiteConfig;
26use ComboStrap\Sqlite;
27use ComboStrap\Web\Url;
28use ComboStrap\Web\UrlEndpoint;
29use ComboStrap\Web\UrlRewrite;
30use ComboStrap\WikiPath;
31
32require_once(__DIR__ . '/../vendor/autoload.php');
33
/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router)
 *
 *
 */
41class action_plugin_combo_router extends DokuWiki_Action_Plugin
42{
43
44    /**
45     * @deprecated
46     */
47    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
48    const ROUTER_ENABLE_CONF = "enableRouter";
49
    // The redirect type (ie the method used to send the visitor to the target)
    // Transparent: the requested id is rewritten, no HTTP redirect is sent
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    // Not found: used when the target is a best guess (start page, best page name, best namespace, ...)
    // NOTE(review): the previous note read `404 (See other)` but `See Other` is the HTTP status 303,
    // not 404 - confirm which status is really sent for this method
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
55
56    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
57
58    // Where the target id value comes from
59    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
60    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
61    /**
62     * Named Permalink (canonical)
63     */
64    const TARGET_ORIGIN_CANONICAL = 'canonical';
65    const TARGET_ORIGIN_ALIAS = 'alias';
66    /**
67     * Identifier Permalink (full page id)
68     */
69    const TARGET_ORIGIN_PERMALINK = "permalink";
70    /**
71     * Extended Permalink (abbreviated page id at the end)
72     */
73    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
74    const TARGET_ORIGIN_START_PAGE = 'startPage';
75    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
76    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
77    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
78    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
79    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
80
81
82    // The constant parameters
83    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
84    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
85    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
86    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
87    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
88    const GO_TO_EDIT_MODE = 'GoToEditMode';
89    const NOTHING = 'Nothing';
90
91    /** @var string - a name used in log and other places */
92    const NAME = 'Url Manager';
93    const CANONICAL = 'router';
94    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
95    const REFRESH_HEADER_NAME = "Refresh";
96    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
97    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
98    public const URL_MANAGER_NAME = "Router";
99
100
101    /**
102     * @var PageRules
103     */
104    private $pageRules;
105
106
107    function __construct()
108    {
109        // enable direct access to language strings
110        // ie $this->lang
111        $this->setupLocale();
112
113    }
114
115    /**
116     * @param string $refreshHeader
117     * @return false|string
118     */
119    public static function getUrlFromRefresh(string $refreshHeader)
120    {
121        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
122    }
123
124    public static function getUrlFromLocation($refreshHeader)
125    {
126        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
127    }
128
129    /**
130     * @return string|null
131     *
132     * Unfortunately, DOKUWIKI_STARTED is not the first event
133     * The id may have been changed by
134     * {@link action_plugin_combo_lang::load_lang()}
135     * function, that's why we have this function
136     * to get the original requested id
137     */
138    private static function getOriginalIdFromRequest(): ?string
139    {
140        // and not $_GET["id"] otherwise we may get a `/`
141        $originalId = getID();
142        if ($originalId === null) {
143            return null;
144        }
145        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
146    }
147
148    /**
149     * Determine if the request should be banned based on the id
150     *
151     * @param string $id
152     * @return bool
153     *
154     * See also {@link https://perishablepress.com/7g-firewall/#features}
155     * for blocking rules on http request data such as:
156     *   * query_string
157     *   * user_agent,
158     *   * remote host
159     */
160    public static function isShadowBanned(string $id): bool
161    {
162        /**
163         * ie
164         * wp-json:api:flutter_woo:config_file
165         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
166         * wp-admin
167         * 2020:wp-includes:wlwmanifest.xml
168         * wp-content:start
169         * wp-admin:css:start
170         * sito:wp-includes:wlwmanifest.xml
171         * site:wp-includes:wlwmanifest.xml
172         * cms:wp-includes:wlwmanifest.xml
173         * test:wp-includes:wlwmanifest.xml
174         * media:wp-includes:wlwmanifest.xml
175         * wp2:wp-includes:wlwmanifest.xml
176         * 2019:wp-includes:wlwmanifest.xml
177         * shop:wp-includes:wlwmanifest.xml
178         * wp1:wp-includes:wlwmanifest.xml
179         * news:wp-includes:wlwmanifest.xml
180         * 2018:wp-includes:wlwmanifest.xml
181         */
182        if (strpos($id, 'wp-') !== false) {
183            return true;
184        }
185
186        /**
187         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
188         * db:oracle:999999.9:union:all:select_null:from_dual
189         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
190         */
191        if (preg_match('/_chr_|_0_0/', $id) === 1) {
192            return true;
193        }
194
195
196        /**
197         * ie
198         * git:objects:
199         * git:refs:heads:stable
200         * git:logs:refs:heads:main
201         * git:logs:refs:heads:stable
202         * git:hooks:pre-push.sample
203         * git:hooks:pre-receive.sample
204         */
205        if (strpos($id, "git:") === 0) {
206            return true;
207        }
208
209        return false;
210
211    }
212
213    /**
214     * @param string $id
215     * @return bool
216     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
217     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
218     * well-known:dnt-policy.txt
219     */
220    public static function isWellKnownFile(string $id): bool
221    {
222        return strpos($id, "well-known") === 0;
223    }
224
225
226    function register(Doku_Event_Handler $controller)
227    {
228
229        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
230
231            /**
232             * This will call the function {@link action_plugin_combo_router::_router()}
233             * The event is not DOKUWIKI_STARTED because this is not the first one
234             *
235             * https://www.dokuwiki.org/devel:event:init_lang_load
236             */
237            $controller->register_hook('DOKUWIKI_STARTED',
238                'BEFORE',
239                $this,
240                'router',
241                array());
242
243            /**
244             * This is the real first call of Dokuwiki
245             * Unfortunately, it does not create the environment
246             * We just ban to spare server resources
247             *
248             * https://www.dokuwiki.org/devel:event:init_lang_load
249             */
250            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
251
252        }
253
254
255    }
256
257    /**
258     *
259     * We have created a spacial ban function that is
260     * called before the first function
261     * {@link action_plugin_combo_metalang::load_lang()}
262     * to spare CPU.
263     *
264     * @param $event
265     * @throws Exception
266     */
267    function ban(&$event)
268    {
269
270        $id = self::getOriginalIdFromRequest();
271        if ($id === null) {
272            return;
273        }
274        $page = MarkupPath::createMarkupFromId($id);
275        if (!FileSystems::exists($page)) {
276            // Well known
277            if (self::isWellKnownFile($id)) {
278                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
279                ExecutionContext::getActualOrCreateFromEnv()
280                    ->response()
281                    ->setStatus(HttpResponseStatus::NOT_FOUND)
282                    ->end();
283                return;
284            }
285
286            // Shadow banned
287            if (self::isShadowBanned($id)) {
288                $webSiteHomePage = Site::getIndexPageName();
289                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
290            }
291        }
292    }
293
294    /**
295     * @param $event Doku_Event
296     * @param $param
297     * @return void
298     * @throws Exception
299     */
300    function router(&$event, $param)
301    {
302
303        /**
304         * Just the {@link ExecutionContext::SHOW_ACTION}
305         * may be redirected
306         */
307        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
308        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
309            return;
310        }
311
312        $urlRewrite = Site::getUrlRewrite();
313        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
314            UrlRewrite::sendErrorMessage();
315            return;
316        }
317
318        global $ID;
319
320        /**
321         * Without SQLite, this module does not work further
322         */
323        try {
324            Sqlite::createOrGetSqlite();
325        } catch (ExceptionSqliteNotAvailable $e) {
326            return;
327        }
328
329        $this->pageRules = new PageRules();
330
331
332        /**
333         * Unfortunately, DOKUWIKI_STARTED is not the first event
334         * The id may have been changed by
335         * {@link action_plugin_combo_lang::load_lang()}
336         * function, that's why we check against the {@link $_REQUEST}
337         * and not the global ID
338         */
339        $originalId = self::getOriginalIdFromRequest();
340
341        /**
342         * Page is an existing id ?
343         */
344        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
345        if (FileSystems::exists($requestedMarkupPath)) {
346
347            /**
348             * If this is not the root home page
349             * and if the canonical id is the not the same,
350             * and if this is not a historical page (revision)
351             * redirect
352             */
353            if (
354                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
355                && $ID != Site::getIndexPageName()
356                && !isset($_REQUEST["rev"])
357            ) {
358                /**
359                 * TODO: When saving for the first time, the page is not stored in the database
360                 *   but that's not the case actually
361                 */
362                $databasePageRow = $requestedMarkupPath->getDatabasePage();
363                if ($databasePageRow->exists()) {
364                    /**
365                     * A move may leave the database in a bad state,
366                     * unfortunately (ie page is not in index, unable to update, ...)
367                     * We test therefore if the database page id exists
368                     */
369                    $targetPageId = $databasePageRow->getFromRow("id");
370                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
371                    if (FileSystems::exists($targetPath)) {
372                        $this->executePermanentRedirect(
373                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
374                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
375                        );
376                    }
377                }
378            }
379            return;
380        }
381
382
383        $identifier = $ID;
384
385
386        /**
387         * Page Id in the url
388         */
389        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
390        if ($shortPageId != null) {
391            $pageId = PageUrlPath::decodePageId($shortPageId);
392        } else {
393            /**
394             * Permalink with id
395             */
396            $pageId = PageUrlPath::decodePageId($identifier);
397        }
398        if ($pageId !== null) {
399
400            if ($requestedMarkupPath->getParent() === null) {
401                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
402                if ($page !== null && $page->exists()) {
403                    $this->executePermanentRedirect(
404                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
405                        self::TARGET_ORIGIN_PERMALINK
406                    );
407                    return;
408                }
409            }
410
411            /**
412             * Page Id Abbr ?
413             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
414             */
415            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
416            if ($page === null) {
417                // or the length of the abbr has changed
418                $canonicalDatabasePage = new DatabasePageRow();
419                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
420                if ($row !== null) {
421                    $canonicalDatabasePage->setRow($row);
422                    $page = $canonicalDatabasePage->getMarkupPath();
423                }
424            }
425            if ($page !== null && $page->exists()) {
426                /**
427                 * If the url canonical id has changed, we show it
428                 * to the writer by performing a permanent redirect
429                 */
430                if ($identifier != $page->getUrlId()) {
431                    // Google asks for a redirect
432                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
433                    // People access your site through several different URLs.
434                    // If, for example, your home page can be reached in multiple ways
435                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
436                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
437                    // and use redirects to send traffic from the other URLs to your preferred URL.
438                    $this->executePermanentRedirect(
439                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
440                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
441                    );
442                    return;
443                }
444
445                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
446                return;
447
448            }
449            // permanent url not yet in the database
450            // Other permanent such as permanent canonical ?
451            // We let the process go with the new identifier
452
453        }
454
455        // Global variable needed in the process
456        global $conf;
457
458        /**
459         * Identifier is a Canonical ?
460         */
461        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
462        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
463        if ($canonicalPage !== null && $canonicalPage->exists()) {
464            /**
465             * Does the canonical url is canonical name based
466             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
467             */
468            if ($canonicalPage->getUrlId() === $identifier) {
469                $res = $this->executeTransparentRedirect(
470                    $canonicalPage->getWikiId(),
471                    self::TARGET_ORIGIN_CANONICAL
472                );
473            } else {
474                $res = $this->executePermanentRedirect(
475                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
476                    self::TARGET_ORIGIN_CANONICAL
477                );
478            }
479            if ($res) {
480                return;
481            }
482        }
483
484        /**
485         * Identifier is an alias
486         */
487        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
488        if (
489            $aliasRequestedPage !== null
490            && $aliasRequestedPage->exists()
491            // The build alias is the file system metadata alias
492            // it may be null if the replication in the database was not successful
493            && $aliasRequestedPage->getBuildAlias() !== null
494        ) {
495            $buildAlias = $aliasRequestedPage->getBuildAlias();
496            switch ($buildAlias->getType()) {
497                case AliasType::REDIRECT:
498                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
499                    if ($res) {
500                        return;
501                    }
502                    break;
503                case AliasType::SYNONYM:
504                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
505                    if ($res) {
506                        return;
507                    }
508                    break;
509                default:
510                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
511                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
512                    if ($res) {
513                        return;
514                    }
515                    break;
516            }
517        }
518
519
520        // If there is a redirection defined in the page rules
521        $result = $this->processingPageRules();
522        if ($result) {
523            // A redirection has occurred
524            // finish the process
525            return;
526        }
527
528        /**
529         *
530         * There was no redirection found, redirect to edit mode if writer
531         *
532         */
533        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
534
535            $this->gotToEditMode($event);
536            // Stop here
537            return;
538
539        }
540
541        /**
542         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
543         */
544        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
545            return;
546        }
547
548        // We are reader and their is no redirection set, we apply the algorithm
549        $readerAlgorithms = array();
550        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
551        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
552        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
553
554        while (
555            ($algorithm = array_shift($readerAlgorithms)) != null
556        ) {
557
558            switch ($algorithm) {
559
560                case self::NOTHING:
561                    return;
562
563                case self::GO_TO_BEST_END_PAGE_NAME:
564
565                    /**
566                     * @var MarkupPath $bestEndPage
567                     */
568                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
569                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
570                        $res = false;
571                        switch ($method) {
572                            case self::REDIRECT_PERMANENT_METHOD:
573                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
574                                break;
575                            case self::REDIRECT_NOTFOUND_METHOD:
576                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
577                                break;
578                            default:
579                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
580                        }
581                        if ($res) {
582                            // Redirection has succeeded
583                            return;
584                        }
585                    }
586                    break;
587
588                case self::GO_TO_NS_START_PAGE:
589
590                    // Start page with the conf['start'] parameter
591                    $startPage = getNS($identifier) . ':' . $conf['start'];
592                    if (page_exists($startPage)) {
593                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
594                        if ($res) {
595                            return;
596                        }
597                    }
598
599                    // Start page with the same name than the namespace
600                    $startPage = getNS($identifier) . ':' . curNS($identifier);
601                    if (page_exists($startPage)) {
602                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
603                        if ($res) {
604                            return;
605                        }
606                    }
607                    break;
608
609                case self::GO_TO_BEST_PAGE_NAME:
610
611                    $bestPageId = null;
612
613                    $bestPage = $this->getBestPage($identifier);
614                    $bestPageId = $bestPage['id'];
615                    $scorePageName = $bestPage['score'];
616
617                    // Get Score from a Namespace
618                    $bestNamespace = $this->scoreBestNamespace($identifier);
619                    $bestNamespaceId = $bestNamespace['namespace'];
620                    $namespaceScore = $bestNamespace['score'];
621
622                    // Compare the two score
623                    if ($scorePageName > 0 or $namespaceScore > 0) {
624                        if ($scorePageName > $namespaceScore) {
625                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
626                        } else {
627                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
628                        }
629                        return;
630                    }
631                    break;
632
633                case self::GO_TO_BEST_NAMESPACE:
634
635                    $scoreNamespace = $this->scoreBestNamespace($identifier);
636                    $bestNamespaceId = $scoreNamespace['namespace'];
637                    $score = $scoreNamespace['score'];
638
639                    if ($score > 0) {
640                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
641                        return;
642                    }
643                    break;
644
645                case self::GO_TO_SEARCH_ENGINE:
646
647                    $this->redirectToSearchEngine();
648
649                    return;
650
651                // End Switch Action
652            }
653
654            // End While Action
655        }
656
657
658    }
659
660
661    /**
662     * getBestNamespace
663     * Return a list with 'BestNamespaceId Score'
664     * @param $id
665     * @return array
666     */
667    private
668    function scoreBestNamespace($id)
669    {
670
671        global $conf;
672
673        // Parameters
674        $pageNameSpace = getNS($id);
675
676        // If the page has an existing namespace start page take it, other search other namespace
677        $startPageNameSpace = $pageNameSpace . ":";
678        $dateAt = '';
679        // $startPageNameSpace will get a full path (ie with start or the namespace
680        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
681        if (page_exists($startPageNameSpace)) {
682            $nameSpaces = array($startPageNameSpace);
683        } else {
684            $nameSpaces = ft_pageLookup($conf['start']);
685        }
686
687        // Parameters and search the best namespace
688        $pathNames = explode(':', $pageNameSpace);
689        $bestNbWordFound = 0;
690        $bestNamespaceId = '';
691        foreach ($nameSpaces as $nameSpace) {
692
693            $nbWordFound = 0;
694            foreach ($pathNames as $pathName) {
695                if (strlen($pathName) > 2) {
696                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
697                }
698            }
699            if ($nbWordFound > $bestNbWordFound) {
700                // Take only the smallest namespace
701                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
702                    $bestNbWordFound = $nbWordFound;
703                    $bestNamespaceId = $nameSpace;
704                }
705            }
706        }
707
708        $startPageFactor = $this->getConf('WeightFactorForStartPage');
709        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
710        if ($bestNbWordFound > 0) {
711            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
712        } else {
713            $bestNamespaceScore = 0;
714        }
715
716
717        return array(
718            'namespace' => $bestNamespaceId,
719            'score' => $bestNamespaceScore
720        );
721
722    }
723
724    /**
725     * @param $event
726     */
727    private
728    function gotToEditMode(&$event)
729    {
730        global $ACT;
731        $ACT = 'edit';
732
733    }
734
735
736    /**
737     * Redirect to an internal page ie:
738     *   * on the same domain
739     *   * no HTTP redirect
740     *   * id rewrite
741     * @param string $targetPageId - target page id
742     * @param string $targetOriginId - the source of the target (redirect)
743     * @return bool - return true if the user has the permission and that the redirect was done
744     * @throws Exception
745     */
746    private
747    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
748    {
749        /**
750         * Because we set the ID globally for the ID redirect
751         * we make sure that this is not a {@link MarkupPath}
752         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
753         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
754         */
755        if (is_object($targetPageId)) {
756            $class = get_class($targetPageId);
757            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
758        }
759
760        if (is_object($targetOriginId)) {
761            $class = get_class($targetOriginId);
762            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
763        }
764
765        // If the user does not have the right to see the target page
766        // don't do anything
767        if (!(Identity::isReader($targetPageId))) {
768            return false;
769        }
770
771        // Change the id
772        global $ID;
773        global $INFO;
774        $sourceId = $ID;
775        $ID = $targetPageId;
776        if (isset($_REQUEST["id"])) {
777            $_REQUEST["id"] = $targetPageId;
778        }
779        if (isset($_GET["id"])) {
780            $_GET["id"] = $targetPageId;
781        }
782
783        /**
784         * Refresh the $INFO data
785         *
786         * the info attributes are used elsewhere
787         *   'id': for the sidebar
788         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
789         *   'rev' : for the edit button to be sure that the page is still the same
790         */
791        $INFO = pageinfo();
792
793        /**
794         * Not compatible with
795         * https://www.dokuwiki.org/config:send404 is enabled
796         *
797         * This check happens before that dokuwiki is started
798         * and send an header in doku.php
799         *
800         * We send a warning
801         */
802        global $conf;
803        if ($conf['send404'] == true) {
804            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
805        }
806
807        // Redirection
808        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
809
810        return true;
811
812    }
813
814    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
815    {
816        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
817    }
818
819    /**
820     * The general HTTP Redirect method to an internal page
821     * where the redirection method decide which type of redirection
822     * @param string $targetIdOrUrl - a dokuwiki id or an url
823     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
824     * @param string $method - the redirection method
825     */
826    private
827    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
828    {
829
830        global $ID;
831
832
833        // Log the redirections
834        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
835
836
837        // An http external url ?
838        try {
839            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
840        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
841            $isHttpUrl = false;
842        }
843
844        // If there is a bug in the isValid function for an internal url
845        // We get a loop.
846        // The Url becomes the id, the id is unknown and we do a redirect again
847        //
848        // We check then if the target starts with the base url
849        // if this is the case, it's valid
850        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
851            $isHttpUrl = true;
852        }
853        if ($isHttpUrl) {
854
855            // defend against HTTP Response Splitting
856            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
857            $targetUrl = stripctl($targetIdOrUrl);
858
859        } else {
860
861
862            // Explode the page ID and the anchor (#)
863            $link = explode('#', $targetIdOrUrl, 2);
864
865            $url = UrlEndpoint::createDokuUrl();
866
867            $urlParams = [];
868            // if this is search engine redirect
869            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
870                $replacementPart = array(':', '_', '-');
871                $query = str_replace($replacementPart, ' ', $ID);
872                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
873                $url->setQueryParameter("q", $query);
874            }
875
876            /**
877             * Doing a permanent redirect with a added query string
878             * create a new page url on the search engine
879             *
880             * ie
881             * http://host/page
882             * is not the same
883             * than
884             * http://host/page?whatever
885             *
886             * We can't pass query string otherwise, we get
887             * the SEO warning / error
888             * `Alternative page with proper canonical tag`
889             *
890             * Use HTTP X header for debug
891             */
892            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
893                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
894                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
895            }
896
897            $id = $link[0];
898            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
899            if (array_key_exists(1, $link)) {
900                $url->setFragment($link[1]);
901            }
902            $targetUrl = $url->toAbsoluteUrlString();
903
904        }
905
906        /**
907         * The dokuwiki function {@link send_redirect()}
908         * set the `Location header` and in php, the header function
909         * in this case change the status code to 302 Arghhhh.
910         * The code below is adapted from this function {@link send_redirect()}
911         */
912        global $MSG; // are there any undisplayed messages? keep them in session for display
913        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
914            //reopen session, store data and close session again
915            @session_start();
916            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
917        }
918        session_write_close(); // always close the session
919
920        switch ($method) {
921
922            case self::REDIRECT_PERMANENT_METHOD:
923                ExecutionContext::getActualOrCreateFromEnv()
924                    ->response()
925                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
926                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
927                    ->end();
928                return true;
929
930            case self::REDIRECT_NOTFOUND_METHOD:
931
932
933                // Empty 404 body to not get the standard 404 page of the browser
934                // but a blank page to avoid a sort of FOUC.
935                // ie the user see a page briefly
936                ExecutionContext::getActualOrCreateFromEnv()
937                    ->response()
938                    ->setStatus(HttpResponseStatus::NOT_FOUND)
939                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
940                    ->setBody(self::PAGE_404, Mime::getHtml())
941                    ->end();
942                return true;
943
944            default:
945                LogUtility::msg("The method ($method) is not an http redirection");
946                return false;
947        }
948
949
950    }
951
952    /**
953     * @param $id
954     * @return array
955     */
956    private
957    function getBestPage($id): array
958    {
959
960        // The return parameters
961        $bestPageId = null;
962        $scorePageName = null;
963
964        // Get Score from a page
965        $pageName = noNS($id);
966        $pagesWithSameName = ft_pageLookup($pageName);
967        if (count($pagesWithSameName) > 0) {
968
969            // Search same namespace in the page found than in the Id page asked.
970            $bestNbWordFound = 0;
971
972
973            $wordsInPageSourceId = explode(':', $id);
974            foreach ($pagesWithSameName as $targetPageId => $title) {
975
976                // Nb of word found in the target page id
977                // that are in the source page id
978                $nbWordFound = 0;
979                foreach ($wordsInPageSourceId as $word) {
980                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
981                }
982
983                if ($bestPageId == null) {
984
985                    $bestNbWordFound = $nbWordFound;
986                    $bestPageId = $targetPageId;
987
988                } else {
989
990                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
991
992                        $bestNbWordFound = $nbWordFound;
993                        $bestPageId = $targetPageId;
994
995                    }
996
997                }
998
999            }
1000            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
1001            return array(
1002                'id' => $bestPageId,
1003                'score' => $scorePageName);
1004        }
1005        return array(
1006            'id' => $bestPageId,
1007            'score' => $scorePageName
1008        );
1009
1010    }
1011
1012
1013    /**
1014     * Redirect to the search engine
1015     */
1016    private
1017    function redirectToSearchEngine()
1018    {
1019
1020        global $ID;
1021        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1022
1023    }
1024
1025
1026    /**
1027     *
1028     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1029     *   * For a SQlite database, it will add a row into the log
1030     *
1031     * @param string $sourcePageId
1032     * @param $targetPageId
1033     * @param $algorithmic
1034     * @param $method - http or rewrite
1035     */
1036    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1037    {
1038
1039        $row = array(
1040            "TIMESTAMP" => date("c"),
1041            "SOURCE" => $sourcePageId,
1042            "TARGET" => $targetPageId,
1043            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1044            "TYPE" => $algorithmic,
1045            "METHOD" => $method
1046        );
1047        $request = Sqlite::createOrGetBackendSqlite()
1048            ->createRequest()
1049            ->setTableRow('redirections_log', $row);
1050        try {
1051            $request
1052                ->execute();
1053        } catch (ExceptionCompile $e) {
1054            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1055        } finally {
1056            $request->close();
1057        }
1058
1059
1060    }
1061
1062    /**
1063     * This function check if there is a redirection declared
1064     * in the redirection table
1065     * @return bool - true if a rewrite or redirection occurs
1066     * @throws Exception
1067     */
1068    private function processingPageRules(): bool
1069    {
1070        global $ID;
1071
1072        $calculatedTarget = null;
1073        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1074        // Known redirection in the table
1075        // Get the page from redirection data
1076        $rules = $this->pageRules->getRules();
1077        foreach ($rules as $rule) {
1078
1079            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1080            $ruleTarget = $rule[PageRules::TARGET_NAME];
1081
1082            // Glob to Rexgexp
1083            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1084
1085            // Match ?
1086            // https://www.php.net/manual/en/function.preg-match.php
1087            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1088            if ($pregMatchResult === false) {
1089                // The `if` to take into account this problem
1090                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1091                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1092                return false;
1093            }
1094            if ($pregMatchResult) {
1095                $calculatedTarget = $ruleTarget;
1096                foreach ($matches as $key => $match) {
1097                    if ($key == 0) {
1098                        continue;
1099                    } else {
1100                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1101                    }
1102                }
1103                break;
1104            }
1105        }
1106
1107        if ($calculatedTarget == null) {
1108            return false;
1109        }
1110
1111        // If this is an external redirect (other domain)
1112        try {
1113            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1114        } catch (ExceptionBadSyntax $e) {
1115            $isHttpUrl = false;
1116        }
1117        if ($isHttpUrl) {
1118            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1119            return true;
1120        }
1121
1122        // If the page exist
1123        if (page_exists($calculatedTarget)) {
1124
1125            // This is DokuWiki Id and should always be lowercase
1126            // The page rule may have change that
1127            $calculatedTarget = strtolower($calculatedTarget);
1128            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1129            if ($res) {
1130                return true;
1131            } else {
1132                return false;
1133            }
1134
1135        } else {
1136
1137            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1138            return false;
1139
1140        }
1141
1142    }
1143
1144    private function performNotFoundRedirect(string $targetId, string $origin): bool
1145    {
1146        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1147    }
1148
1149
1150}
1151