xref: /plugin/combo/action/router.php (revision 39c00e7ee80f440398e043c1b028639b65574701)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\DokuwikiId;
6use ComboStrap\ExceptionBadArgument;
7use ComboStrap\ExceptionBadSyntax;
8use ComboStrap\ExceptionCompile;
9use ComboStrap\ExceptionNotFound;
10use ComboStrap\ExceptionSqliteNotAvailable;
11use ComboStrap\ExecutionContext;
12use ComboStrap\FileSystems;
13use ComboStrap\HttpResponse;
14use ComboStrap\HttpResponseStatus;
15use ComboStrap\Identity;
16use ComboStrap\LogUtility;
17use ComboStrap\MarkupPath;
18use ComboStrap\Meta\Field\AliasType;
19use ComboStrap\Mime;
20use ComboStrap\PageId;
21use ComboStrap\PageRules;
22use ComboStrap\PageUrlPath;
23use ComboStrap\PageUrlType;
24use ComboStrap\RouterBestEndPage;
25use ComboStrap\Site;
26use ComboStrap\SiteConfig;
27use ComboStrap\Sqlite;
28use ComboStrap\Web\Url;
29use ComboStrap\Web\UrlEndpoint;
30use ComboStrap\Web\UrlRewrite;
31use ComboStrap\WikiPath;
32
33require_once(__DIR__ . '/../vendor/autoload.php');
34
35/**
 * Class action_plugin_combo_router
37 *
38 * The actual URL manager
39 *
40 *
41 */
42class action_plugin_combo_router extends DokuWiki_Action_Plugin
43{
44
45    /**
46     * @deprecated
47     */
48    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
49    const ROUTER_ENABLE_CONF = "enableRouter";
50
51    // The redirect type
52    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
53    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
54    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
55    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
56
57    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
58
59    // Where the target id value comes from
60    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
61    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
62    /**
63     * Named Permalink (canonical)
64     */
65    const TARGET_ORIGIN_CANONICAL = 'canonical';
66    const TARGET_ORIGIN_ALIAS = 'alias';
67    /**
68     * Identifier Permalink (full page id)
69     */
70    const TARGET_ORIGIN_PERMALINK = "permalink";
71    /**
72     * Extended Permalink (abbreviated page id at the end)
73     */
74    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
75    const TARGET_ORIGIN_START_PAGE = 'startPage';
76    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
77    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
78    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
79    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
80    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
81
82
83    // The constant parameters
84    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
85    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
86    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
87    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
88    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
89    const GO_TO_EDIT_MODE = 'GoToEditMode';
90    const NOTHING = 'Nothing';
91
92    /** @var string - a name used in log and other places */
93    const NAME = 'Url Manager';
94    const CANONICAL = 'router';
95    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
96    const REFRESH_HEADER_NAME = "Refresh";
97    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
98    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
99    public const URL_MANAGER_NAME = "Router";
100
101
102    /**
103     * @var PageRules
104     */
105    private $pageRules;
106
107
108    function __construct()
109    {
110        // enable direct access to language strings
111        // ie $this->lang
112        $this->setupLocale();
113
114    }
115
116    /**
117     * @param string $refreshHeader
118     * @return false|string
119     */
120    public static function getUrlFromRefresh(string $refreshHeader)
121    {
122        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
123    }
124
125    public static function getUrlFromLocation($refreshHeader)
126    {
127        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
128    }
129
130    /**
131     * @return string|null
132     *
133     * Return the original id from the request
134     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
135     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
136     *
137     * Unfortunately, DOKUWIKI_STARTED is not the first event
138     * The id may have been changed by
139     * {@link action_plugin_combo_lang::load_lang()}
140     * function, that's why we have this function
141     * to get the original requested id
142     */
143    private static function getOriginalIdFromRequest(): ?string
144    {
145        $originalId = $_GET["id"] ?? null;
146        if ($originalId === null) {
147            return null;
148        }
149        // We get a `/` as first character
150        // because we return an id, we need to delete it
151        $originalId = substr($originalId, 1);
152        // transform / to :
153        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
154    }
155
156    /**
157     * Determine if the request should be banned based on the id
158     *
159     * @param string $id
160     * @return bool
161     *
162     * See also {@link https://perishablepress.com/7g-firewall/#features}
163     * for blocking rules on http request data such as:
164     *   * query_string
165     *   * user_agent,
166     *   * remote host
167     */
168    public static function isShadowBanned(string $id): bool
169    {
170        /**
171         * ie
172         * wp-json:api:flutter_woo:config_file
173         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
174         * wp-admin
175         * 2020:wp-includes:wlwmanifest.xml
176         * wp-content:start
177         * wp-admin:css:start
178         * sito:wp-includes:wlwmanifest.xml
179         * site:wp-includes:wlwmanifest.xml
180         * cms:wp-includes:wlwmanifest.xml
181         * test:wp-includes:wlwmanifest.xml
182         * media:wp-includes:wlwmanifest.xml
183         * wp2:wp-includes:wlwmanifest.xml
184         * 2019:wp-includes:wlwmanifest.xml
185         * shop:wp-includes:wlwmanifest.xml
186         * wp1:wp-includes:wlwmanifest.xml
187         * news:wp-includes:wlwmanifest.xml
188         * 2018:wp-includes:wlwmanifest.xml
189         */
190        if (strpos($id, 'wp-') !== false) {
191            return true;
192        }
193
194        /**
195         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
196         * db:oracle:999999.9:union:all:select_null:from_dual
197         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
198         */
199        if (preg_match('/_chr_|_0_0/', $id) === 1) {
200            return true;
201        }
202
203
204        /**
205         * ie
206         * git:objects:
207         * git:refs:heads:stable
208         * git:logs:refs:heads:main
209         * git:logs:refs:heads:stable
210         * git:hooks:pre-push.sample
211         * git:hooks:pre-receive.sample
212         */
213        if (strpos($id, "git:") === 0) {
214            return true;
215        }
216
217        return false;
218
219    }
220
221    /**
222     * @param string $id
223     * @return bool
224     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
225     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
226     * well-known:dnt-policy.txt
227     */
228    public static function isWellKnownFile(string $id): bool
229    {
230        return strpos($id, "well-known") === 0;
231    }
232
233
234    function register(Doku_Event_Handler $controller)
235    {
236
237        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
238
239            /**
240             * This will call the function {@link action_plugin_combo_router::_router()}
241             * The event is not DOKUWIKI_STARTED because this is not the first one
242             *
243             * https://www.dokuwiki.org/devel:event:init_lang_load
244             */
245            $controller->register_hook('DOKUWIKI_STARTED',
246                'BEFORE',
247                $this,
248                'router',
249                array());
250
251            /**
252             * Bot Ban functionality
253             *
254             * Because we make a redirection to the home page, we need to check
255             * if the home is readable, for that, the AUTH plugin needs to be initialized
256             * That's why we wait
257             * https://www.dokuwiki.org/devel:event:dokuwiki_init_done
258             *
259             * and we can't use
260             * https://www.dokuwiki.org/devel:event:init_lang_load
261             * because there is no auth setup in {@link auth_aclcheck_cb()}
262             * and the the line `if (!$auth instanceof AuthPlugin) return AUTH_NONE;` return none;
263             */
264            $controller->register_hook('DOKUWIKI_INIT_DONE', 'BEFORE', $this, 'ban', array());
265
266        }
267
268
269    }
270
271    /**
272     *
273     * We have created a spacial ban function that is
274     * called before the first function
275     * {@link action_plugin_combo_metalang::load_lang()}
276     * to spare CPU.
277     *
278     * @param $event
279     * @throws Exception
280     */
281    function ban(&$event)
282    {
283
284        $id = self::getOriginalIdFromRequest();
285        if ($id === null) {
286            return;
287        }
288        $page = MarkupPath::createMarkupFromId($id);
289        if (FileSystems::exists($page)) {
290            return;
291        }
292
293        // Well known
294        if (self::isWellKnownFile($id)) {
295            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
296            ExecutionContext::getActualOrCreateFromEnv()
297                ->response()
298                ->setStatus(HttpResponseStatus::NOT_FOUND)
299                ->end();
300            return;
301        }
302
303        // Shadow banned
304        if (self::isShadowBanned($id)) {
305            $webSiteHomePage = Site::getIndexPageName();
306            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
307        }
308
309    }
310
311    /**
312     * @param $event Doku_Event
313     * @param $param
314     * @return void
315     * @throws Exception
316     */
317    function router(&$event, $param)
318    {
319
320        /**
321         * Just the {@link ExecutionContext::SHOW_ACTION}
322         * may be redirected
323         */
324        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
325        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
326            return;
327        }
328
329        $urlRewrite = Site::getUrlRewrite();
330        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
331            UrlRewrite::sendErrorMessage();
332            return;
333        }
334
335        global $ID;
336
337        /**
338         * Without SQLite, this module does not work further
339         */
340        try {
341            Sqlite::createOrGetSqlite();
342        } catch (ExceptionSqliteNotAvailable $e) {
343            return;
344        }
345
346        $this->pageRules = new PageRules();
347
348
349        /**
350         * Unfortunately, DOKUWIKI_STARTED is not the first event
351         * The id may have been changed by
352         * {@link action_plugin_combo_lang::load_lang()}
353         * function, that's why we check against the {@link $_REQUEST}
354         * and not the global ID
355         */
356        $originalId = self::getOriginalIdFromRequest();
357
358        /**
359         * Page is an existing id ?
360         */
361        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
362        if (FileSystems::exists($requestedMarkupPath)) {
363
364            /**
365             * If this is not the root home page
366             * and if the canonical id is the not the same (the id has changed)
367             * and if this is not a historical page (revision)
368             * redirect
369             */
370            if (
371                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
372                && $ID != Site::getIndexPageName()
373                && !isset($_REQUEST["rev"])
374            ) {
375                /**
376                 * TODO: When saving for the first time, the page is not stored in the database
377                 *   but that's not the case actually
378                 */
379                $databasePageRow = $requestedMarkupPath->getDatabasePage();
380                if ($databasePageRow->exists()) {
381                    /**
382                     * A move may leave the database in a bad state,
383                     * unfortunately (ie page is not in index, unable to update, ...)
384                     * We test therefore if the database page id exists
385                     */
386                    $targetPageId = $databasePageRow->getFromRow("id");
387                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
388                    if (FileSystems::exists($targetPath)) {
389                        $this->executePermanentRedirect(
390                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
391                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
392                        );
393                    }
394                }
395            }
396            return;
397        }
398
399
400        $identifier = $ID;
401
402
403        /**
404         * Page Id in the url
405         */
406        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
407        if ($shortPageId != null) {
408            $pageId = PageUrlPath::decodePageId($shortPageId);
409        } else {
410            /**
411             * Permalink with id
412             */
413            $pageId = PageUrlPath::decodePageId($identifier);
414        }
415        if ($pageId !== null) {
416
417            if ($requestedMarkupPath->getParent() === null) {
418                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
419                if ($page !== null && $page->exists()) {
420                    $this->executePermanentRedirect(
421                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
422                        self::TARGET_ORIGIN_PERMALINK
423                    );
424                    return;
425                }
426            }
427
428            /**
429             * Page Id Abbr ?
430             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
431             */
432            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
433            if ($page === null) {
434                // or the length of the abbr has changed
435                $canonicalDatabasePage = new DatabasePageRow();
436                try {
437                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
438                    $canonicalDatabasePage->setRow($row);
439                    $page = $canonicalDatabasePage->getMarkupPath();
440                } catch (ExceptionNotFound $e) {
441                    // nothing to do
442                }
443            }
444            if ($page !== null && $page->exists()) {
445                /**
446                 * If the url canonical id has changed, we show it
447                 * to the writer by performing a permanent redirect
448                 */
449                if ($identifier != $page->getUrlId()) {
450                    // Google asks for a redirect
451                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
452                    // People access your site through several different URLs.
453                    // If, for example, your home page can be reached in multiple ways
454                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
455                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
456                    // and use redirects to send traffic from the other URLs to your preferred URL.
457                    $this->executePermanentRedirect(
458                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
459                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
460                    );
461                    return;
462                }
463
464                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
465                return;
466
467            }
468            // permanent url not yet in the database
469            // Other permanent such as permanent canonical ?
470            // We let the process go with the new identifier
471
472        }
473
474        // Global variable needed in the process
475        global $conf;
476
477        /**
478         * Identifier is a Canonical ?
479         */
480        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
481        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
482        if ($canonicalPage !== null && $canonicalPage->exists()) {
483            /**
484             * Does the canonical url is canonical name based
485             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
486             */
487            if ($canonicalPage->getUrlId() === $identifier) {
488                $res = $this->executeTransparentRedirect(
489                    $canonicalPage->getWikiId(),
490                    self::TARGET_ORIGIN_CANONICAL
491                );
492            } else {
493                $res = $this->executePermanentRedirect(
494                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
495                    self::TARGET_ORIGIN_CANONICAL
496                );
497            }
498            if ($res) {
499                return;
500            }
501        }
502
503        /**
504         * Identifier is an alias
505         */
506        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
507        if (
508            $aliasRequestedPage !== null
509            && $aliasRequestedPage->exists()
510            // The build alias is the file system metadata alias
511            // it may be null if the replication in the database was not successful
512            && $aliasRequestedPage->getBuildAlias() !== null
513        ) {
514            $buildAlias = $aliasRequestedPage->getBuildAlias();
515            switch ($buildAlias->getType()) {
516                case AliasType::REDIRECT:
517                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
518                    if ($res) {
519                        return;
520                    }
521                    break;
522                case AliasType::SYNONYM:
523                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
524                    if ($res) {
525                        return;
526                    }
527                    break;
528                default:
529                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
530                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
531                    if ($res) {
532                        return;
533                    }
534                    break;
535            }
536        }
537
538
539        // If there is a redirection defined in the page rules
540        $result = $this->processingPageRules();
541        if ($result) {
542            // A redirection has occurred
543            // finish the process
544            return;
545        }
546
547        /**
548         *
549         * There was no redirection found, redirect to edit mode if writer
550         *
551         */
552        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
553
554            $this->gotToEditMode($event);
555            // Stop here
556            return;
557
558        }
559
560        /**
561         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
562         */
563        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
564            return;
565        }
566
567        // We are reader and their is no redirection set, we apply the algorithm
568        $readerAlgorithms = array();
569        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
570        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
571        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
572
573        while (
574            ($algorithm = array_shift($readerAlgorithms)) != null
575        ) {
576
577            switch ($algorithm) {
578
579                case self::NOTHING:
580                    return;
581
582                case self::GO_TO_BEST_END_PAGE_NAME:
583
584                    /**
585                     * @var MarkupPath $bestEndPage
586                     */
587                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
588                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
589                        $res = false;
590                        switch ($method) {
591                            case self::REDIRECT_PERMANENT_METHOD:
592                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
593                                break;
594                            case self::REDIRECT_NOTFOUND_METHOD:
595                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
596                                break;
597                            default:
598                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
599                        }
600                        if ($res) {
601                            // Redirection has succeeded
602                            return;
603                        }
604                    }
605                    break;
606
607                case self::GO_TO_NS_START_PAGE:
608
609                    // Start page with the conf['start'] parameter
610                    $startPage = getNS($identifier) . ':' . $conf['start'];
611                    if (page_exists($startPage)) {
612                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
613                        if ($res) {
614                            return;
615                        }
616                    }
617
618                    // Start page with the same name than the namespace
619                    $startPage = getNS($identifier) . ':' . curNS($identifier);
620                    if (page_exists($startPage)) {
621                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
622                        if ($res) {
623                            return;
624                        }
625                    }
626                    break;
627
628                case self::GO_TO_BEST_PAGE_NAME:
629
630                    $bestPageId = null;
631
632                    $bestPage = $this->getBestPage($identifier);
633                    $bestPageId = $bestPage['id'];
634                    $scorePageName = $bestPage['score'];
635
636                    // Get Score from a Namespace
637                    $bestNamespace = $this->scoreBestNamespace($identifier);
638                    $bestNamespaceId = $bestNamespace['namespace'];
639                    $namespaceScore = $bestNamespace['score'];
640
641                    // Compare the two score
642                    if ($scorePageName > 0 or $namespaceScore > 0) {
643                        if ($scorePageName > $namespaceScore) {
644                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
645                        } else {
646                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
647                        }
648                        return;
649                    }
650                    break;
651
652                case self::GO_TO_BEST_NAMESPACE:
653
654                    $scoreNamespace = $this->scoreBestNamespace($identifier);
655                    $bestNamespaceId = $scoreNamespace['namespace'];
656                    $score = $scoreNamespace['score'];
657
658                    if ($score > 0) {
659                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
660                        return;
661                    }
662                    break;
663
664                case self::GO_TO_SEARCH_ENGINE:
665
666                    $this->redirectToSearchEngine();
667
668                    return;
669
670                // End Switch Action
671            }
672
673            // End While Action
674        }
675
676
677    }
678
679
680    /**
681     * getBestNamespace
682     * Return a list with 'BestNamespaceId Score'
683     * @param $id
684     * @return array
685     */
686    private
687    function scoreBestNamespace($id)
688    {
689
690        global $conf;
691
692        // Parameters
693        $pageNameSpace = getNS($id);
694
695        // If the page has an existing namespace start page take it, other search other namespace
696        $startPageNameSpace = $pageNameSpace . ":";
697        $dateAt = '';
698        // $startPageNameSpace will get a full path (ie with start or the namespace
699        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
700        if (page_exists($startPageNameSpace)) {
701            $nameSpaces = array($startPageNameSpace);
702        } else {
703            $nameSpaces = ft_pageLookup($conf['start']);
704        }
705
706        // Parameters and search the best namespace
707        $pathNames = explode(':', $pageNameSpace);
708        $bestNbWordFound = 0;
709        $bestNamespaceId = '';
710        foreach ($nameSpaces as $nameSpace) {
711
712            $nbWordFound = 0;
713            foreach ($pathNames as $pathName) {
714                if (strlen($pathName) > 2) {
715                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
716                }
717            }
718            if ($nbWordFound > $bestNbWordFound) {
719                // Take only the smallest namespace
720                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
721                    $bestNbWordFound = $nbWordFound;
722                    $bestNamespaceId = $nameSpace;
723                }
724            }
725        }
726
727        $startPageFactor = $this->getConf('WeightFactorForStartPage');
728        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
729        if ($bestNbWordFound > 0) {
730            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
731        } else {
732            $bestNamespaceScore = 0;
733        }
734
735
736        return array(
737            'namespace' => $bestNamespaceId,
738            'score' => $bestNamespaceScore
739        );
740
741    }
742
743    /**
744     * @param $event
745     */
746    private
747    function gotToEditMode(&$event)
748    {
749        global $ACT;
750        $ACT = 'edit';
751
752    }
753
754
755    /**
756     * Redirect to an internal page ie:
757     *   * on the same domain
758     *   * no HTTP redirect
759     *   * id rewrite
760     * @param string $targetPageId - target page id
761     * @param string $targetOriginId - the source of the target (redirect)
762     * @return bool - return true if the user has the permission and that the redirect was done
763     * @throws Exception
764     */
765    private
766    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
767    {
768        /**
769         * Because we set the ID globally for the ID redirect
770         * we make sure that this is not a {@link MarkupPath}
771         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
772         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
773         */
774        if (is_object($targetPageId)) {
775            $class = get_class($targetPageId);
776            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
777        }
778
779        if (is_object($targetOriginId)) {
780            $class = get_class($targetOriginId);
781            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
782        }
783
784        // If the user does not have the right to see the target page
785        // don't do anything
786        if (!(Identity::isReader($targetPageId))) {
787            return false;
788        }
789
790        // Change the id
791        global $ID;
792        global $INFO;
793        $sourceId = $ID;
794        $ID = $targetPageId;
795        if (isset($_REQUEST["id"])) {
796            $_REQUEST["id"] = $targetPageId;
797        }
798        if (isset($_GET["id"])) {
799            $_GET["id"] = $targetPageId;
800        }
801
802        /**
803         * Refresh the $INFO data
804         *
805         * the info attributes are used elsewhere
806         *   'id': for the sidebar
807         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
808         *   'rev' : for the edit button to be sure that the page is still the same
809         */
810        $INFO = pageinfo();
811
812        /**
813         * Not compatible with
814         * https://www.dokuwiki.org/config:send404 is enabled
815         *
816         * This check happens before that dokuwiki is started
817         * and send an header in doku.php
818         *
819         * We send a warning
820         */
821        global $conf;
822        if ($conf['send404'] == true) {
823            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
824        }
825
826        // Redirection
827        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
828
829        return true;
830
831    }
832
833    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
834    {
835        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
836    }
837
838    /**
839     * The general HTTP Redirect method to an internal page
840     * where the redirection method decide which type of redirection
841     * @param string $targetIdOrUrl - a dokuwiki id or an url
842     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
843     * @param string $method - the redirection method
844     */
845    private
846    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
847    {
848
849        global $ID;
850
851
852        // Log the redirections
853        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
854
855
856        // An http external url ?
857        try {
858            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
859        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
860            $isHttpUrl = false;
861        }
862
863        // If there is a bug in the isValid function for an internal url
864        // We get a loop.
865        // The Url becomes the id, the id is unknown and we do a redirect again
866        //
867        // We check then if the target starts with the base url
868        // if this is the case, it's valid
869        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
870            $isHttpUrl = true;
871        }
872        if ($isHttpUrl) {
873
874            // defend against HTTP Response Splitting
875            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
876            $targetUrl = stripctl($targetIdOrUrl);
877
878        } else {
879
880
881            // Explode the page ID and the anchor (#)
882            $link = explode('#', $targetIdOrUrl, 2);
883
884            $url = UrlEndpoint::createDokuUrl();
885
886            $urlParams = [];
887            // if this is search engine redirect
888            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
889                $replacementPart = array(':', '_', '-');
890                $query = str_replace($replacementPart, ' ', $ID);
891                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
892                $url->setQueryParameter("q", $query);
893            }
894
895            /**
896             * Doing a permanent redirect with a added query string
897             * create a new page url on the search engine
898             *
899             * ie
900             * http://host/page
901             * is not the same
902             * than
903             * http://host/page?whatever
904             *
905             * We can't pass query string otherwise, we get
906             * the SEO warning / error
907             * `Alternative page with proper canonical tag`
908             *
909             * Use HTTP X header for debug
910             */
911            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
912                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
913                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
914            }
915
916            $id = $link[0];
917            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
918            if (array_key_exists(1, $link)) {
919                $url->setFragment($link[1]);
920            }
921            $targetUrl = $url->toAbsoluteUrlString();
922
923        }
924
925        /**
926         * The dokuwiki function {@link send_redirect()}
927         * set the `Location header` and in php, the header function
928         * in this case change the status code to 302 Arghhhh.
929         * The code below is adapted from this function {@link send_redirect()}
930         */
931        global $MSG; // are there any undisplayed messages? keep them in session for display
932        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
933            //reopen session, store data and close session again
934            @session_start();
935            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
936        }
937        session_write_close(); // always close the session
938
939        switch ($method) {
940
941            case self::REDIRECT_PERMANENT_METHOD:
942                ExecutionContext::getActualOrCreateFromEnv()
943                    ->response()
944                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
945                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
946                    ->end();
947                return true;
948
949            case self::REDIRECT_NOTFOUND_METHOD:
950
951
952                // Empty 404 body to not get the standard 404 page of the browser
953                // but a blank page to avoid a sort of FOUC.
954                // ie the user see a page briefly
955                ExecutionContext::getActualOrCreateFromEnv()
956                    ->response()
957                    ->setStatus(HttpResponseStatus::NOT_FOUND)
958                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
959                    ->setBody(self::PAGE_404, Mime::getHtml())
960                    ->end();
961                return true;
962
963            default:
964                LogUtility::msg("The method ($method) is not an http redirection");
965                return false;
966        }
967
968
969    }
970
971    /**
972     * @param $id
973     * @return array
974     */
975    private
976    function getBestPage($id): array
977    {
978
979        // The return parameters
980        $bestPageId = null;
981        $scorePageName = null;
982
983        // Get Score from a page
984        $pageName = noNS($id);
985        $pagesWithSameName = ft_pageLookup($pageName);
986        if (count($pagesWithSameName) > 0) {
987
988            // Search same namespace in the page found than in the Id page asked.
989            $bestNbWordFound = 0;
990
991
992            $wordsInPageSourceId = explode(':', $id);
993            foreach ($pagesWithSameName as $targetPageId => $title) {
994
995                // Nb of word found in the target page id
996                // that are in the source page id
997                $nbWordFound = 0;
998                foreach ($wordsInPageSourceId as $word) {
999                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
1000                }
1001
1002                if ($bestPageId == null) {
1003
1004                    $bestNbWordFound = $nbWordFound;
1005                    $bestPageId = $targetPageId;
1006
1007                } else {
1008
1009                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
1010
1011                        $bestNbWordFound = $nbWordFound;
1012                        $bestPageId = $targetPageId;
1013
1014                    }
1015
1016                }
1017
1018            }
1019            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
1020            return array(
1021                'id' => $bestPageId,
1022                'score' => $scorePageName);
1023        }
1024        return array(
1025            'id' => $bestPageId,
1026            'score' => $scorePageName
1027        );
1028
1029    }
1030
1031
1032    /**
1033     * Redirect to the search engine
1034     */
1035    private
1036    function redirectToSearchEngine()
1037    {
1038
1039        global $ID;
1040        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1041
1042    }
1043
1044
1045    /**
1046     *
1047     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1048     *   * For a SQlite database, it will add a row into the log
1049     *
1050     * @param string $sourcePageId
1051     * @param $targetPageId
1052     * @param $algorithmic
1053     * @param $method - http or rewrite
1054     */
1055    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1056    {
1057
1058        $row = array(
1059            "TIMESTAMP" => date("c"),
1060            "SOURCE" => $sourcePageId,
1061            "TARGET" => $targetPageId,
1062            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1063            "TYPE" => $algorithmic,
1064            "METHOD" => $method
1065        );
1066        $request = Sqlite::createOrGetBackendSqlite()
1067            ->createRequest()
1068            ->setTableRow('redirections_log', $row);
1069        try {
1070            $request
1071                ->execute();
1072        } catch (ExceptionCompile $e) {
1073            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1074        } finally {
1075            $request->close();
1076        }
1077
1078
1079    }
1080
1081    /**
1082     * This function check if there is a redirection declared
1083     * in the redirection table
1084     * @return bool - true if a rewrite or redirection occurs
1085     * @throws Exception
1086     */
1087    private function processingPageRules(): bool
1088    {
1089        global $ID;
1090
1091        $calculatedTarget = null;
1092        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1093        // Known redirection in the table
1094        // Get the page from redirection data
1095        $rules = $this->pageRules->getRules();
1096        foreach ($rules as $rule) {
1097
1098            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1099            $ruleTarget = $rule[PageRules::TARGET_NAME];
1100
1101            // Glob to Rexgexp
1102            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1103
1104            // Match ?
1105            // https://www.php.net/manual/en/function.preg-match.php
1106            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1107            if ($pregMatchResult === false) {
1108                // The `if` to take into account this problem
1109                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1110                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1111                return false;
1112            }
1113            if ($pregMatchResult) {
1114                $calculatedTarget = $ruleTarget;
1115                foreach ($matches as $key => $match) {
1116                    if ($key == 0) {
1117                        continue;
1118                    } else {
1119                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1120                    }
1121                }
1122                break;
1123            }
1124        }
1125
1126        if ($calculatedTarget == null) {
1127            return false;
1128        }
1129
1130        // If this is an external redirect (other domain)
1131        try {
1132            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1133        } catch (ExceptionBadSyntax $e) {
1134            $isHttpUrl = false;
1135        }
1136        if ($isHttpUrl) {
1137            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1138            return true;
1139        }
1140
1141        // If the page exist
1142        if (page_exists($calculatedTarget)) {
1143
1144            // This is DokuWiki Id and should always be lowercase
1145            // The page rule may have change that
1146            $calculatedTarget = strtolower($calculatedTarget);
1147            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1148            if ($res) {
1149                return true;
1150            } else {
1151                return false;
1152            }
1153
1154        } else {
1155
1156            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1157            return false;
1158
1159        }
1160
1161    }
1162
1163    private function performNotFoundRedirect(string $targetId, string $origin): bool
1164    {
1165        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1166    }
1167
1168
1169}
1170