xref: /plugin/combo/action/router.php (revision 5187326aa67a686ebdd43a3fdcb16909ef6f65d3)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\DokuwikiId;
6use ComboStrap\ExceptionBadArgument;
7use ComboStrap\ExceptionBadSyntax;
8use ComboStrap\ExceptionCompile;
9use ComboStrap\ExceptionSqliteNotAvailable;
10use ComboStrap\ExecutionContext;
11use ComboStrap\FileSystems;
12use ComboStrap\HttpResponse;
13use ComboStrap\HttpResponseStatus;
14use ComboStrap\Identity;
15use ComboStrap\LogUtility;
16use ComboStrap\MarkupPath;
17use ComboStrap\Meta\Field\AliasType;
18use ComboStrap\Mime;
19use ComboStrap\PageId;
20use ComboStrap\PageRules;
21use ComboStrap\PageUrlPath;
22use ComboStrap\PageUrlType;
23use ComboStrap\RouterBestEndPage;
24use ComboStrap\Site;
25use ComboStrap\SiteConfig;
26use ComboStrap\Sqlite;
27use ComboStrap\Web\Url;
28use ComboStrap\Web\UrlEndpoint;
29use ComboStrap\Web\UrlRewrite;
30use ComboStrap\WikiPath;
31
32require_once(__DIR__ . '/../vendor/autoload.php');
33
/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router)
 *
 *
 */
41class action_plugin_combo_router extends DokuWiki_Action_Plugin
42{
43
44    /**
45     * @deprecated
46     */
47    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
48    const ROUTER_ENABLE_CONF = "enableRouter";
49
50    // The redirect type
51    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
52    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
53    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
54    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
55
56    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
57
58    // Where the target id value comes from
59    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
60    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
61    /**
62     * Named Permalink (canonical)
63     */
64    const TARGET_ORIGIN_CANONICAL = 'canonical';
65    const TARGET_ORIGIN_ALIAS = 'alias';
66    /**
67     * Identifier Permalink (full page id)
68     */
69    const TARGET_ORIGIN_PERMALINK = "permalink";
70    /**
71     * Extended Permalink (abbreviated page id at the end)
72     */
73    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
74    const TARGET_ORIGIN_START_PAGE = 'startPage';
75    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
76    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
77    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
78    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
79    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
80
81
82    // The constant parameters
83    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
84    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
85    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
86    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
87    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
88    const GO_TO_EDIT_MODE = 'GoToEditMode';
89    const NOTHING = 'Nothing';
90
91    /** @var string - a name used in log and other places */
92    const NAME = 'Url Manager';
93    const CANONICAL = 'router';
94    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
95    const REFRESH_HEADER_NAME = "Refresh";
96    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
97    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
98    public const URL_MANAGER_NAME = "Router";
99
100
101    /**
102     * @var PageRules
103     */
104    private $pageRules;
105
106
107    function __construct()
108    {
109        // enable direct access to language strings
110        // ie $this->lang
111        $this->setupLocale();
112
113    }
114
115    /**
116     * @param string $refreshHeader
117     * @return false|string
118     */
119    public static function getUrlFromRefresh(string $refreshHeader)
120    {
121        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
122    }
123
124    public static function getUrlFromLocation($refreshHeader)
125    {
126        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
127    }
128
129    /**
130     * @return string|null
131     *
132     * Return the original id from the request
133     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
134     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
135     *
136     * Unfortunately, DOKUWIKI_STARTED is not the first event
137     * The id may have been changed by
138     * {@link action_plugin_combo_lang::load_lang()}
139     * function, that's why we have this function
140     * to get the original requested id
141     */
142    private static function getOriginalIdFromRequest(): ?string
143    {
144        $originalId = $_GET["id"] ?? null;
145        if ($originalId === null) {
146            return null;
147        }
148        // We get a `/` as first character
149        // because we return an id, we need to delete it
150        $originalId = substr($originalId, 1);
151        // transform / to :
152        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
153    }
154
155    /**
156     * Determine if the request should be banned based on the id
157     *
158     * @param string $id
159     * @return bool
160     *
161     * See also {@link https://perishablepress.com/7g-firewall/#features}
162     * for blocking rules on http request data such as:
163     *   * query_string
164     *   * user_agent,
165     *   * remote host
166     */
167    public static function isShadowBanned(string $id): bool
168    {
169        /**
170         * ie
171         * wp-json:api:flutter_woo:config_file
172         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
173         * wp-admin
174         * 2020:wp-includes:wlwmanifest.xml
175         * wp-content:start
176         * wp-admin:css:start
177         * sito:wp-includes:wlwmanifest.xml
178         * site:wp-includes:wlwmanifest.xml
179         * cms:wp-includes:wlwmanifest.xml
180         * test:wp-includes:wlwmanifest.xml
181         * media:wp-includes:wlwmanifest.xml
182         * wp2:wp-includes:wlwmanifest.xml
183         * 2019:wp-includes:wlwmanifest.xml
184         * shop:wp-includes:wlwmanifest.xml
185         * wp1:wp-includes:wlwmanifest.xml
186         * news:wp-includes:wlwmanifest.xml
187         * 2018:wp-includes:wlwmanifest.xml
188         */
189        if (strpos($id, 'wp-') !== false) {
190            return true;
191        }
192
193        /**
194         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
195         * db:oracle:999999.9:union:all:select_null:from_dual
196         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
197         */
198        if (preg_match('/_chr_|_0_0/', $id) === 1) {
199            return true;
200        }
201
202
203        /**
204         * ie
205         * git:objects:
206         * git:refs:heads:stable
207         * git:logs:refs:heads:main
208         * git:logs:refs:heads:stable
209         * git:hooks:pre-push.sample
210         * git:hooks:pre-receive.sample
211         */
212        if (strpos($id, "git:") === 0) {
213            return true;
214        }
215
216        return false;
217
218    }
219
220    /**
221     * @param string $id
222     * @return bool
223     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
224     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
225     * well-known:dnt-policy.txt
226     */
227    public static function isWellKnownFile(string $id): bool
228    {
229        return strpos($id, "well-known") === 0;
230    }
231
232
233    function register(Doku_Event_Handler $controller)
234    {
235
236        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
237
238            /**
239             * This will call the function {@link action_plugin_combo_router::_router()}
240             * The event is not DOKUWIKI_STARTED because this is not the first one
241             *
242             * https://www.dokuwiki.org/devel:event:init_lang_load
243             */
244            $controller->register_hook('DOKUWIKI_STARTED',
245                'BEFORE',
246                $this,
247                'router',
248                array());
249
250            /**
251             * Bot Ban functionality
252             *
253             * Because we make a redirection to the home page, we need to check
254             * if the home is readable, for that, the AUTH plugin needs to be initialized
255             * That's why we wait
256             * https://www.dokuwiki.org/devel:event:dokuwiki_init_done
257             *
258             * and we can't use
259             * https://www.dokuwiki.org/devel:event:init_lang_load
260             * because there is no auth setup in {@link auth_aclcheck_cb()}
261             * and the the line `if (!$auth instanceof AuthPlugin) return AUTH_NONE;` return none;
262             */
263            $controller->register_hook('DOKUWIKI_INIT_DONE', 'BEFORE', $this, 'ban', array());
264
265        }
266
267
268    }
269
270    /**
271     *
272     * We have created a spacial ban function that is
273     * called before the first function
274     * {@link action_plugin_combo_metalang::load_lang()}
275     * to spare CPU.
276     *
277     * @param $event
278     * @throws Exception
279     */
280    function ban(&$event)
281    {
282
283        $id = self::getOriginalIdFromRequest();
284        if ($id === null) {
285            return;
286        }
287        $page = MarkupPath::createMarkupFromId($id);
288        if (FileSystems::exists($page)) {
289            return;
290        }
291
292        // Well known
293        if (self::isWellKnownFile($id)) {
294            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
295            ExecutionContext::getActualOrCreateFromEnv()
296                ->response()
297                ->setStatus(HttpResponseStatus::NOT_FOUND)
298                ->end();
299            return;
300        }
301
302        // Shadow banned
303        if (self::isShadowBanned($id)) {
304            $webSiteHomePage = Site::getIndexPageName();
305            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
306        }
307
308    }
309
310    /**
311     * @param $event Doku_Event
312     * @param $param
313     * @return void
314     * @throws Exception
315     */
316    function router(&$event, $param)
317    {
318
319        /**
320         * Just the {@link ExecutionContext::SHOW_ACTION}
321         * may be redirected
322         */
323        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
324        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
325            return;
326        }
327
328        $urlRewrite = Site::getUrlRewrite();
329        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
330            UrlRewrite::sendErrorMessage();
331            return;
332        }
333
334        global $ID;
335
336        /**
337         * Without SQLite, this module does not work further
338         */
339        try {
340            Sqlite::createOrGetSqlite();
341        } catch (ExceptionSqliteNotAvailable $e) {
342            return;
343        }
344
345        $this->pageRules = new PageRules();
346
347
348        /**
349         * Unfortunately, DOKUWIKI_STARTED is not the first event
350         * The id may have been changed by
351         * {@link action_plugin_combo_lang::load_lang()}
352         * function, that's why we check against the {@link $_REQUEST}
353         * and not the global ID
354         */
355        $originalId = self::getOriginalIdFromRequest();
356
357        /**
358         * Page is an existing id ?
359         */
360        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
361        if (FileSystems::exists($requestedMarkupPath)) {
362
363            /**
364             * If this is not the root home page
365             * and if the canonical id is the not the same (the id has changed)
366             * and if this is not a historical page (revision)
367             * redirect
368             */
369            if (
370                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
371                && $ID != Site::getIndexPageName()
372                && !isset($_REQUEST["rev"])
373            ) {
374                /**
375                 * TODO: When saving for the first time, the page is not stored in the database
376                 *   but that's not the case actually
377                 */
378                $databasePageRow = $requestedMarkupPath->getDatabasePage();
379                if ($databasePageRow->exists()) {
380                    /**
381                     * A move may leave the database in a bad state,
382                     * unfortunately (ie page is not in index, unable to update, ...)
383                     * We test therefore if the database page id exists
384                     */
385                    $targetPageId = $databasePageRow->getFromRow("id");
386                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
387                    if (FileSystems::exists($targetPath)) {
388                        $this->executePermanentRedirect(
389                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
390                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
391                        );
392                    }
393                }
394            }
395            return;
396        }
397
398
399        $identifier = $ID;
400
401
402        /**
403         * Page Id in the url
404         */
405        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
406        if ($shortPageId != null) {
407            $pageId = PageUrlPath::decodePageId($shortPageId);
408        } else {
409            /**
410             * Permalink with id
411             */
412            $pageId = PageUrlPath::decodePageId($identifier);
413        }
414        if ($pageId !== null) {
415
416            if ($requestedMarkupPath->getParent() === null) {
417                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
418                if ($page !== null && $page->exists()) {
419                    $this->executePermanentRedirect(
420                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
421                        self::TARGET_ORIGIN_PERMALINK
422                    );
423                    return;
424                }
425            }
426
427            /**
428             * Page Id Abbr ?
429             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
430             */
431            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
432            if ($page === null) {
433                // or the length of the abbr has changed
434                $canonicalDatabasePage = new DatabasePageRow();
435                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
436                if ($row !== null) {
437                    $canonicalDatabasePage->setRow($row);
438                    $page = $canonicalDatabasePage->getMarkupPath();
439                }
440            }
441            if ($page !== null && $page->exists()) {
442                /**
443                 * If the url canonical id has changed, we show it
444                 * to the writer by performing a permanent redirect
445                 */
446                if ($identifier != $page->getUrlId()) {
447                    // Google asks for a redirect
448                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
449                    // People access your site through several different URLs.
450                    // If, for example, your home page can be reached in multiple ways
451                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
452                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
453                    // and use redirects to send traffic from the other URLs to your preferred URL.
454                    $this->executePermanentRedirect(
455                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
456                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
457                    );
458                    return;
459                }
460
461                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
462                return;
463
464            }
465            // permanent url not yet in the database
466            // Other permanent such as permanent canonical ?
467            // We let the process go with the new identifier
468
469        }
470
471        // Global variable needed in the process
472        global $conf;
473
474        /**
475         * Identifier is a Canonical ?
476         */
477        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
478        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
479        if ($canonicalPage !== null && $canonicalPage->exists()) {
480            /**
481             * Does the canonical url is canonical name based
482             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
483             */
484            if ($canonicalPage->getUrlId() === $identifier) {
485                $res = $this->executeTransparentRedirect(
486                    $canonicalPage->getWikiId(),
487                    self::TARGET_ORIGIN_CANONICAL
488                );
489            } else {
490                $res = $this->executePermanentRedirect(
491                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
492                    self::TARGET_ORIGIN_CANONICAL
493                );
494            }
495            if ($res) {
496                return;
497            }
498        }
499
500        /**
501         * Identifier is an alias
502         */
503        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
504        if (
505            $aliasRequestedPage !== null
506            && $aliasRequestedPage->exists()
507            // The build alias is the file system metadata alias
508            // it may be null if the replication in the database was not successful
509            && $aliasRequestedPage->getBuildAlias() !== null
510        ) {
511            $buildAlias = $aliasRequestedPage->getBuildAlias();
512            switch ($buildAlias->getType()) {
513                case AliasType::REDIRECT:
514                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
515                    if ($res) {
516                        return;
517                    }
518                    break;
519                case AliasType::SYNONYM:
520                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
521                    if ($res) {
522                        return;
523                    }
524                    break;
525                default:
526                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
527                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
528                    if ($res) {
529                        return;
530                    }
531                    break;
532            }
533        }
534
535
536        // If there is a redirection defined in the page rules
537        $result = $this->processingPageRules();
538        if ($result) {
539            // A redirection has occurred
540            // finish the process
541            return;
542        }
543
544        /**
545         *
546         * There was no redirection found, redirect to edit mode if writer
547         *
548         */
549        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
550
551            $this->gotToEditMode($event);
552            // Stop here
553            return;
554
555        }
556
557        /**
558         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
559         */
560        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
561            return;
562        }
563
564        // We are reader and their is no redirection set, we apply the algorithm
565        $readerAlgorithms = array();
566        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
567        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
568        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
569
570        while (
571            ($algorithm = array_shift($readerAlgorithms)) != null
572        ) {
573
574            switch ($algorithm) {
575
576                case self::NOTHING:
577                    return;
578
579                case self::GO_TO_BEST_END_PAGE_NAME:
580
581                    /**
582                     * @var MarkupPath $bestEndPage
583                     */
584                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
585                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
586                        $res = false;
587                        switch ($method) {
588                            case self::REDIRECT_PERMANENT_METHOD:
589                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
590                                break;
591                            case self::REDIRECT_NOTFOUND_METHOD:
592                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
593                                break;
594                            default:
595                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
596                        }
597                        if ($res) {
598                            // Redirection has succeeded
599                            return;
600                        }
601                    }
602                    break;
603
604                case self::GO_TO_NS_START_PAGE:
605
606                    // Start page with the conf['start'] parameter
607                    $startPage = getNS($identifier) . ':' . $conf['start'];
608                    if (page_exists($startPage)) {
609                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
610                        if ($res) {
611                            return;
612                        }
613                    }
614
615                    // Start page with the same name than the namespace
616                    $startPage = getNS($identifier) . ':' . curNS($identifier);
617                    if (page_exists($startPage)) {
618                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
619                        if ($res) {
620                            return;
621                        }
622                    }
623                    break;
624
625                case self::GO_TO_BEST_PAGE_NAME:
626
627                    $bestPageId = null;
628
629                    $bestPage = $this->getBestPage($identifier);
630                    $bestPageId = $bestPage['id'];
631                    $scorePageName = $bestPage['score'];
632
633                    // Get Score from a Namespace
634                    $bestNamespace = $this->scoreBestNamespace($identifier);
635                    $bestNamespaceId = $bestNamespace['namespace'];
636                    $namespaceScore = $bestNamespace['score'];
637
638                    // Compare the two score
639                    if ($scorePageName > 0 or $namespaceScore > 0) {
640                        if ($scorePageName > $namespaceScore) {
641                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
642                        } else {
643                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
644                        }
645                        return;
646                    }
647                    break;
648
649                case self::GO_TO_BEST_NAMESPACE:
650
651                    $scoreNamespace = $this->scoreBestNamespace($identifier);
652                    $bestNamespaceId = $scoreNamespace['namespace'];
653                    $score = $scoreNamespace['score'];
654
655                    if ($score > 0) {
656                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
657                        return;
658                    }
659                    break;
660
661                case self::GO_TO_SEARCH_ENGINE:
662
663                    $this->redirectToSearchEngine();
664
665                    return;
666
667                // End Switch Action
668            }
669
670            // End While Action
671        }
672
673
674    }
675
676
677    /**
678     * getBestNamespace
679     * Return a list with 'BestNamespaceId Score'
680     * @param $id
681     * @return array
682     */
683    private
684    function scoreBestNamespace($id)
685    {
686
687        global $conf;
688
689        // Parameters
690        $pageNameSpace = getNS($id);
691
692        // If the page has an existing namespace start page take it, other search other namespace
693        $startPageNameSpace = $pageNameSpace . ":";
694        $dateAt = '';
695        // $startPageNameSpace will get a full path (ie with start or the namespace
696        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
697        if (page_exists($startPageNameSpace)) {
698            $nameSpaces = array($startPageNameSpace);
699        } else {
700            $nameSpaces = ft_pageLookup($conf['start']);
701        }
702
703        // Parameters and search the best namespace
704        $pathNames = explode(':', $pageNameSpace);
705        $bestNbWordFound = 0;
706        $bestNamespaceId = '';
707        foreach ($nameSpaces as $nameSpace) {
708
709            $nbWordFound = 0;
710            foreach ($pathNames as $pathName) {
711                if (strlen($pathName) > 2) {
712                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
713                }
714            }
715            if ($nbWordFound > $bestNbWordFound) {
716                // Take only the smallest namespace
717                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
718                    $bestNbWordFound = $nbWordFound;
719                    $bestNamespaceId = $nameSpace;
720                }
721            }
722        }
723
724        $startPageFactor = $this->getConf('WeightFactorForStartPage');
725        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
726        if ($bestNbWordFound > 0) {
727            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
728        } else {
729            $bestNamespaceScore = 0;
730        }
731
732
733        return array(
734            'namespace' => $bestNamespaceId,
735            'score' => $bestNamespaceScore
736        );
737
738    }
739
740    /**
741     * @param $event
742     */
743    private
744    function gotToEditMode(&$event)
745    {
746        global $ACT;
747        $ACT = 'edit';
748
749    }
750
751
752    /**
753     * Redirect to an internal page ie:
754     *   * on the same domain
755     *   * no HTTP redirect
756     *   * id rewrite
757     * @param string $targetPageId - target page id
758     * @param string $targetOriginId - the source of the target (redirect)
759     * @return bool - return true if the user has the permission and that the redirect was done
760     * @throws Exception
761     */
762    private
763    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
764    {
765        /**
766         * Because we set the ID globally for the ID redirect
767         * we make sure that this is not a {@link MarkupPath}
768         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
769         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
770         */
771        if (is_object($targetPageId)) {
772            $class = get_class($targetPageId);
773            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
774        }
775
776        if (is_object($targetOriginId)) {
777            $class = get_class($targetOriginId);
778            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
779        }
780
781        // If the user does not have the right to see the target page
782        // don't do anything
783        if (!(Identity::isReader($targetPageId))) {
784            return false;
785        }
786
787        // Change the id
788        global $ID;
789        global $INFO;
790        $sourceId = $ID;
791        $ID = $targetPageId;
792        if (isset($_REQUEST["id"])) {
793            $_REQUEST["id"] = $targetPageId;
794        }
795        if (isset($_GET["id"])) {
796            $_GET["id"] = $targetPageId;
797        }
798
799        /**
800         * Refresh the $INFO data
801         *
802         * the info attributes are used elsewhere
803         *   'id': for the sidebar
804         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
805         *   'rev' : for the edit button to be sure that the page is still the same
806         */
807        $INFO = pageinfo();
808
809        /**
810         * Not compatible with
811         * https://www.dokuwiki.org/config:send404 is enabled
812         *
813         * This check happens before that dokuwiki is started
814         * and send an header in doku.php
815         *
816         * We send a warning
817         */
818        global $conf;
819        if ($conf['send404'] == true) {
820            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
821        }
822
823        // Redirection
824        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
825
826        return true;
827
828    }
829
830    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
831    {
832        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
833    }
834
835    /**
836     * The general HTTP Redirect method to an internal page
837     * where the redirection method decide which type of redirection
838     * @param string $targetIdOrUrl - a dokuwiki id or an url
839     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
840     * @param string $method - the redirection method
841     */
842    private
843    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
844    {
845
846        global $ID;
847
848
849        // Log the redirections
850        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
851
852
853        // An http external url ?
854        try {
855            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
856        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
857            $isHttpUrl = false;
858        }
859
860        // If there is a bug in the isValid function for an internal url
861        // We get a loop.
862        // The Url becomes the id, the id is unknown and we do a redirect again
863        //
864        // We check then if the target starts with the base url
865        // if this is the case, it's valid
866        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
867            $isHttpUrl = true;
868        }
869        if ($isHttpUrl) {
870
871            // defend against HTTP Response Splitting
872            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
873            $targetUrl = stripctl($targetIdOrUrl);
874
875        } else {
876
877
878            // Explode the page ID and the anchor (#)
879            $link = explode('#', $targetIdOrUrl, 2);
880
881            $url = UrlEndpoint::createDokuUrl();
882
883            $urlParams = [];
884            // if this is search engine redirect
885            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
886                $replacementPart = array(':', '_', '-');
887                $query = str_replace($replacementPart, ' ', $ID);
888                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
889                $url->setQueryParameter("q", $query);
890            }
891
892            /**
893             * Doing a permanent redirect with a added query string
894             * create a new page url on the search engine
895             *
896             * ie
897             * http://host/page
898             * is not the same
899             * than
900             * http://host/page?whatever
901             *
902             * We can't pass query string otherwise, we get
903             * the SEO warning / error
904             * `Alternative page with proper canonical tag`
905             *
906             * Use HTTP X header for debug
907             */
908            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
909                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
910                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
911            }
912
913            $id = $link[0];
914            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
915            if (array_key_exists(1, $link)) {
916                $url->setFragment($link[1]);
917            }
918            $targetUrl = $url->toAbsoluteUrlString();
919
920        }
921
922        /**
923         * The dokuwiki function {@link send_redirect()}
924         * set the `Location header` and in php, the header function
925         * in this case change the status code to 302 Arghhhh.
926         * The code below is adapted from this function {@link send_redirect()}
927         */
928        global $MSG; // are there any undisplayed messages? keep them in session for display
929        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
930            //reopen session, store data and close session again
931            @session_start();
932            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
933        }
934        session_write_close(); // always close the session
935
936        switch ($method) {
937
938            case self::REDIRECT_PERMANENT_METHOD:
939                ExecutionContext::getActualOrCreateFromEnv()
940                    ->response()
941                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
942                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
943                    ->end();
944                return true;
945
946            case self::REDIRECT_NOTFOUND_METHOD:
947
948
949                // Empty 404 body to not get the standard 404 page of the browser
950                // but a blank page to avoid a sort of FOUC.
951                // ie the user see a page briefly
952                ExecutionContext::getActualOrCreateFromEnv()
953                    ->response()
954                    ->setStatus(HttpResponseStatus::NOT_FOUND)
955                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
956                    ->setBody(self::PAGE_404, Mime::getHtml())
957                    ->end();
958                return true;
959
960            default:
961                LogUtility::msg("The method ($method) is not an http redirection");
962                return false;
963        }
964
965
966    }
967
968    /**
969     * @param $id
970     * @return array
971     */
972    private
973    function getBestPage($id): array
974    {
975
976        // The return parameters
977        $bestPageId = null;
978        $scorePageName = null;
979
980        // Get Score from a page
981        $pageName = noNS($id);
982        $pagesWithSameName = ft_pageLookup($pageName);
983        if (count($pagesWithSameName) > 0) {
984
985            // Search same namespace in the page found than in the Id page asked.
986            $bestNbWordFound = 0;
987
988
989            $wordsInPageSourceId = explode(':', $id);
990            foreach ($pagesWithSameName as $targetPageId => $title) {
991
992                // Nb of word found in the target page id
993                // that are in the source page id
994                $nbWordFound = 0;
995                foreach ($wordsInPageSourceId as $word) {
996                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
997                }
998
999                if ($bestPageId == null) {
1000
1001                    $bestNbWordFound = $nbWordFound;
1002                    $bestPageId = $targetPageId;
1003
1004                } else {
1005
1006                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
1007
1008                        $bestNbWordFound = $nbWordFound;
1009                        $bestPageId = $targetPageId;
1010
1011                    }
1012
1013                }
1014
1015            }
1016            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
1017            return array(
1018                'id' => $bestPageId,
1019                'score' => $scorePageName);
1020        }
1021        return array(
1022            'id' => $bestPageId,
1023            'score' => $scorePageName
1024        );
1025
1026    }
1027
1028
1029    /**
1030     * Redirect to the search engine
1031     */
1032    private
1033    function redirectToSearchEngine()
1034    {
1035
1036        global $ID;
1037        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1038
1039    }
1040
1041
1042    /**
1043     *
1044     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1045     *   * For a SQlite database, it will add a row into the log
1046     *
1047     * @param string $sourcePageId
1048     * @param $targetPageId
1049     * @param $algorithmic
1050     * @param $method - http or rewrite
1051     */
1052    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1053    {
1054
1055        $row = array(
1056            "TIMESTAMP" => date("c"),
1057            "SOURCE" => $sourcePageId,
1058            "TARGET" => $targetPageId,
1059            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1060            "TYPE" => $algorithmic,
1061            "METHOD" => $method
1062        );
1063        $request = Sqlite::createOrGetBackendSqlite()
1064            ->createRequest()
1065            ->setTableRow('redirections_log', $row);
1066        try {
1067            $request
1068                ->execute();
1069        } catch (ExceptionCompile $e) {
1070            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1071        } finally {
1072            $request->close();
1073        }
1074
1075
1076    }
1077
1078    /**
1079     * This function check if there is a redirection declared
1080     * in the redirection table
1081     * @return bool - true if a rewrite or redirection occurs
1082     * @throws Exception
1083     */
1084    private function processingPageRules(): bool
1085    {
1086        global $ID;
1087
1088        $calculatedTarget = null;
1089        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1090        // Known redirection in the table
1091        // Get the page from redirection data
1092        $rules = $this->pageRules->getRules();
1093        foreach ($rules as $rule) {
1094
1095            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1096            $ruleTarget = $rule[PageRules::TARGET_NAME];
1097
1098            // Glob to Rexgexp
1099            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1100
1101            // Match ?
1102            // https://www.php.net/manual/en/function.preg-match.php
1103            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1104            if ($pregMatchResult === false) {
1105                // The `if` to take into account this problem
1106                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1107                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1108                return false;
1109            }
1110            if ($pregMatchResult) {
1111                $calculatedTarget = $ruleTarget;
1112                foreach ($matches as $key => $match) {
1113                    if ($key == 0) {
1114                        continue;
1115                    } else {
1116                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1117                    }
1118                }
1119                break;
1120            }
1121        }
1122
1123        if ($calculatedTarget == null) {
1124            return false;
1125        }
1126
1127        // If this is an external redirect (other domain)
1128        try {
1129            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1130        } catch (ExceptionBadSyntax $e) {
1131            $isHttpUrl = false;
1132        }
1133        if ($isHttpUrl) {
1134            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1135            return true;
1136        }
1137
1138        // If the page exist
1139        if (page_exists($calculatedTarget)) {
1140
1141            // This is DokuWiki Id and should always be lowercase
1142            // The page rule may have change that
1143            $calculatedTarget = strtolower($calculatedTarget);
1144            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1145            if ($res) {
1146                return true;
1147            } else {
1148                return false;
1149            }
1150
1151        } else {
1152
1153            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1154            return false;
1155
1156        }
1157
1158    }
1159
1160    private function performNotFoundRedirect(string $targetId, string $origin): bool
1161    {
1162        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1163    }
1164
1165
1166}
1167