1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\DokuwikiId;
6use ComboStrap\ExceptionBadArgument;
7use ComboStrap\ExceptionBadSyntax;
8use ComboStrap\ExceptionCompile;
9use ComboStrap\ExceptionSqliteNotAvailable;
10use ComboStrap\ExecutionContext;
11use ComboStrap\FileSystems;
12use ComboStrap\HttpResponse;
13use ComboStrap\HttpResponseStatus;
14use ComboStrap\Identity;
15use ComboStrap\LogUtility;
16use ComboStrap\MarkupPath;
17use ComboStrap\Meta\Field\AliasType;
18use ComboStrap\Mime;
19use ComboStrap\PageId;
20use ComboStrap\PageRules;
21use ComboStrap\PageUrlPath;
22use ComboStrap\PageUrlType;
23use ComboStrap\RouterBestEndPage;
24use ComboStrap\Site;
25use ComboStrap\SiteConfig;
26use ComboStrap\Sqlite;
27use ComboStrap\Web\Url;
28use ComboStrap\Web\UrlEndpoint;
29use ComboStrap\Web\UrlRewrite;
30use ComboStrap\WikiPath;
31
32require_once(__DIR__ . '/../vendor/autoload.php');
33
34/**
 * Class action_plugin_combo_router
36 *
37 * The actual URL manager
38 *
39 *
40 */
41class action_plugin_combo_router extends DokuWiki_Action_Plugin
42{
43
    /**
     * Former name of the configuration key that enables this plugin.
     * NOTE(review): presumably superseded by {@link self::ROUTER_ENABLE_CONF},
     * which is the key read in {@link self::register()} - confirm before removal
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    /**
     * Configuration key read in {@link self::register()}:
     * the hooks are registered only when its value is truthy
     */
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type (ie the redirection method written in the redirection log)
    // Transparent: the global id is rewritten, no HTTP redirect is sent
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    // NOTE(review): the original note says `404 (See other)` but `See Other` is the HTTP status 303 - confirm which status is sent
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    // (ie the algorithm that has found the target, passed as origin to the redirect functions)
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    // The `GoTo*` and `Nothing` values are the possible values of the reader
    // configurations (`ActionReaderFirst`, `ActionReaderSecond`, `ActionReaderThird`)
    // that are dispatched in the router function
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    // Used as a configuration key (and not as a value): a writer goes to the edit mode when it is set to 1
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    // The header prefixes below are the part that is cut off
    // by getUrlFromRefresh and getUrlFromLocation to get the target url back
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * Created in the router function once SQLite is known to be available
     * @var PageRules
     */
    private $pageRules;
105
106
107    function __construct()
108    {
109        // enable direct access to language strings
110        // ie $this->lang
111        $this->setupLocale();
112
113    }
114
115    /**
116     * @param string $refreshHeader
117     * @return false|string
118     */
119    public static function getUrlFromRefresh(string $refreshHeader)
120    {
121        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
122    }
123
124    public static function getUrlFromLocation($refreshHeader)
125    {
126        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
127    }
128
129    /**
130     * @return array|mixed|string|string[]
131     *
132     * Unfortunately, DOKUWIKI_STARTED is not the first event
133     * The id may have been changed by
134     * {@link action_plugin_combo_metalang::load_lang()}
135     * function, that's why we have this function
136     * to get the original requested id
137     */
138    private static function getOriginalIdFromRequest()
139    {
140        $originalId = $_GET["id"] ?? null;
141        if ($originalId === null) {
142            return null;
143        }
144        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
145    }
146
147    /**
148     * Determine if the request should be banned based on the id
149     *
150     * @param string $id
151     * @return bool
152     *
153     * See also {@link https://perishablepress.com/7g-firewall/#features}
154     * for blocking rules on http request data such as:
155     *   * query_string
156     *   * user_agent,
157     *   * remote host
158     */
159    public static function isShadowBanned(string $id): bool
160    {
161        /**
162         * ie
163         * wp-json:api:flutter_woo:config_file
164         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
165         * wp-admin
166         * 2020:wp-includes:wlwmanifest.xml
167         * wp-content:start
168         * wp-admin:css:start
169         * sito:wp-includes:wlwmanifest.xml
170         * site:wp-includes:wlwmanifest.xml
171         * cms:wp-includes:wlwmanifest.xml
172         * test:wp-includes:wlwmanifest.xml
173         * media:wp-includes:wlwmanifest.xml
174         * wp2:wp-includes:wlwmanifest.xml
175         * 2019:wp-includes:wlwmanifest.xml
176         * shop:wp-includes:wlwmanifest.xml
177         * wp1:wp-includes:wlwmanifest.xml
178         * news:wp-includes:wlwmanifest.xml
179         * 2018:wp-includes:wlwmanifest.xml
180         */
181        if (strpos($id, 'wp-') !== false) {
182            return true;
183        }
184
185        /**
186         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
187         * db:oracle:999999.9:union:all:select_null:from_dual
188         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
189         */
190        if (preg_match('/_chr_|_0_0/', $id) === 1) {
191            return true;
192        }
193
194
195        /**
196         * ie
197         * git:objects:
198         * git:refs:heads:stable
199         * git:logs:refs:heads:main
200         * git:logs:refs:heads:stable
201         * git:hooks:pre-push.sample
202         * git:hooks:pre-receive.sample
203         */
204        if (strpos($id, "git:") === 0) {
205            return true;
206        }
207
208        return false;
209
210    }
211
212    /**
213     * @param string $id
214     * @return bool
215     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
216     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
217     * well-known:dnt-policy.txt
218     */
219    public static function isWellKnownFile(string $id): bool
220    {
221        return strpos($id, "well-known") === 0;
222    }
223
224
225    function register(Doku_Event_Handler $controller)
226    {
227
228        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
229
230            /**
231             * This will call the function {@link action_plugin_combo_router::_router()}
232             * The event is not DOKUWIKI_STARTED because this is not the first one
233             *
234             * https://www.dokuwiki.org/devel:event:init_lang_load
235             */
236            $controller->register_hook('DOKUWIKI_STARTED',
237                'BEFORE',
238                $this,
239                'router',
240                array());
241
242            /**
243             * This is the real first call of Dokuwiki
244             * Unfortunately, it does not create the environment
245             * We just ban to spare server resources
246             *
247             * https://www.dokuwiki.org/devel:event:init_lang_load
248             */
249            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
250
251        }
252
253
254    }
255
256    /**
257     *
258     * We have created a spacial ban function that is
259     * called before the first function
260     * {@link action_plugin_combo_metalang::load_lang()}
261     * to spare CPU.
262     *
263     * @param $event
264     * @throws Exception
265     */
266    function ban(&$event)
267    {
268
269        $id = self::getOriginalIdFromRequest();
270        if ($id === null) {
271            return;
272        }
273        $page = MarkupPath::createMarkupFromId($id);
274        if (!FileSystems::exists($page)) {
275            // Well known
276            if (self::isWellKnownFile($id)) {
277                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
278                ExecutionContext::getActualOrCreateFromEnv()
279                    ->response()
280                    ->setStatus(HttpResponseStatus::NOT_FOUND)
281                    ->end();
282                return;
283            }
284
285            // Shadow banned
286            if (self::isShadowBanned($id)) {
287                $webSiteHomePage = Site::getIndexPageName();
288                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
289            }
290        }
291    }
292
    /**
     * The routing function (handler of the DOKUWIKI_STARTED event)
     *
     * The steps are tried in order, the function returns as soon as a redirection has been performed:
     *   * nothing is done if the executing action is not {@link ExecutionContext::SHOW_ACTION},
     *     if the url rewrite is {@link UrlRewrite::VALUE_DOKU_REWRITE} (an error message is sent)
     *     or if Sqlite is not available
     *   * the requested page exists: a permanent redirect to its canonical url may be performed, then stop
     *   * the requested id holds a page id (permalink or extended permalink)
     *   * the requested id is a canonical
     *   * the requested id is an alias
     *   * a page rule matches
     *   * a writer goes to the edit mode (if the {@link self::GO_TO_EDIT_MODE} configuration is 1)
     *   * the reader algorithms of the `ActionReaderFirst`, `ActionReaderSecond` and `ActionReaderThird`
     *     configurations are applied
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        // The `doku rewrite` mode is reported with an error message and no routing occurs
        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the id of the request
         * (ie the `id` query parameter) and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 *   but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            // An existing page is never routed further
            return;
        }


        // From here, the requested page does not exist
        $identifier = $ID;


        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            // Without parent, the identifier is tried first as a full page id (permalink)
            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                // (the first characters of the stored page id are then compared)
                $canonicalDatabasePage = new DatabasePageRow();
                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                if ($row !== null) {
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 */
                if ($identifier != $page->getUrlId()) {
                    // Google asks for a redirect
                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                    // People access your site through several different URLs.
                    // If, for example, your home page can be reached in multiple ways
                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                    // and use redirects to send traffic from the other URLs to your preferred URL.
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                    return;
                }

                // The identifier is already the url id of the page: no HTTP redirect, only an id rewrite
                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                return;

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based
             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            // When the redirect was not performed, the next steps are tried
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            // A redirect alias gets a permanent redirect, a synonym alias gets a transparent one
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    // Unknown type: reported and handled as a redirect alias
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        // The algorithms are tried in the configured order.
        // The loop stops at the first empty value (loose comparison with null)
        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // NOTE(review): this initial value is overwritten just below
                    $bestPageId = null;

                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    // NOTE(review): unlike the other algorithms, the result of the redirect is not checked
                    // (the function returns even if the redirect was not performed) and the origin
                    // is the best page name even when the namespace wins - confirm that this is intended
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    // NOTE(review): the result of the redirect is not checked here either
                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }
658
659
660    /**
661     * getBestNamespace
662     * Return a list with 'BestNamespaceId Score'
663     * @param $id
664     * @return array
665     */
666    private
667    function scoreBestNamespace($id)
668    {
669
670        global $conf;
671
672        // Parameters
673        $pageNameSpace = getNS($id);
674
675        // If the page has an existing namespace start page take it, other search other namespace
676        $startPageNameSpace = $pageNameSpace . ":";
677        $dateAt = '';
678        // $startPageNameSpace will get a full path (ie with start or the namespace
679        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
680        if (page_exists($startPageNameSpace)) {
681            $nameSpaces = array($startPageNameSpace);
682        } else {
683            $nameSpaces = ft_pageLookup($conf['start']);
684        }
685
686        // Parameters and search the best namespace
687        $pathNames = explode(':', $pageNameSpace);
688        $bestNbWordFound = 0;
689        $bestNamespaceId = '';
690        foreach ($nameSpaces as $nameSpace) {
691
692            $nbWordFound = 0;
693            foreach ($pathNames as $pathName) {
694                if (strlen($pathName) > 2) {
695                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
696                }
697            }
698            if ($nbWordFound > $bestNbWordFound) {
699                // Take only the smallest namespace
700                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
701                    $bestNbWordFound = $nbWordFound;
702                    $bestNamespaceId = $nameSpace;
703                }
704            }
705        }
706
707        $startPageFactor = $this->getConf('WeightFactorForStartPage');
708        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
709        if ($bestNbWordFound > 0) {
710            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
711        } else {
712            $bestNamespaceScore = 0;
713        }
714
715
716        return array(
717            'namespace' => $bestNamespaceId,
718            'score' => $bestNamespaceScore
719        );
720
721    }
722
723    /**
724     * @param $event
725     */
726    private
727    function gotToEditMode(&$event)
728    {
729        global $ACT;
730        $ACT = 'edit';
731
732    }
733
734
735    /**
736     * Redirect to an internal page ie:
737     *   * on the same domain
738     *   * no HTTP redirect
739     *   * id rewrite
740     * @param string $targetPageId - target page id
741     * @param string $targetOriginId - the source of the target (redirect)
742     * @return bool - return true if the user has the permission and that the redirect was done
743     * @throws Exception
744     */
745    private
746    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
747    {
748        /**
749         * Because we set the ID globally for the ID redirect
750         * we make sure that this is not a {@link MarkupPath}
751         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
752         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
753         */
754        if (is_object($targetPageId)) {
755            $class = get_class($targetPageId);
756            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
757        }
758
759        if (is_object($targetOriginId)) {
760            $class = get_class($targetOriginId);
761            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
762        }
763
764        // If the user does not have the right to see the target page
765        // don't do anything
766        if (!(Identity::isReader($targetPageId))) {
767            return false;
768        }
769
770        // Change the id
771        global $ID;
772        global $INFO;
773        $sourceId = $ID;
774        $ID = $targetPageId;
775        if (isset($_REQUEST["id"])) {
776            $_REQUEST["id"] = $targetPageId;
777        }
778        if (isset($_GET["id"])) {
779            $_GET["id"] = $targetPageId;
780        }
781
782        /**
783         * Refresh the $INFO data
784         *
785         * the info attributes are used elsewhere
786         *   'id': for the sidebar
787         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
788         *   'rev' : for the edit button to be sure that the page is still the same
789         */
790        $INFO = pageinfo();
791
792        /**
793         * Not compatible with
794         * https://www.dokuwiki.org/config:send404 is enabled
795         *
796         * This check happens before that dokuwiki is started
797         * and send an header in doku.php
798         *
799         * We send a warning
800         */
801        global $conf;
802        if ($conf['send404'] == true) {
803            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
804        }
805
806        // Redirection
807        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
808
809        return true;
810
811    }
812
813    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
814    {
815        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
816    }
817
818    /**
819     * The general HTTP Redirect method to an internal page
820     * where the redirection method decide which type of redirection
821     * @param string $targetIdOrUrl - a dokuwiki id or an url
822     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
823     * @param string $method - the redirection method
824     */
825    private
826    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
827    {
828
829        global $ID;
830
831
832        // Log the redirections
833        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
834
835
836        // An http external url ?
837        try {
838            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
839        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
840            $isHttpUrl = false;
841        }
842
843        // If there is a bug in the isValid function for an internal url
844        // We get a loop.
845        // The Url becomes the id, the id is unknown and we do a redirect again
846        //
847        // We check then if the target starts with the base url
848        // if this is the case, it's valid
849        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
850            $isHttpUrl = true;
851        }
852        if ($isHttpUrl) {
853
854            // defend against HTTP Response Splitting
855            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
856            $targetUrl = stripctl($targetIdOrUrl);
857
858        } else {
859
860
861            // Explode the page ID and the anchor (#)
862            $link = explode('#', $targetIdOrUrl, 2);
863
864            $url = UrlEndpoint::createDokuUrl();
865
866            $urlParams = [];
867            // if this is search engine redirect
868            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
869                $replacementPart = array(':', '_', '-');
870                $query = str_replace($replacementPart, ' ', $ID);
871                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
872                $url->setQueryParameter("q", $query);
873            }
874
875            /**
876             * Doing a permanent redirect with a added query string
877             * create a new page url on the search engine
878             *
879             * ie
880             * http://host/page
881             * is not the same
882             * than
883             * http://host/page?whatever
884             *
885             * We can't pass query string otherwise, we get
886             * the SEO warning / error
887             * `Alternative page with proper canonical tag`
888             *
889             * Use HTTP X header for debug
890             */
891            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
892                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
893                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
894            }
895
896            $id = $link[0];
897            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
898            if (array_key_exists(1, $link)) {
899                $url->setFragment($link[1]);
900            }
901            $targetUrl = $url->toAbsoluteUrlString();
902
903        }
904
905        /**
906         * The dokuwiki function {@link send_redirect()}
907         * set the `Location header` and in php, the header function
908         * in this case change the status code to 302 Arghhhh.
909         * The code below is adapted from this function {@link send_redirect()}
910         */
911        global $MSG; // are there any undisplayed messages? keep them in session for display
912        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
913            //reopen session, store data and close session again
914            @session_start();
915            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
916        }
917        session_write_close(); // always close the session
918
919        switch ($method) {
920
921            case self::REDIRECT_PERMANENT_METHOD:
922                ExecutionContext::getActualOrCreateFromEnv()
923                    ->response()
924                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
925                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
926                    ->end();
927                return true;
928
929            case self::REDIRECT_NOTFOUND_METHOD:
930
931
932                // Empty 404 body to not get the standard 404 page of the browser
933                // but a blank page to avoid a sort of FOUC.
934                // ie the user see a page briefly
935                ExecutionContext::getActualOrCreateFromEnv()
936                    ->response()
937                    ->setStatus(HttpResponseStatus::NOT_FOUND)
938                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
939                    ->setBody(self::PAGE_404, Mime::getHtml())
940                    ->end();
941                return true;
942
943            default:
944                LogUtility::msg("The method ($method) is not an http redirection");
945                return false;
946        }
947
948
949    }
950
951    /**
952     * @param $id
953     * @return array
954     */
955    private
956    function getBestPage($id): array
957    {
958
959        // The return parameters
960        $bestPageId = null;
961        $scorePageName = null;
962
963        // Get Score from a page
964        $pageName = noNS($id);
965        $pagesWithSameName = ft_pageLookup($pageName);
966        if (count($pagesWithSameName) > 0) {
967
968            // Search same namespace in the page found than in the Id page asked.
969            $bestNbWordFound = 0;
970
971
972            $wordsInPageSourceId = explode(':', $id);
973            foreach ($pagesWithSameName as $targetPageId => $title) {
974
975                // Nb of word found in the target page id
976                // that are in the source page id
977                $nbWordFound = 0;
978                foreach ($wordsInPageSourceId as $word) {
979                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
980                }
981
982                if ($bestPageId == null) {
983
984                    $bestNbWordFound = $nbWordFound;
985                    $bestPageId = $targetPageId;
986
987                } else {
988
989                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
990
991                        $bestNbWordFound = $nbWordFound;
992                        $bestPageId = $targetPageId;
993
994                    }
995
996                }
997
998            }
999            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
1000            return array(
1001                'id' => $bestPageId,
1002                'score' => $scorePageName);
1003        }
1004        return array(
1005            'id' => $bestPageId,
1006            'score' => $scorePageName
1007        );
1008
1009    }
1010
1011
1012    /**
1013     * Redirect to the search engine
1014     */
1015    private
1016    function redirectToSearchEngine()
1017    {
1018
1019        global $ID;
1020        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1021
1022    }
1023
1024
1025    /**
1026     *
1027     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1028     *   * For a SQlite database, it will add a row into the log
1029     *
1030     * @param string $sourcePageId
1031     * @param $targetPageId
1032     * @param $algorithmic
1033     * @param $method - http or rewrite
1034     */
1035    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1036    {
1037
1038        $row = array(
1039            "TIMESTAMP" => date("c"),
1040            "SOURCE" => $sourcePageId,
1041            "TARGET" => $targetPageId,
1042            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1043            "TYPE" => $algorithmic,
1044            "METHOD" => $method
1045        );
1046        $request = Sqlite::createOrGetBackendSqlite()
1047            ->createRequest()
1048            ->setTableRow('redirections_log', $row);
1049        try {
1050            $request
1051                ->execute();
1052        } catch (ExceptionCompile $e) {
1053            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1054        } finally {
1055            $request->close();
1056        }
1057
1058
1059    }
1060
1061    /**
1062     * This function check if there is a redirection declared
1063     * in the redirection table
1064     * @return bool - true if a rewrite or redirection occurs
1065     * @throws Exception
1066     */
1067    private function processingPageRules(): bool
1068    {
1069        global $ID;
1070
1071        $calculatedTarget = null;
1072        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1073        // Known redirection in the table
1074        // Get the page from redirection data
1075        $rules = $this->pageRules->getRules();
1076        foreach ($rules as $rule) {
1077
1078            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1079            $ruleTarget = $rule[PageRules::TARGET_NAME];
1080
1081            // Glob to Rexgexp
1082            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1083
1084            // Match ?
1085            // https://www.php.net/manual/en/function.preg-match.php
1086            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1087            if ($pregMatchResult === false) {
1088                // The `if` to take into account this problem
1089                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1090                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1091                return false;
1092            }
1093            if ($pregMatchResult) {
1094                $calculatedTarget = $ruleTarget;
1095                foreach ($matches as $key => $match) {
1096                    if ($key == 0) {
1097                        continue;
1098                    } else {
1099                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1100                    }
1101                }
1102                break;
1103            }
1104        }
1105
1106        if ($calculatedTarget == null) {
1107            return false;
1108        }
1109
1110        // If this is an external redirect (other domain)
1111        try {
1112            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1113        } catch (ExceptionBadSyntax $e) {
1114            $isHttpUrl = false;
1115        }
1116        if ($isHttpUrl) {
1117            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1118            return true;
1119        }
1120
1121        // If the page exist
1122        if (page_exists($calculatedTarget)) {
1123
1124            // This is DokuWiki Id and should always be lowercase
1125            // The page rule may have change that
1126            $calculatedTarget = strtolower($calculatedTarget);
1127            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1128            if ($res) {
1129                return true;
1130            } else {
1131                return false;
1132            }
1133
1134        } else {
1135
1136            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1137            return false;
1138
1139        }
1140
1141    }
1142
1143    private function performNotFoundRedirect(string $targetId, string $origin): bool
1144    {
1145        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1146    }
1147
1148
1149}
1150