xref: /plugin/combo/action/router.php (revision aea52b497ecf7ce8a15fd6f9dad705aee722f51f)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\DokuwikiId;
6use ComboStrap\ExceptionBadArgument;
7use ComboStrap\ExceptionBadSyntax;
8use ComboStrap\ExceptionCompile;
9use ComboStrap\ExceptionSqliteNotAvailable;
10use ComboStrap\ExecutionContext;
11use ComboStrap\FileSystems;
12use ComboStrap\HttpResponse;
13use ComboStrap\HttpResponseStatus;
14use ComboStrap\Identity;
15use ComboStrap\LogUtility;
16use ComboStrap\MarkupPath;
17use ComboStrap\Meta\Field\AliasType;
18use ComboStrap\Mime;
19use ComboStrap\PageId;
20use ComboStrap\PageRules;
21use ComboStrap\PageUrlPath;
22use ComboStrap\PageUrlType;
23use ComboStrap\RouterBestEndPage;
24use ComboStrap\Site;
25use ComboStrap\SiteConfig;
26use ComboStrap\Sqlite;
27use ComboStrap\Web\Url;
28use ComboStrap\Web\UrlEndpoint;
29use ComboStrap\Web\UrlRewrite;
30use ComboStrap\WikiPath;
31
32require_once(__DIR__ . '/../vendor/autoload.php');
33
34/**
35 * Class action_plugin_combo_router
36 *
37 * The actual URL manager
38 *
39 *
40 */
41class action_plugin_combo_router extends DokuWiki_Action_Plugin
42{
43
44    /**
45     * @deprecated
46     */
47    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
48    const ROUTER_ENABLE_CONF = "enableRouter";
49
50    // The redirect type
51    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
52    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
53    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
54    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
55
56    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
57
58    // Where the target id value comes from
59    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
60    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
61    /**
62     * Named Permalink (canonical)
63     */
64    const TARGET_ORIGIN_CANONICAL = 'canonical';
65    const TARGET_ORIGIN_ALIAS = 'alias';
66    /**
67     * Identifier Permalink (full page id)
68     */
69    const TARGET_ORIGIN_PERMALINK = "permalink";
70    /**
71     * Extended Permalink (abbreviated page id at the end)
72     */
73    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
74    const TARGET_ORIGIN_START_PAGE = 'startPage';
75    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
76    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
77    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
78    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
79    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
80
81
82    // The constant parameters
83    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
84    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
85    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
86    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
87    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
88    const GO_TO_EDIT_MODE = 'GoToEditMode';
89    const NOTHING = 'Nothing';
90
91    /** @var string - a name used in log and other places */
92    const NAME = 'Url Manager';
93    const CANONICAL = 'router';
94    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
95    const REFRESH_HEADER_NAME = "Refresh";
96    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
97    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
98    public const URL_MANAGER_NAME = "Router";
99
100
101    /**
102     * @var PageRules
103     */
104    private $pageRules;
105
106
107    function __construct()
108    {
109        // enable direct access to language strings
110        // ie $this->lang
111        $this->setupLocale();
112
113    }
114
115    /**
116     * @param string $refreshHeader
117     * @return false|string
118     */
119    public static function getUrlFromRefresh(string $refreshHeader)
120    {
121        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
122    }
123
124    public static function getUrlFromLocation($refreshHeader)
125    {
126        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
127    }
128
129    /**
130     * @return array|mixed|string|string[]
131     *
132     * Unfortunately, DOKUWIKI_STARTED is not the first event
133     * The id may have been changed by
134     * {@link action_plugin_combo_metalang::load_lang()}
135     * function, that's why we have this function
136     * to get the original requested id
137     */
138    private static function getOriginalIdFromRequest()
139    {
140        $originalId = $_GET["id"] ?? null;
141        if ($originalId === null) {
142            return null;
143        }
144        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
145    }
146
147    /**
148     * Determine if the request should be banned based on the id
149     *
150     * @param string $id
151     * @return bool
152     *
153     * See also {@link https://perishablepress.com/7g-firewall/#features}
154     * for blocking rules on http request data such as:
155     *   * query_string
156     *   * user_agent,
157     *   * remote host
158     */
159    public static function isShadowBanned(string $id): bool
160    {
161        /**
162         * ie
163         * wp-json:api:flutter_woo:config_file
164         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
165         * wp-admin
166         * 2020:wp-includes:wlwmanifest.xml
167         * wp-content:start
168         * wp-admin:css:start
169         * sito:wp-includes:wlwmanifest.xml
170         * site:wp-includes:wlwmanifest.xml
171         * cms:wp-includes:wlwmanifest.xml
172         * test:wp-includes:wlwmanifest.xml
173         * media:wp-includes:wlwmanifest.xml
174         * wp2:wp-includes:wlwmanifest.xml
175         * 2019:wp-includes:wlwmanifest.xml
176         * shop:wp-includes:wlwmanifest.xml
177         * wp1:wp-includes:wlwmanifest.xml
178         * news:wp-includes:wlwmanifest.xml
179         * 2018:wp-includes:wlwmanifest.xml
180         */
181        if (strpos($id, 'wp-') !== false) {
182            return true;
183        }
184
185        /**
186         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
187         * db:oracle:999999.9:union:all:select_null:from_dual
188         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
189         */
190        if (preg_match('/_chr_|_0_0/', $id) === 1) {
191            return true;
192        }
193
194
195        /**
196         * ie
197         * git:objects:
198         * git:refs:heads:stable
199         * git:logs:refs:heads:main
200         * git:logs:refs:heads:stable
201         * git:hooks:pre-push.sample
202         * git:hooks:pre-receive.sample
203         */
204        if (strpos($id, "git:") === 0) {
205            return true;
206        }
207
208        return false;
209
210    }
211
212    /**
213     * @param string $id
214     * @return bool
215     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
216     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
217     * well-known:dnt-policy.txt
218     */
219    public static function isWellKnownFile(string $id): bool
220    {
221        return strpos($id, "well-known") === 0;
222    }
223
224
225    function register(Doku_Event_Handler $controller)
226    {
227
228        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
229
230            /**
231             * This will call the function {@link action_plugin_combo_router::_router()}
232             * The event is not DOKUWIKI_STARTED because this is not the first one
233             *
234             * https://www.dokuwiki.org/devel:event:init_lang_load
235             */
236            $controller->register_hook('DOKUWIKI_STARTED',
237                'BEFORE',
238                $this,
239                'router',
240                array());
241
242            /**
243             * This is the real first call of Dokuwiki
244             * Unfortunately, it does not create the environment
245             * We just ban to spare server resources
246             *
247             * https://www.dokuwiki.org/devel:event:init_lang_load
248             */
249            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
250
251        }
252
253
254    }
255
256    /**
257     *
258     * We have created a spacial ban function that is
259     * called before the first function
260     * {@link action_plugin_combo_metalang::load_lang()}
261     * to spare CPU.
262     *
263     * @param $event
264     * @throws Exception
265     */
266    function ban(&$event)
267    {
268
269        $id = self::getOriginalIdFromRequest();
270        if ($id === null) {
271            return;
272        }
273        $page = MarkupPath::createMarkupFromId($id);
274        if (!FileSystems::exists($page)) {
275            // Well known
276            if (self::isWellKnownFile($id)) {
277                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
278                ExecutionContext::getActualOrCreateFromEnv()
279                    ->response()
280                    ->setStatus(HttpResponseStatus::NOT_FOUND)
281                    ->end();
282                return;
283            }
284
285            // Shadow banned
286            if (self::isShadowBanned($id)) {
287                $webSiteHomePage = Site::getIndexPageName();
288                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
289            }
290        }
291    }
292
293    /**
294     * @param $event Doku_Event
295     * @param $param
296     * @return void
297     * @throws Exception
298     */
299    function router(&$event, $param)
300    {
301
302        /**
303         * Just the {@link ExecutionContext::SHOW_ACTION}
304         * may be redirected
305         */
306        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
307        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
308            return;
309        }
310
311        $urlRewrite = Site::getUrlRewrite();
312        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
313            UrlRewrite::sendErrorMessage();
314            return;
315        }
316
317        global $ID;
318
319        /**
320         * Without SQLite, this module does not work further
321         */
322        try {
323            Sqlite::createOrGetSqlite();
324        } catch (ExceptionSqliteNotAvailable $e) {
325            return;
326        }
327
328        $this->pageRules = new PageRules();
329
330
331        /**
332         * Unfortunately, DOKUWIKI_STARTED is not the first event
333         * The id may have been changed by
334         * {@link action_plugin_combo_lang::load_lang()}
335         * function, that's why we check against the {@link $_REQUEST}
336         * and not the global ID
337         */
338        $originalId = self::getOriginalIdFromRequest();
339
340        /**
341         * Page is an existing id ?
342         */
343        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
344        if (FileSystems::exists($requestedMarkupPath)) {
345
346            /**
347             * If this is not the root home page
348             * and if the canonical id is the not the same,
349             * and if this is not a historical page (revision)
350             * redirect
351             */
352            if (
353                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
354                && $ID != Site::getIndexPageName()
355                && !isset($_REQUEST["rev"])
356            ) {
357                /**
358                 * TODO: When saving for the first time, the page is not stored in the database
359                 *   but that's not the case actually
360                 */
361                $databasePageRow = $requestedMarkupPath->getDatabasePage();
362                if ($databasePageRow->exists()) {
363                    /**
364                     * A move may leave the database in a bad state,
365                     * unfortunately (ie page is not in index, unable to update, ...)
366                     * We test therefore if the database page id exists
367                     */
368                    $targetPageId = $databasePageRow->getFromRow("id");
369                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
370                    if (FileSystems::exists($targetPath)) {
371                        $this->executePermanentRedirect(
372                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
373                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
374                        );
375                    }
376                }
377            }
378            return;
379        }
380
381
382        $identifier = $ID;
383
384
385        /**
386         * Page Id Website / root Permalink ?
387         */
388        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
389        if ($shortPageId !== null) {
390            $pageId = PageUrlPath::decodePageId($shortPageId);
391            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
392                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
393                if ($page !== null && $page->exists()) {
394                    $this->executePermanentRedirect(
395                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
396                        self::TARGET_ORIGIN_PERMALINK
397                    );
398                }
399            }
400
401            /**
402             * Page Id Abbr ?
403             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
404             */
405            if (
406                $pageId !== null
407            ) {
408                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
409                if ($page === null) {
410                    // or the length of the abbr has changed
411                    $canonicalDatabasePage = new DatabasePageRow();
412                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
413                    if ($row !== null) {
414                        $canonicalDatabasePage->setRow($row);
415                        $page = $canonicalDatabasePage->getMarkupPath();
416                    }
417                }
418                if ($page !== null && $page->exists()) {
419                    /**
420                     * If the url canonical id has changed, we show it
421                     * to the writer by performing a permanent redirect
422                     */
423                    if ($identifier != $page->getUrlId()) {
424                        // Google asks for a redirect
425                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
426                        // People access your site through several different URLs.
427                        // If, for example, your home page can be reached in multiple ways
428                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
429                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
430                        // and use redirects to send traffic from the other URLs to your preferred URL.
431                        $this->executePermanentRedirect(
432                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
433                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
434                        );
435                        return;
436                    }
437
438                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
439                    return;
440
441                }
442                // permanent url not yet in the database
443                // Other permanent such as permanent canonical ?
444                // We let the process go with the new identifier
445
446            }
447
448        }
449
450        // Global variable needed in the process
451        global $conf;
452
453        /**
454         * Identifier is a Canonical ?
455         */
456        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
457        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
458        if ($canonicalPage !== null && $canonicalPage->exists()) {
459            /**
460             * Does the canonical url is canonical name based
461             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
462             */
463            if ($canonicalPage->getUrlId() === $identifier) {
464                $res = $this->executeTransparentRedirect(
465                    $canonicalPage->getWikiId(),
466                    self::TARGET_ORIGIN_CANONICAL
467                );
468            } else {
469                $res = $this->executePermanentRedirect(
470                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
471                    self::TARGET_ORIGIN_CANONICAL
472                );
473            }
474            if ($res) {
475                return;
476            }
477        }
478
479        /**
480         * Identifier is an alias
481         */
482        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
483        if (
484            $aliasRequestedPage !== null
485            && $aliasRequestedPage->exists()
486            // The build alias is the file system metadata alias
487            // it may be null if the replication in the database was not successful
488            && $aliasRequestedPage->getBuildAlias() !== null
489        ) {
490            $buildAlias = $aliasRequestedPage->getBuildAlias();
491            switch ($buildAlias->getType()) {
492                case AliasType::REDIRECT:
493                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
494                    if ($res) {
495                        return;
496                    }
497                    break;
498                case AliasType::SYNONYM:
499                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
500                    if ($res) {
501                        return;
502                    }
503                    break;
504                default:
505                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
506                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
507                    if ($res) {
508                        return;
509                    }
510                    break;
511            }
512        }
513
514
515        // If there is a redirection defined in the page rules
516        $result = $this->processingPageRules();
517        if ($result) {
518            // A redirection has occurred
519            // finish the process
520            return;
521        }
522
523        /**
524         *
525         * There was no redirection found, redirect to edit mode if writer
526         *
527         */
528        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
529
530            $this->gotToEditMode($event);
531            // Stop here
532            return;
533
534        }
535
536        /**
537         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
538         */
539        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
540            return;
541        }
542
543        // We are reader and their is no redirection set, we apply the algorithm
544        $readerAlgorithms = array();
545        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
546        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
547        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
548
549        while (
550            ($algorithm = array_shift($readerAlgorithms)) != null
551        ) {
552
553            switch ($algorithm) {
554
555                case self::NOTHING:
556                    return;
557
558                case self::GO_TO_BEST_END_PAGE_NAME:
559
560                    /**
561                     * @var MarkupPath $bestEndPage
562                     */
563                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
564                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
565                        $res = false;
566                        switch ($method) {
567                            case self::REDIRECT_PERMANENT_METHOD:
568                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
569                                break;
570                            case self::REDIRECT_NOTFOUND_METHOD:
571                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
572                                break;
573                            default:
574                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
575                        }
576                        if ($res) {
577                            // Redirection has succeeded
578                            return;
579                        }
580                    }
581                    break;
582
583                case self::GO_TO_NS_START_PAGE:
584
585                    // Start page with the conf['start'] parameter
586                    $startPage = getNS($identifier) . ':' . $conf['start'];
587                    if (page_exists($startPage)) {
588                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
589                        if ($res) {
590                            return;
591                        }
592                    }
593
594                    // Start page with the same name than the namespace
595                    $startPage = getNS($identifier) . ':' . curNS($identifier);
596                    if (page_exists($startPage)) {
597                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
598                        if ($res) {
599                            return;
600                        }
601                    }
602                    break;
603
604                case self::GO_TO_BEST_PAGE_NAME:
605
606                    $bestPageId = null;
607
608                    $bestPage = $this->getBestPage($identifier);
609                    $bestPageId = $bestPage['id'];
610                    $scorePageName = $bestPage['score'];
611
612                    // Get Score from a Namespace
613                    $bestNamespace = $this->scoreBestNamespace($identifier);
614                    $bestNamespaceId = $bestNamespace['namespace'];
615                    $namespaceScore = $bestNamespace['score'];
616
617                    // Compare the two score
618                    if ($scorePageName > 0 or $namespaceScore > 0) {
619                        if ($scorePageName > $namespaceScore) {
620                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
621                        } else {
622                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
623                        }
624                        return;
625                    }
626                    break;
627
628                case self::GO_TO_BEST_NAMESPACE:
629
630                    $scoreNamespace = $this->scoreBestNamespace($identifier);
631                    $bestNamespaceId = $scoreNamespace['namespace'];
632                    $score = $scoreNamespace['score'];
633
634                    if ($score > 0) {
635                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
636                        return;
637                    }
638                    break;
639
640                case self::GO_TO_SEARCH_ENGINE:
641
642                    $this->redirectToSearchEngine();
643
644                    return;
645
646                // End Switch Action
647            }
648
649            // End While Action
650        }
651
652
653    }
654
655
656    /**
657     * getBestNamespace
658     * Return a list with 'BestNamespaceId Score'
659     * @param $id
660     * @return array
661     */
662    private
663    function scoreBestNamespace($id)
664    {
665
666        global $conf;
667
668        // Parameters
669        $pageNameSpace = getNS($id);
670
671        // If the page has an existing namespace start page take it, other search other namespace
672        $startPageNameSpace = $pageNameSpace . ":";
673        $dateAt = '';
674        // $startPageNameSpace will get a full path (ie with start or the namespace
675        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
676        if (page_exists($startPageNameSpace)) {
677            $nameSpaces = array($startPageNameSpace);
678        } else {
679            $nameSpaces = ft_pageLookup($conf['start']);
680        }
681
682        // Parameters and search the best namespace
683        $pathNames = explode(':', $pageNameSpace);
684        $bestNbWordFound = 0;
685        $bestNamespaceId = '';
686        foreach ($nameSpaces as $nameSpace) {
687
688            $nbWordFound = 0;
689            foreach ($pathNames as $pathName) {
690                if (strlen($pathName) > 2) {
691                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
692                }
693            }
694            if ($nbWordFound > $bestNbWordFound) {
695                // Take only the smallest namespace
696                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
697                    $bestNbWordFound = $nbWordFound;
698                    $bestNamespaceId = $nameSpace;
699                }
700            }
701        }
702
703        $startPageFactor = $this->getConf('WeightFactorForStartPage');
704        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
705        if ($bestNbWordFound > 0) {
706            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
707        } else {
708            $bestNamespaceScore = 0;
709        }
710
711
712        return array(
713            'namespace' => $bestNamespaceId,
714            'score' => $bestNamespaceScore
715        );
716
717    }
718
719    /**
720     * @param $event
721     */
722    private
723    function gotToEditMode(&$event)
724    {
725        global $ACT;
726        $ACT = 'edit';
727
728    }
729
730
731    /**
732     * Redirect to an internal page ie:
733     *   * on the same domain
734     *   * no HTTP redirect
735     *   * id rewrite
736     * @param string $targetPageId - target page id
737     * @param string $targetOriginId - the source of the target (redirect)
738     * @return bool - return true if the user has the permission and that the redirect was done
739     * @throws Exception
740     */
741    private
742    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
743    {
744        /**
745         * Because we set the ID globally for the ID redirect
746         * we make sure that this is not a {@link MarkupPath}
747         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
748         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
749         */
750        if (is_object($targetPageId)) {
751            $class = get_class($targetPageId);
752            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
753        }
754
755        if (is_object($targetOriginId)) {
756            $class = get_class($targetOriginId);
757            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
758        }
759
760        // If the user does not have the right to see the target page
761        // don't do anything
762        if (!(Identity::isReader($targetPageId))) {
763            return false;
764        }
765
766        // Change the id
767        global $ID;
768        global $INFO;
769        $sourceId = $ID;
770        $ID = $targetPageId;
771        if (isset($_REQUEST["id"])) {
772            $_REQUEST["id"] = $targetPageId;
773        }
774        if (isset($_GET["id"])) {
775            $_GET["id"] = $targetPageId;
776        }
777
778        /**
779         * Refresh the $INFO data
780         *
781         * the info attributes are used elsewhere
782         *   'id': for the sidebar
783         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
784         *   'rev' : for the edit button to be sure that the page is still the same
785         */
786        $INFO = pageinfo();
787
788        /**
789         * Not compatible with
790         * https://www.dokuwiki.org/config:send404 is enabled
791         *
792         * This check happens before that dokuwiki is started
793         * and send an header in doku.php
794         *
795         * We send a warning
796         */
797        global $conf;
798        if ($conf['send404'] == true) {
799            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
800        }
801
802        // Redirection
803        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
804
805        return true;
806
807    }
808
809    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
810    {
811        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
812    }
813
814    /**
815     * The general HTTP Redirect method to an internal page
816     * where the redirection method decide which type of redirection
817     * @param string $targetIdOrUrl - a dokuwiki id or an url
818     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
819     * @param string $method - the redirection method
820     */
821    private
822    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
823    {
824
825        global $ID;
826
827
828        // Log the redirections
829        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
830
831
832        // An http external url ?
833        try {
834            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
835        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
836            $isHttpUrl = false;
837        }
838
839        // If there is a bug in the isValid function for an internal url
840        // We get a loop.
841        // The Url becomes the id, the id is unknown and we do a redirect again
842        //
843        // We check then if the target starts with the base url
844        // if this is the case, it's valid
845        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
846            $isHttpUrl = true;
847        }
848        if ($isHttpUrl) {
849
850            // defend against HTTP Response Splitting
851            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
852            $targetUrl = stripctl($targetIdOrUrl);
853
854        } else {
855
856
857            // Explode the page ID and the anchor (#)
858            $link = explode('#', $targetIdOrUrl, 2);
859
860            $url = UrlEndpoint::createDokuUrl();
861
862            $urlParams = [];
863            // if this is search engine redirect
864            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
865                $replacementPart = array(':', '_', '-');
866                $query = str_replace($replacementPart, ' ', $ID);
867                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
868                $url->setQueryParameter("q", $query);
869            }
870
871            /**
872             * Doing a permanent redirect with a added query string
873             * create a new page url on the search engine
874             *
875             * ie
876             * http://host/page
877             * is not the same
878             * than
879             * http://host/page?whatever
880             *
881             * We can't pass query string otherwise, we get
882             * the SEO warning / error
883             * `Alternative page with proper canonical tag`
884             *
885             * Use HTTP X header for debug
886             */
887            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
888                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
889                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
890            }
891
892            $id = $link[0];
893            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
894            if (array_key_exists(1, $link)) {
895                $url->setFragment($link[1]);
896            }
897            $targetUrl = $url->toAbsoluteUrlString();
898
899        }
900
901        /**
902         * The dokuwiki function {@link send_redirect()}
903         * set the `Location header` and in php, the header function
904         * in this case change the status code to 302 Arghhhh.
905         * The code below is adapted from this function {@link send_redirect()}
906         */
907        global $MSG; // are there any undisplayed messages? keep them in session for display
908        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
909            //reopen session, store data and close session again
910            @session_start();
911            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
912        }
913        session_write_close(); // always close the session
914
915        switch ($method) {
916
917            case self::REDIRECT_PERMANENT_METHOD:
918                ExecutionContext::getActualOrCreateFromEnv()
919                    ->response()
920                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
921                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
922                    ->end();
923                return true;
924
925            case self::REDIRECT_NOTFOUND_METHOD:
926
927
928                // Empty 404 body to not get the standard 404 page of the browser
929                // but a blank page to avoid a sort of FOUC.
930                // ie the user see a page briefly
931                ExecutionContext::getActualOrCreateFromEnv()
932                    ->response()
933                    ->setStatus(HttpResponseStatus::NOT_FOUND)
934                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
935                    ->setBody(self::PAGE_404, Mime::getHtml())
936                    ->end();
937                return true;
938
939            default:
940                LogUtility::msg("The method ($method) is not an http redirection");
941                return false;
942        }
943
944
945    }
946
947    /**
948     * @param $id
949     * @return array
950     */
951    private
952    function getBestPage($id): array
953    {
954
955        // The return parameters
956        $bestPageId = null;
957        $scorePageName = null;
958
959        // Get Score from a page
960        $pageName = noNS($id);
961        $pagesWithSameName = ft_pageLookup($pageName);
962        if (count($pagesWithSameName) > 0) {
963
964            // Search same namespace in the page found than in the Id page asked.
965            $bestNbWordFound = 0;
966
967
968            $wordsInPageSourceId = explode(':', $id);
969            foreach ($pagesWithSameName as $targetPageId => $title) {
970
971                // Nb of word found in the target page id
972                // that are in the source page id
973                $nbWordFound = 0;
974                foreach ($wordsInPageSourceId as $word) {
975                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
976                }
977
978                if ($bestPageId == null) {
979
980                    $bestNbWordFound = $nbWordFound;
981                    $bestPageId = $targetPageId;
982
983                } else {
984
985                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
986
987                        $bestNbWordFound = $nbWordFound;
988                        $bestPageId = $targetPageId;
989
990                    }
991
992                }
993
994            }
995            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
996            return array(
997                'id' => $bestPageId,
998                'score' => $scorePageName);
999        }
1000        return array(
1001            'id' => $bestPageId,
1002            'score' => $scorePageName
1003        );
1004
1005    }
1006
1007
1008    /**
1009     * Redirect to the search engine
1010     */
1011    private
1012    function redirectToSearchEngine()
1013    {
1014
1015        global $ID;
1016        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1017
1018    }
1019
1020
1021    /**
1022     *
1023     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1024     *   * For a SQlite database, it will add a row into the log
1025     *
1026     * @param string $sourcePageId
1027     * @param $targetPageId
1028     * @param $algorithmic
1029     * @param $method - http or rewrite
1030     */
1031    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1032    {
1033
1034        $row = array(
1035            "TIMESTAMP" => date("c"),
1036            "SOURCE" => $sourcePageId,
1037            "TARGET" => $targetPageId,
1038            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1039            "TYPE" => $algorithmic,
1040            "METHOD" => $method
1041        );
1042        $request = Sqlite::createOrGetBackendSqlite()
1043            ->createRequest()
1044            ->setTableRow('redirections_log', $row);
1045        try {
1046            $request
1047                ->execute();
1048        } catch (ExceptionCompile $e) {
1049            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1050        } finally {
1051            $request->close();
1052        }
1053
1054
1055    }
1056
1057    /**
1058     * This function check if there is a redirection declared
1059     * in the redirection table
1060     * @return bool - true if a rewrite or redirection occurs
1061     * @throws Exception
1062     */
1063    private function processingPageRules(): bool
1064    {
1065        global $ID;
1066
1067        $calculatedTarget = null;
1068        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1069        // Known redirection in the table
1070        // Get the page from redirection data
1071        $rules = $this->pageRules->getRules();
1072        foreach ($rules as $rule) {
1073
1074            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1075            $ruleTarget = $rule[PageRules::TARGET_NAME];
1076
1077            // Glob to Rexgexp
1078            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1079
1080            // Match ?
1081            // https://www.php.net/manual/en/function.preg-match.php
1082            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1083            if ($pregMatchResult === false) {
1084                // The `if` to take into account this problem
1085                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1086                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1087                return false;
1088            }
1089            if ($pregMatchResult) {
1090                $calculatedTarget = $ruleTarget;
1091                foreach ($matches as $key => $match) {
1092                    if ($key == 0) {
1093                        continue;
1094                    } else {
1095                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1096                    }
1097                }
1098                break;
1099            }
1100        }
1101
1102        if ($calculatedTarget == null) {
1103            return false;
1104        }
1105
1106        // If this is an external redirect (other domain)
1107        try {
1108            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1109        } catch (ExceptionBadSyntax $e) {
1110            $isHttpUrl = false;
1111        }
1112        if ($isHttpUrl) {
1113            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1114            return true;
1115        }
1116
1117        // If the page exist
1118        if (page_exists($calculatedTarget)) {
1119
1120            // This is DokuWiki Id and should always be lowercase
1121            // The page rule may have change that
1122            $calculatedTarget = strtolower($calculatedTarget);
1123            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1124            if ($res) {
1125                return true;
1126            } else {
1127                return false;
1128            }
1129
1130        } else {
1131
1132            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1133            return false;
1134
1135        }
1136
1137    }
1138
1139    private function performNotFoundRedirect(string $targetId, string $origin): bool
1140    {
1141        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1142    }
1143
1144
1145}
1146