xref: /plugin/combo/action/router.php (revision 04fd306c7c155fa133ebb3669986875d65988276)
1<?php
2
3
4
5
6use ComboStrap\DatabasePageRow;
7use ComboStrap\ExceptionBadArgument;
8use ComboStrap\ExceptionBadSyntax;
9use ComboStrap\ExceptionCompile;
10use ComboStrap\ExceptionSqliteNotAvailable;
11use ComboStrap\ExecutionContext;
12use ComboStrap\FileSystems;
13use ComboStrap\HttpResponse;
14use ComboStrap\HttpResponseStatus;
15use ComboStrap\Identity;
16use ComboStrap\LogUtility;
17use ComboStrap\MarkupPath;
18use ComboStrap\Meta\Field\AliasType;
19use ComboStrap\Mime;
20use ComboStrap\PageId;
21use ComboStrap\PageRules;
22use ComboStrap\PageUrlPath;
23use ComboStrap\PageUrlType;
24use ComboStrap\RouterBestEndPage;
25use ComboStrap\Site;
26use ComboStrap\SiteConfig;
27use ComboStrap\Sqlite;
28use ComboStrap\Web\Url;
29use ComboStrap\WikiPath;
30
31require_once(__DIR__ . '/../vendor/autoload.php');
32
33/**
 * Class action_plugin_combo_router
35 *
36 * The actual URL manager
37 *
38 *
39 */
40class action_plugin_combo_router extends DokuWiki_Action_Plugin
41{
42
43    /**
44     * @deprecated
45     */
46    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
47    const ROUTER_ENABLE_CONF = "enableRouter";
48
49    // The redirect type
50    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
51    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
52    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
53    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
54
55    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
56
57    // Where the target id value comes from
58    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
59    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
60    /**
61     * Named Permalink (canonical)
62     */
63    const TARGET_ORIGIN_CANONICAL = 'canonical';
64    const TARGET_ORIGIN_ALIAS = 'alias';
65    /**
66     * Identifier Permalink (full page id)
67     */
68    const TARGET_ORIGIN_PERMALINK = "permalink";
69    /**
70     * Extended Permalink (abbreviated page id at the end)
71     */
72    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
73    const TARGET_ORIGIN_START_PAGE = 'startPage';
74    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
75    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
76    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
77    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
78    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
79
80
81    // The constant parameters
82    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
83    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
84    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
85    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
86    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
87    const GO_TO_EDIT_MODE = 'GoToEditMode';
88    const NOTHING = 'Nothing';
89
90    /** @var string - a name used in log and other places */
91    const NAME = 'Url Manager';
92    const CANONICAL = 'router';
93    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
94    const REFRESH_HEADER_NAME = "Refresh";
95    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
96    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
97    public const URL_MANAGER_NAME = "Router";
98
99
100    /**
101     * @var PageRules
102     */
103    private $pageRules;
104
105
106    function __construct()
107    {
108        // enable direct access to language strings
109        // ie $this->lang
110        $this->setupLocale();
111
112    }
113
114    /**
115     * @param string $refreshHeader
116     * @return false|string
117     */
118    public static function getUrlFromRefresh(string $refreshHeader)
119    {
120        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
121    }
122
123    public static function getUrlFromLocation($refreshHeader)
124    {
125        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
126    }
127
128    /**
129     * @return array|mixed|string|string[]
130     *
131     * Unfortunately, DOKUWIKI_STARTED is not the first event
132     * The id may have been changed by
133     * {@link action_plugin_combo_metalang::load_lang()}
134     * function, that's why we have this function
135     * to get the original requested id
136     */
137    private static function getOriginalIdFromRequest()
138    {
139        $originalId = $_GET["id"];
140        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
141    }
142
143    /**
144     * Determine if the request should be banned based on the id
145     *
146     * @param string $id
147     * @return bool
148     *
149     * See also {@link https://perishablepress.com/7g-firewall/#features}
150     * for blocking rules on http request data such as:
151     *   * query_string
152     *   * user_agent,
153     *   * remote host
154     */
155    public static function isShadowBanned(string $id): bool
156    {
157        /**
158         * ie
159         * wp-json:api:flutter_woo:config_file
160         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
161         * wp-admin
162         * 2020:wp-includes:wlwmanifest.xml
163         * wp-content:start
164         * wp-admin:css:start
165         * sito:wp-includes:wlwmanifest.xml
166         * site:wp-includes:wlwmanifest.xml
167         * cms:wp-includes:wlwmanifest.xml
168         * test:wp-includes:wlwmanifest.xml
169         * media:wp-includes:wlwmanifest.xml
170         * wp2:wp-includes:wlwmanifest.xml
171         * 2019:wp-includes:wlwmanifest.xml
172         * shop:wp-includes:wlwmanifest.xml
173         * wp1:wp-includes:wlwmanifest.xml
174         * news:wp-includes:wlwmanifest.xml
175         * 2018:wp-includes:wlwmanifest.xml
176         */
177        if (strpos($id, 'wp-') !== false) {
178            return true;
179        }
180
181        /**
182         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
183         * db:oracle:999999.9:union:all:select_null:from_dual
184         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
185         */
186        if (preg_match('/_chr_|_0_0/', $id) === 1) {
187            return true;
188        }
189
190
191        /**
192         * ie
193         * git:objects:
194         * git:refs:heads:stable
195         * git:logs:refs:heads:main
196         * git:logs:refs:heads:stable
197         * git:hooks:pre-push.sample
198         * git:hooks:pre-receive.sample
199         */
200        if (strpos($id, "git:") === 0) {
201            return true;
202        }
203
204        return false;
205
206    }
207
208    /**
209     * @param string $id
210     * @return bool
211     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
212     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
213     * well-known:dnt-policy.txt
214     */
215    public static function isWellKnownFile(string $id): bool
216    {
217        return strpos($id, "well-known") === 0;
218    }
219
220
221    function register(Doku_Event_Handler $controller)
222    {
223
224        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
225
226            /**
227             * This will call the function {@link action_plugin_combo_router::_router()}
228             * The event is not DOKUWIKI_STARTED because this is not the first one
229             *
230             * https://www.dokuwiki.org/devel:event:init_lang_load
231             */
232            $controller->register_hook('DOKUWIKI_STARTED',
233                'BEFORE',
234                $this,
235                'router',
236                array());
237
238            /**
239             * This is the real first call of Dokuwiki
240             * Unfortunately, it does not create the environment
241             * We just ban to spare server resources
242             *
243             * https://www.dokuwiki.org/devel:event:init_lang_load
244             */
245            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
246
247        }
248
249
250    }
251
252    /**
253     *
254     * We have created a spacial ban function that is
255     * called before the first function
256     * {@link action_plugin_combo_metalang::load_lang()}
257     * to spare CPU.
258     *
259     * @param $event
260     * @throws Exception
261     */
262    function ban(&$event)
263    {
264
265        $id = self::getOriginalIdFromRequest();
266        $page = MarkupPath::createMarkupFromId($id);
267        if (!FileSystems::exists($page)) {
268            // Well known
269            if (self::isWellKnownFile($id)) {
270                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
271                ExecutionContext::getActualOrCreateFromEnv()
272                    ->response()
273                    ->setStatus(HttpResponseStatus::NOT_FOUND)
274                    ->end();
275                return;
276            }
277
278            // Shadow banned
279            if (self::isShadowBanned($id)) {
280                $webSiteHomePage = Site::getIndexPageName();
281                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
282            }
283        }
284    }
285
286    /**
287     * @param $event Doku_Event
288     * @param $param
289     * @return void
290     * @throws Exception
291     */
292    function router(&$event, $param)
293    {
294
295        /**
296         * Just the {@link ExecutionContext::SHOW_ACTION}
297         * may be redirected
298         */
299        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
300        if ($executionContext->getExecutingAction()!==ExecutionContext::SHOW_ACTION) {
301            return;
302        }
303
304
305        global $ID;
306
307        /**
308         * Without SQLite, this module does not work further
309         */
310        try {
311            Sqlite::createOrGetSqlite();
312        } catch (ExceptionSqliteNotAvailable $e) {
313            return;
314        }
315
316        $this->pageRules = new PageRules();
317
318
319        /**
320         * Unfortunately, DOKUWIKI_STARTED is not the first event
321         * The id may have been changed by
322         * {@link action_plugin_combo_lang::load_lang()}
323         * function, that's why we check against the {@link $_REQUEST}
324         * and not the global ID
325         */
326        $originalId = self::getOriginalIdFromRequest();
327
328        /**
329         * Page is an existing id ?
330         */
331        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
332        if (FileSystems::exists($requestedMarkupPath)) {
333
334            /**
335             * If this is not the root home page
336             * and if the canonical id is the not the same,
337             * and if this is not a historical page (revision)
338             * redirect
339             */
340            if (
341                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
342                && $ID != Site::getIndexPageName()
343                && !isset($_REQUEST["rev"])
344            ) {
345                /**
346                 * TODO: When saving for the first time, the page is not stored in the database
347                 *   but that's not the case actually
348                 */
349                $databasePageRow = $requestedMarkupPath->getDatabasePage();
350                if ($databasePageRow->exists()) {
351                    /**
352                     * A move may leave the database in a bad state,
353                     * unfortunately (ie page is not in index, unable to update, ...)
354                     * We test therefore if the database page id exists
355                     */
356                    $targetPageId = $databasePageRow->getFromRow("id");
357                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
358                    if(FileSystems::exists($targetPath)) {
359                        $this->executePermanentRedirect(
360                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
361                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
362                        );
363                    }
364                }
365            }
366            return;
367        }
368
369
370        $identifier = $ID;
371
372
373        /**
374         * Page Id Website / root Permalink ?
375         */
376        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
377        if ($shortPageId !== null) {
378            $pageId = PageUrlPath::decodePageId($shortPageId);
379            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
380                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
381                if ($page !== null && $page->exists()) {
382                    $this->executePermanentRedirect(
383                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
384                        self::TARGET_ORIGIN_PERMALINK
385                    );
386                }
387            }
388
389            /**
390             * Page Id Abbr ?
391             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
392             */
393            if (
394                $pageId !== null
395            ) {
396                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
397                if ($page === null) {
398                    // or the length of the abbr has changed
399                    $canonicalDatabasePage = new DatabasePageRow();
400                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
401                    if ($row !== null) {
402                        $canonicalDatabasePage->setRow($row);
403                        $page = $canonicalDatabasePage->getMarkupPath();
404                    }
405                }
406                if ($page !== null && $page->exists()) {
407                    /**
408                     * If the url canonical id has changed, we show it
409                     * to the writer by performing a permanent redirect
410                     */
411                    if ($identifier != $page->getUrlId()) {
412                        // Google asks for a redirect
413                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
414                        // People access your site through several different URLs.
415                        // If, for example, your home page can be reached in multiple ways
416                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
417                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
418                        // and use redirects to send traffic from the other URLs to your preferred URL.
419                        $this->executePermanentRedirect(
420                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
421                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
422                        );
423                        return;
424                    }
425
426                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
427                    return;
428
429                }
430                // permanent url not yet in the database
431                // Other permanent such as permanent canonical ?
432                // We let the process go with the new identifier
433
434            }
435
436        }
437
438        // Global variable needed in the process
439        global $conf;
440
441        /**
442         * Identifier is a Canonical ?
443         */
444        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
445        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
446        if ($canonicalPage !== null && $canonicalPage->exists()) {
447            /**
448             * Does the canonical url is canonical name based
449             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
450             */
451            if ($canonicalPage->getUrlId() === $identifier) {
452                $res = $this->executeTransparentRedirect(
453                    $canonicalPage->getWikiId(),
454                    self::TARGET_ORIGIN_CANONICAL
455                );
456            } else {
457                $res = $this->executePermanentRedirect(
458                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
459                    self::TARGET_ORIGIN_CANONICAL
460                );
461            }
462            if ($res) {
463                return;
464            }
465        }
466
467        /**
468         * Identifier is an alias
469         */
470        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
471        if (
472            $aliasRequestedPage !== null
473            && $aliasRequestedPage->exists()
474            // The build alias is the file system metadata alias
475            // it may be null if the replication in the database was not successful
476            && $aliasRequestedPage->getBuildAlias() !== null
477        ) {
478            $buildAlias = $aliasRequestedPage->getBuildAlias();
479            switch ($buildAlias->getType()) {
480                case AliasType::REDIRECT:
481                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
482                    if ($res) {
483                        return;
484                    }
485                    break;
486                case AliasType::SYNONYM:
487                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
488                    if ($res) {
489                        return;
490                    }
491                    break;
492                default:
493                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
494                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
495                    if ($res) {
496                        return;
497                    }
498                    break;
499            }
500        }
501
502
503        // If there is a redirection defined in the page rules
504        $result = $this->processingPageRules();
505        if ($result) {
506            // A redirection has occurred
507            // finish the process
508            return;
509        }
510
511        /**
512         *
513         * There was no redirection found, redirect to edit mode if writer
514         *
515         */
516        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
517
518            $this->gotToEditMode($event);
519            // Stop here
520            return;
521
522        }
523
524        /**
525         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
526         */
527        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
528            return;
529        }
530
531        // We are reader and their is no redirection set, we apply the algorithm
532        $readerAlgorithms = array();
533        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
534        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
535        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
536
537        while (
538            ($algorithm = array_shift($readerAlgorithms)) != null
539        ) {
540
541            switch ($algorithm) {
542
543                case self::NOTHING:
544                    return;
545
546                case self::GO_TO_BEST_END_PAGE_NAME:
547
548                    /**
549                     * @var MarkupPath $bestEndPage
550                     */
551                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
552                    if ($bestEndPage != null) {
553                        $res = false;
554                        switch ($method) {
555                            case self::REDIRECT_PERMANENT_METHOD:
556                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
557                                break;
558                            case self::REDIRECT_NOTFOUND_METHOD:
559                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
560                                break;
561                            default:
562                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
563                        }
564                        if ($res) {
565                            // Redirection has succeeded
566                            return;
567                        }
568                    }
569                    break;
570
571                case self::GO_TO_NS_START_PAGE:
572
573                    // Start page with the conf['start'] parameter
574                    $startPage = getNS($identifier) . ':' . $conf['start'];
575                    if (page_exists($startPage)) {
576                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
577                        if ($res) {
578                            return;
579                        }
580                    }
581
582                    // Start page with the same name than the namespace
583                    $startPage = getNS($identifier) . ':' . curNS($identifier);
584                    if (page_exists($startPage)) {
585                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
586                        if ($res) {
587                            return;
588                        }
589                    }
590                    break;
591
592                case self::GO_TO_BEST_PAGE_NAME:
593
594                    $bestPageId = null;
595
596                    $bestPage = $this->getBestPage($identifier);
597                    $bestPageId = $bestPage['id'];
598                    $scorePageName = $bestPage['score'];
599
600                    // Get Score from a Namespace
601                    $bestNamespace = $this->scoreBestNamespace($identifier);
602                    $bestNamespaceId = $bestNamespace['namespace'];
603                    $namespaceScore = $bestNamespace['score'];
604
605                    // Compare the two score
606                    if ($scorePageName > 0 or $namespaceScore > 0) {
607                        if ($scorePageName > $namespaceScore) {
608                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
609                        } else {
610                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
611                        }
612                        return;
613                    }
614                    break;
615
616                case self::GO_TO_BEST_NAMESPACE:
617
618                    $scoreNamespace = $this->scoreBestNamespace($identifier);
619                    $bestNamespaceId = $scoreNamespace['namespace'];
620                    $score = $scoreNamespace['score'];
621
622                    if ($score > 0) {
623                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
624                        return;
625                    }
626                    break;
627
628                case self::GO_TO_SEARCH_ENGINE:
629
630                    $this->redirectToSearchEngine();
631
632                    return;
633
634                // End Switch Action
635            }
636
637            // End While Action
638        }
639
640
641    }
642
643
644    /**
645     * getBestNamespace
646     * Return a list with 'BestNamespaceId Score'
647     * @param $id
648     * @return array
649     */
650    private
651    function scoreBestNamespace($id)
652    {
653
654        global $conf;
655
656        // Parameters
657        $pageNameSpace = getNS($id);
658
659        // If the page has an existing namespace start page take it, other search other namespace
660        $startPageNameSpace = $pageNameSpace . ":";
661        $dateAt = '';
662        // $startPageNameSpace will get a full path (ie with start or the namespace
663        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
664        if (page_exists($startPageNameSpace)) {
665            $nameSpaces = array($startPageNameSpace);
666        } else {
667            $nameSpaces = ft_pageLookup($conf['start']);
668        }
669
670        // Parameters and search the best namespace
671        $pathNames = explode(':', $pageNameSpace);
672        $bestNbWordFound = 0;
673        $bestNamespaceId = '';
674        foreach ($nameSpaces as $nameSpace) {
675
676            $nbWordFound = 0;
677            foreach ($pathNames as $pathName) {
678                if (strlen($pathName) > 2) {
679                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
680                }
681            }
682            if ($nbWordFound > $bestNbWordFound) {
683                // Take only the smallest namespace
684                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
685                    $bestNbWordFound = $nbWordFound;
686                    $bestNamespaceId = $nameSpace;
687                }
688            }
689        }
690
691        $startPageFactor = $this->getConf('WeightFactorForStartPage');
692        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
693        if ($bestNbWordFound > 0) {
694            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
695        } else {
696            $bestNamespaceScore = 0;
697        }
698
699
700        return array(
701            'namespace' => $bestNamespaceId,
702            'score' => $bestNamespaceScore
703        );
704
705    }
706
707    /**
708     * @param $event
709     */
710    private
711    function gotToEditMode(&$event)
712    {
713        global $ACT;
714        $ACT = 'edit';
715
716    }
717
718
719    /**
720     * Redirect to an internal page ie:
721     *   * on the same domain
722     *   * no HTTP redirect
723     *   * id rewrite
724     * @param string $targetPageId - target page id
725     * @param string $targetOriginId - the source of the target (redirect)
726     * @return bool - return true if the user has the permission and that the redirect was done
727     * @throws Exception
728     */
729    private
730    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
731    {
732        /**
733         * Because we set the ID globally for the ID redirect
734         * we make sure that this is not a {@link MarkupPath}
735         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
736         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
737         */
738        if (is_object($targetPageId)) {
739            $class = get_class($targetPageId);
740            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
741        }
742
743        if (is_object($targetOriginId)) {
744            $class = get_class($targetOriginId);
745            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
746        }
747
748        // If the user does not have the right to see the target page
749        // don't do anything
750        if (!(Identity::isReader($targetPageId))) {
751            return false;
752        }
753
754        // Change the id
755        global $ID;
756        global $INFO;
757        $sourceId = $ID;
758        $ID = $targetPageId;
759        if (isset($_REQUEST["id"])) {
760            $_REQUEST["id"] = $targetPageId;
761        }
762        if (isset($_GET["id"])) {
763            $_GET["id"] = $targetPageId;
764        }
765
766        /**
767         * Refresh the $INFO data
768         *
769         * the info attributes are used elsewhere
770         *   'id': for the sidebar
771         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
772         *   'rev' : for the edit button to be sure that the page is still the same
773         */
774        $INFO = pageinfo();
775
776        /**
777         * Not compatible with
778         * https://www.dokuwiki.org/config:send404 is enabled
779         *
780         * This check happens before that dokuwiki is started
781         * and send an header in doku.php
782         *
783         * We send a warning
784         */
785        global $conf;
786        if ($conf['send404'] == true) {
787            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
788        }
789
790        // Redirection
791        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
792
793        return true;
794
795    }
796
797    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
798    {
799        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
800    }
801
802    /**
803     * The general HTTP Redirect method to an internal page
804     * where the redirection method decide which type of redirection
805     * @param string $targetIdOrUrl - a dokuwiki id or an url
806     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
807     * @param string $method - the redirection method
808     */
809    private
810    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
811    {
812
813        global $ID;
814
815
816        // Log the redirections
817        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
818
819
820        // An http external url ?
821        try {
822            $isValid = Url::createFromString($targetIdOrUrl)->isHttpUrl();
823        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
824            $isValid = false;
825        }
826
827        // If there is a bug in the isValid function for an internal url
828        // We get a loop.
829        // The Url becomes the id, the id is unknown and we do a redirect again
830        //
831        // We check then if the target starts with the base url
832        // if this is the case, it's valid
833        if (!$isValid && strpos($targetIdOrUrl, DOKU_URL) === 0) {
834            $isValid = true;
835        }
836        if ($isValid) {
837
838            // defend against HTTP Response Splitting
839            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
840            $targetUrl = stripctl($targetIdOrUrl);
841
842        } else {
843
844
845            // Explode the page ID and the anchor (#)
846            $link = explode('#', $targetIdOrUrl, 2);
847
848            // Query String to pass the message
849            $urlParams = [];
850            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
851                $urlParams = array(
852                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
853                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
854                );
855            }
856
857            // if this is search engine redirect
858            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
859                $replacementPart = array(':', '_', '-');
860                $query = str_replace($replacementPart, ' ', $ID);
861                $urlParams["do"] = "search";
862                $urlParams["q"] = $query;
863            }
864
865            $targetUrl = wl($link[0], $urlParams, true, '&');
866            // %3A back to :
867            $targetUrl = str_replace("%3A", ":", $targetUrl);
868            if ($link[1]) {
869                $targetUrl .= '#' . rawurlencode($link[1]);
870            }
871
872        }
873
874        /**
875         * The dokuwiki function {@link send_redirect()}
876         * set the `Location header` and in php, the header function
877         * in this case change the status code to 302 Arghhhh.
878         * The code below is adapted from this function {@link send_redirect()}
879         */
880        global $MSG; // are there any undisplayed messages? keep them in session for display
881        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
882            //reopen session, store data and close session again
883            @session_start();
884            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
885        }
886        session_write_close(); // always close the session
887
888        switch ($method) {
889            case self::REDIRECT_PERMANENT_METHOD:
890                ExecutionContext::getActualOrCreateFromEnv()
891                    ->response()
892                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
893                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
894                    ->end();
895                return true;
896            case self::REDIRECT_NOTFOUND_METHOD:
897
898                // Empty 404 body to not get the standard 404 page of the browser
899                // but a blank page to avoid a sort of FOUC.
900                // ie the user see a page briefly
901                ExecutionContext::getActualOrCreateFromEnv()
902                    ->response()
903                    ->setStatus(HttpResponseStatus::NOT_FOUND)
904                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
905                    ->setBody(self::PAGE_404, Mime::getHtml())
906                    ->end();
907                return true;
908
909            default:
910                LogUtility::msg("The method ($method) is not an http redirection");
911                return false;
912        }
913
914
915    }
916
917    /**
918     * @param $id
919     * @return array
920     */
921    private
922    function getBestPage($id): array
923    {
924
925        // The return parameters
926        $bestPageId = null;
927        $scorePageName = null;
928
929        // Get Score from a page
930        $pageName = noNS($id);
931        $pagesWithSameName = ft_pageLookup($pageName);
932        if (count($pagesWithSameName) > 0) {
933
934            // Search same namespace in the page found than in the Id page asked.
935            $bestNbWordFound = 0;
936
937
938            $wordsInPageSourceId = explode(':', $id);
939            foreach ($pagesWithSameName as $targetPageId => $title) {
940
941                // Nb of word found in the target page id
942                // that are in the source page id
943                $nbWordFound = 0;
944                foreach ($wordsInPageSourceId as $word) {
945                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
946                }
947
948                if ($bestPageId == null) {
949
950                    $bestNbWordFound = $nbWordFound;
951                    $bestPageId = $targetPageId;
952
953                } else {
954
955                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
956
957                        $bestNbWordFound = $nbWordFound;
958                        $bestPageId = $targetPageId;
959
960                    }
961
962                }
963
964            }
965            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
966            return array(
967                'id' => $bestPageId,
968                'score' => $scorePageName);
969        }
970        return array(
971            'id' => $bestPageId,
972            'score' => $scorePageName
973        );
974
975    }
976
977
978    /**
979     * Redirect to the search engine
980     */
981    private
982    function redirectToSearchEngine()
983    {
984
985        global $ID;
986        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
987
988    }
989
990
991    /**
992     *
993     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
994     *   * For a SQlite database, it will add a row into the log
995     *
996     * @param string $sourcePageId
997     * @param $targetPageId
998     * @param $algorithmic
999     * @param $method - http or rewrite
1000     */
1001    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1002    {
1003
1004        $row = array(
1005            "TIMESTAMP" => date("c"),
1006            "SOURCE" => $sourcePageId,
1007            "TARGET" => $targetPageId,
1008            "REFERRER" => $_SERVER['HTTP_REFERER'],
1009            "TYPE" => $algorithmic,
1010            "METHOD" => $method
1011        );
1012        $request = Sqlite::createOrGetBackendSqlite()
1013            ->createRequest()
1014            ->setTableRow('redirections_log', $row);
1015        try {
1016            $request
1017                ->execute();
1018        } catch (ExceptionCompile $e) {
1019            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1020        } finally {
1021            $request->close();
1022        }
1023
1024
1025    }
1026
1027    /**
1028     * This function check if there is a redirection declared
1029     * in the redirection table
1030     * @return bool - true if a rewrite or redirection occurs
1031     * @throws Exception
1032     */
1033    private function processingPageRules(): bool
1034    {
1035        global $ID;
1036
1037        $calculatedTarget = null;
1038        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1039        // Known redirection in the table
1040        // Get the page from redirection data
1041        $rules = $this->pageRules->getRules();
1042        foreach ($rules as $rule) {
1043
1044            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1045            $ruleTarget = $rule[PageRules::TARGET_NAME];
1046
1047            // Glob to Rexgexp
1048            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1049
1050            // Match ?
1051            // https://www.php.net/manual/en/function.preg-match.php
1052            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1053            if ($pregMatchResult === false) {
1054                // The `if` to take into account this problem
1055                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1056                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1057                return false;
1058            }
1059            if ($pregMatchResult) {
1060                $calculatedTarget = $ruleTarget;
1061                foreach ($matches as $key => $match) {
1062                    if ($key == 0) {
1063                        continue;
1064                    } else {
1065                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1066                    }
1067                }
1068                break;
1069            }
1070        }
1071
1072        if ($calculatedTarget == null) {
1073            return false;
1074        }
1075
1076        // If this is an external redirect (other domain)
1077        try {
1078            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1079        } catch (ExceptionBadSyntax $e) {
1080            $isHttpUrl = false;
1081        }
1082        if ($isHttpUrl) {
1083            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1084            return true;
1085        }
1086
1087        // If the page exist
1088        if (page_exists($calculatedTarget)) {
1089
1090            // This is DokuWiki Id and should always be lowercase
1091            // The page rule may have change that
1092            $calculatedTarget = strtolower($calculatedTarget);
1093            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1094            if ($res) {
1095                return true;
1096            } else {
1097                return false;
1098            }
1099
1100        } else {
1101
1102            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1103            return false;
1104
1105        }
1106
1107    }
1108
1109    private function performNotFoundRedirect(string $targetId, string $origin): bool
1110    {
1111        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1112    }
1113
1114
1115}
1116