xref: /plugin/combo/action/router.php (revision f49666c9f21885cd7f6e44c53bc8ba5754a36151)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\ExceptionBadArgument;
6use ComboStrap\ExceptionBadSyntax;
7use ComboStrap\ExceptionCompile;
8use ComboStrap\ExceptionSqliteNotAvailable;
9use ComboStrap\ExecutionContext;
10use ComboStrap\FileSystems;
11use ComboStrap\HttpResponse;
12use ComboStrap\HttpResponseStatus;
13use ComboStrap\Identity;
14use ComboStrap\LogUtility;
15use ComboStrap\MarkupPath;
16use ComboStrap\Meta\Field\AliasType;
17use ComboStrap\Mime;
18use ComboStrap\PageId;
19use ComboStrap\PageRules;
20use ComboStrap\PageUrlPath;
21use ComboStrap\PageUrlType;
22use ComboStrap\RouterBestEndPage;
23use ComboStrap\Site;
24use ComboStrap\SiteConfig;
25use ComboStrap\Sqlite;
26use ComboStrap\Web\Url;
27use ComboStrap\Web\UrlRewrite;
28use ComboStrap\WikiPath;
29
30require_once(__DIR__ . '/../vendor/autoload.php');
31
/**
 * Class action_plugin_combo_router
 *
 * The router (formerly named the URL manager)
 *
 *
 */
39class action_plugin_combo_router extends DokuWiki_Action_Plugin
40{
41
42    /**
43     * @deprecated
44     */
45    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
46    const ROUTER_ENABLE_CONF = "enableRouter";
47
48    // The redirect type
49    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
50    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
51    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
52    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
53
54    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
55
56    // Where the target id value comes from
57    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
58    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
59    /**
60     * Named Permalink (canonical)
61     */
62    const TARGET_ORIGIN_CANONICAL = 'canonical';
63    const TARGET_ORIGIN_ALIAS = 'alias';
64    /**
65     * Identifier Permalink (full page id)
66     */
67    const TARGET_ORIGIN_PERMALINK = "permalink";
68    /**
69     * Extended Permalink (abbreviated page id at the end)
70     */
71    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
72    const TARGET_ORIGIN_START_PAGE = 'startPage';
73    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
74    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
75    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
76    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
77    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
78
79
80    // The constant parameters
81    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
82    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
83    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
84    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
85    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
86    const GO_TO_EDIT_MODE = 'GoToEditMode';
87    const NOTHING = 'Nothing';
88
89    /** @var string - a name used in log and other places */
90    const NAME = 'Url Manager';
91    const CANONICAL = 'router';
92    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
93    const REFRESH_HEADER_NAME = "Refresh";
94    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
95    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
96    public const URL_MANAGER_NAME = "Router";
97
98
99    /**
100     * @var PageRules
101     */
102    private $pageRules;
103
104
105    function __construct()
106    {
107        // enable direct access to language strings
108        // ie $this->lang
109        $this->setupLocale();
110
111    }
112
113    /**
114     * @param string $refreshHeader
115     * @return false|string
116     */
117    public static function getUrlFromRefresh(string $refreshHeader)
118    {
119        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
120    }
121
122    public static function getUrlFromLocation($refreshHeader)
123    {
124        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
125    }
126
127    /**
128     * @return array|mixed|string|string[]
129     *
130     * Unfortunately, DOKUWIKI_STARTED is not the first event
131     * The id may have been changed by
132     * {@link action_plugin_combo_metalang::load_lang()}
133     * function, that's why we have this function
134     * to get the original requested id
135     */
136    private static function getOriginalIdFromRequest()
137    {
138        $originalId = $_GET["id"] ?? null;
139        if ($originalId === null) {
140            return null;
141        }
142        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
143    }
144
145    /**
146     * Determine if the request should be banned based on the id
147     *
148     * @param string $id
149     * @return bool
150     *
151     * See also {@link https://perishablepress.com/7g-firewall/#features}
152     * for blocking rules on http request data such as:
153     *   * query_string
154     *   * user_agent,
155     *   * remote host
156     */
157    public static function isShadowBanned(string $id): bool
158    {
159        /**
160         * ie
161         * wp-json:api:flutter_woo:config_file
162         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
163         * wp-admin
164         * 2020:wp-includes:wlwmanifest.xml
165         * wp-content:start
166         * wp-admin:css:start
167         * sito:wp-includes:wlwmanifest.xml
168         * site:wp-includes:wlwmanifest.xml
169         * cms:wp-includes:wlwmanifest.xml
170         * test:wp-includes:wlwmanifest.xml
171         * media:wp-includes:wlwmanifest.xml
172         * wp2:wp-includes:wlwmanifest.xml
173         * 2019:wp-includes:wlwmanifest.xml
174         * shop:wp-includes:wlwmanifest.xml
175         * wp1:wp-includes:wlwmanifest.xml
176         * news:wp-includes:wlwmanifest.xml
177         * 2018:wp-includes:wlwmanifest.xml
178         */
179        if (strpos($id, 'wp-') !== false) {
180            return true;
181        }
182
183        /**
184         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
185         * db:oracle:999999.9:union:all:select_null:from_dual
186         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
187         */
188        if (preg_match('/_chr_|_0_0/', $id) === 1) {
189            return true;
190        }
191
192
193        /**
194         * ie
195         * git:objects:
196         * git:refs:heads:stable
197         * git:logs:refs:heads:main
198         * git:logs:refs:heads:stable
199         * git:hooks:pre-push.sample
200         * git:hooks:pre-receive.sample
201         */
202        if (strpos($id, "git:") === 0) {
203            return true;
204        }
205
206        return false;
207
208    }
209
210    /**
211     * @param string $id
212     * @return bool
213     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
214     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
215     * well-known:dnt-policy.txt
216     */
217    public static function isWellKnownFile(string $id): bool
218    {
219        return strpos($id, "well-known") === 0;
220    }
221
222
223    function register(Doku_Event_Handler $controller)
224    {
225
226        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
227
228            /**
229             * This will call the function {@link action_plugin_combo_router::_router()}
230             * The event is not DOKUWIKI_STARTED because this is not the first one
231             *
232             * https://www.dokuwiki.org/devel:event:init_lang_load
233             */
234            $controller->register_hook('DOKUWIKI_STARTED',
235                'BEFORE',
236                $this,
237                'router',
238                array());
239
240            /**
241             * This is the real first call of Dokuwiki
242             * Unfortunately, it does not create the environment
243             * We just ban to spare server resources
244             *
245             * https://www.dokuwiki.org/devel:event:init_lang_load
246             */
247            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
248
249        }
250
251
252    }
253
254    /**
255     *
256     * We have created a spacial ban function that is
257     * called before the first function
258     * {@link action_plugin_combo_metalang::load_lang()}
259     * to spare CPU.
260     *
261     * @param $event
262     * @throws Exception
263     */
264    function ban(&$event)
265    {
266
267        $id = self::getOriginalIdFromRequest();
268        $page = MarkupPath::createMarkupFromId($id);
269        if (!FileSystems::exists($page)) {
270            // Well known
271            if (self::isWellKnownFile($id)) {
272                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
273                ExecutionContext::getActualOrCreateFromEnv()
274                    ->response()
275                    ->setStatus(HttpResponseStatus::NOT_FOUND)
276                    ->end();
277                return;
278            }
279
280            // Shadow banned
281            if (self::isShadowBanned($id)) {
282                $webSiteHomePage = Site::getIndexPageName();
283                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
284            }
285        }
286    }
287
288    /**
289     * @param $event Doku_Event
290     * @param $param
291     * @return void
292     * @throws Exception
293     */
294    function router(&$event, $param)
295    {
296
297        /**
298         * Just the {@link ExecutionContext::SHOW_ACTION}
299         * may be redirected
300         */
301        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
302        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
303            return;
304        }
305
306        $urlRewrite = Site::getUrlRewrite();
307        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
308            UrlRewrite::sendErrorMessage();
309            return;
310        }
311
312        global $ID;
313
314        /**
315         * Without SQLite, this module does not work further
316         */
317        try {
318            Sqlite::createOrGetSqlite();
319        } catch (ExceptionSqliteNotAvailable $e) {
320            return;
321        }
322
323        $this->pageRules = new PageRules();
324
325
326        /**
327         * Unfortunately, DOKUWIKI_STARTED is not the first event
328         * The id may have been changed by
329         * {@link action_plugin_combo_lang::load_lang()}
330         * function, that's why we check against the {@link $_REQUEST}
331         * and not the global ID
332         */
333        $originalId = self::getOriginalIdFromRequest();
334
335        /**
336         * Page is an existing id ?
337         */
338        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
339        if (FileSystems::exists($requestedMarkupPath)) {
340
341            /**
342             * If this is not the root home page
343             * and if the canonical id is the not the same,
344             * and if this is not a historical page (revision)
345             * redirect
346             */
347            if (
348                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
349                && $ID != Site::getIndexPageName()
350                && !isset($_REQUEST["rev"])
351            ) {
352                /**
353                 * TODO: When saving for the first time, the page is not stored in the database
354                 *   but that's not the case actually
355                 */
356                $databasePageRow = $requestedMarkupPath->getDatabasePage();
357                if ($databasePageRow->exists()) {
358                    /**
359                     * A move may leave the database in a bad state,
360                     * unfortunately (ie page is not in index, unable to update, ...)
361                     * We test therefore if the database page id exists
362                     */
363                    $targetPageId = $databasePageRow->getFromRow("id");
364                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
365                    if (FileSystems::exists($targetPath)) {
366                        $this->executePermanentRedirect(
367                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
368                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
369                        );
370                    }
371                }
372            }
373            return;
374        }
375
376
377        $identifier = $ID;
378
379
380        /**
381         * Page Id Website / root Permalink ?
382         */
383        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
384        if ($shortPageId !== null) {
385            $pageId = PageUrlPath::decodePageId($shortPageId);
386            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
387                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
388                if ($page !== null && $page->exists()) {
389                    $this->executePermanentRedirect(
390                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
391                        self::TARGET_ORIGIN_PERMALINK
392                    );
393                }
394            }
395
396            /**
397             * Page Id Abbr ?
398             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
399             */
400            if (
401                $pageId !== null
402            ) {
403                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
404                if ($page === null) {
405                    // or the length of the abbr has changed
406                    $canonicalDatabasePage = new DatabasePageRow();
407                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
408                    if ($row !== null) {
409                        $canonicalDatabasePage->setRow($row);
410                        $page = $canonicalDatabasePage->getMarkupPath();
411                    }
412                }
413                if ($page !== null && $page->exists()) {
414                    /**
415                     * If the url canonical id has changed, we show it
416                     * to the writer by performing a permanent redirect
417                     */
418                    if ($identifier != $page->getUrlId()) {
419                        // Google asks for a redirect
420                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
421                        // People access your site through several different URLs.
422                        // If, for example, your home page can be reached in multiple ways
423                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
424                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
425                        // and use redirects to send traffic from the other URLs to your preferred URL.
426                        $this->executePermanentRedirect(
427                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
428                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
429                        );
430                        return;
431                    }
432
433                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
434                    return;
435
436                }
437                // permanent url not yet in the database
438                // Other permanent such as permanent canonical ?
439                // We let the process go with the new identifier
440
441            }
442
443        }
444
445        // Global variable needed in the process
446        global $conf;
447
448        /**
449         * Identifier is a Canonical ?
450         */
451        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
452        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
453        if ($canonicalPage !== null && $canonicalPage->exists()) {
454            /**
455             * Does the canonical url is canonical name based
456             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
457             */
458            if ($canonicalPage->getUrlId() === $identifier) {
459                $res = $this->executeTransparentRedirect(
460                    $canonicalPage->getWikiId(),
461                    self::TARGET_ORIGIN_CANONICAL
462                );
463            } else {
464                $res = $this->executePermanentRedirect(
465                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
466                    self::TARGET_ORIGIN_CANONICAL
467                );
468            }
469            if ($res) {
470                return;
471            }
472        }
473
474        /**
475         * Identifier is an alias
476         */
477        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
478        if (
479            $aliasRequestedPage !== null
480            && $aliasRequestedPage->exists()
481            // The build alias is the file system metadata alias
482            // it may be null if the replication in the database was not successful
483            && $aliasRequestedPage->getBuildAlias() !== null
484        ) {
485            $buildAlias = $aliasRequestedPage->getBuildAlias();
486            switch ($buildAlias->getType()) {
487                case AliasType::REDIRECT:
488                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
489                    if ($res) {
490                        return;
491                    }
492                    break;
493                case AliasType::SYNONYM:
494                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
495                    if ($res) {
496                        return;
497                    }
498                    break;
499                default:
500                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
501                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
502                    if ($res) {
503                        return;
504                    }
505                    break;
506            }
507        }
508
509
510        // If there is a redirection defined in the page rules
511        $result = $this->processingPageRules();
512        if ($result) {
513            // A redirection has occurred
514            // finish the process
515            return;
516        }
517
518        /**
519         *
520         * There was no redirection found, redirect to edit mode if writer
521         *
522         */
523        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
524
525            $this->gotToEditMode($event);
526            // Stop here
527            return;
528
529        }
530
531        /**
532         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
533         */
534        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
535            return;
536        }
537
538        // We are reader and their is no redirection set, we apply the algorithm
539        $readerAlgorithms = array();
540        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
541        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
542        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
543
544        while (
545            ($algorithm = array_shift($readerAlgorithms)) != null
546        ) {
547
548            switch ($algorithm) {
549
550                case self::NOTHING:
551                    return;
552
553                case self::GO_TO_BEST_END_PAGE_NAME:
554
555                    /**
556                     * @var MarkupPath $bestEndPage
557                     */
558                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
559                    if ($bestEndPage != null) {
560                        $res = false;
561                        switch ($method) {
562                            case self::REDIRECT_PERMANENT_METHOD:
563                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
564                                break;
565                            case self::REDIRECT_NOTFOUND_METHOD:
566                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
567                                break;
568                            default:
569                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
570                        }
571                        if ($res) {
572                            // Redirection has succeeded
573                            return;
574                        }
575                    }
576                    break;
577
578                case self::GO_TO_NS_START_PAGE:
579
580                    // Start page with the conf['start'] parameter
581                    $startPage = getNS($identifier) . ':' . $conf['start'];
582                    if (page_exists($startPage)) {
583                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
584                        if ($res) {
585                            return;
586                        }
587                    }
588
589                    // Start page with the same name than the namespace
590                    $startPage = getNS($identifier) . ':' . curNS($identifier);
591                    if (page_exists($startPage)) {
592                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
593                        if ($res) {
594                            return;
595                        }
596                    }
597                    break;
598
599                case self::GO_TO_BEST_PAGE_NAME:
600
601                    $bestPageId = null;
602
603                    $bestPage = $this->getBestPage($identifier);
604                    $bestPageId = $bestPage['id'];
605                    $scorePageName = $bestPage['score'];
606
607                    // Get Score from a Namespace
608                    $bestNamespace = $this->scoreBestNamespace($identifier);
609                    $bestNamespaceId = $bestNamespace['namespace'];
610                    $namespaceScore = $bestNamespace['score'];
611
612                    // Compare the two score
613                    if ($scorePageName > 0 or $namespaceScore > 0) {
614                        if ($scorePageName > $namespaceScore) {
615                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
616                        } else {
617                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
618                        }
619                        return;
620                    }
621                    break;
622
623                case self::GO_TO_BEST_NAMESPACE:
624
625                    $scoreNamespace = $this->scoreBestNamespace($identifier);
626                    $bestNamespaceId = $scoreNamespace['namespace'];
627                    $score = $scoreNamespace['score'];
628
629                    if ($score > 0) {
630                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
631                        return;
632                    }
633                    break;
634
635                case self::GO_TO_SEARCH_ENGINE:
636
637                    $this->redirectToSearchEngine();
638
639                    return;
640
641                // End Switch Action
642            }
643
644            // End While Action
645        }
646
647
648    }
649
650
651    /**
652     * getBestNamespace
653     * Return a list with 'BestNamespaceId Score'
654     * @param $id
655     * @return array
656     */
657    private
658    function scoreBestNamespace($id)
659    {
660
661        global $conf;
662
663        // Parameters
664        $pageNameSpace = getNS($id);
665
666        // If the page has an existing namespace start page take it, other search other namespace
667        $startPageNameSpace = $pageNameSpace . ":";
668        $dateAt = '';
669        // $startPageNameSpace will get a full path (ie with start or the namespace
670        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
671        if (page_exists($startPageNameSpace)) {
672            $nameSpaces = array($startPageNameSpace);
673        } else {
674            $nameSpaces = ft_pageLookup($conf['start']);
675        }
676
677        // Parameters and search the best namespace
678        $pathNames = explode(':', $pageNameSpace);
679        $bestNbWordFound = 0;
680        $bestNamespaceId = '';
681        foreach ($nameSpaces as $nameSpace) {
682
683            $nbWordFound = 0;
684            foreach ($pathNames as $pathName) {
685                if (strlen($pathName) > 2) {
686                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
687                }
688            }
689            if ($nbWordFound > $bestNbWordFound) {
690                // Take only the smallest namespace
691                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
692                    $bestNbWordFound = $nbWordFound;
693                    $bestNamespaceId = $nameSpace;
694                }
695            }
696        }
697
698        $startPageFactor = $this->getConf('WeightFactorForStartPage');
699        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
700        if ($bestNbWordFound > 0) {
701            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
702        } else {
703            $bestNamespaceScore = 0;
704        }
705
706
707        return array(
708            'namespace' => $bestNamespaceId,
709            'score' => $bestNamespaceScore
710        );
711
712    }
713
714    /**
715     * @param $event
716     */
717    private
718    function gotToEditMode(&$event)
719    {
720        global $ACT;
721        $ACT = 'edit';
722
723    }
724
725
726    /**
727     * Redirect to an internal page ie:
728     *   * on the same domain
729     *   * no HTTP redirect
730     *   * id rewrite
731     * @param string $targetPageId - target page id
732     * @param string $targetOriginId - the source of the target (redirect)
733     * @return bool - return true if the user has the permission and that the redirect was done
734     * @throws Exception
735     */
736    private
737    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
738    {
739        /**
740         * Because we set the ID globally for the ID redirect
741         * we make sure that this is not a {@link MarkupPath}
742         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
743         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
744         */
745        if (is_object($targetPageId)) {
746            $class = get_class($targetPageId);
747            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
748        }
749
750        if (is_object($targetOriginId)) {
751            $class = get_class($targetOriginId);
752            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
753        }
754
755        // If the user does not have the right to see the target page
756        // don't do anything
757        if (!(Identity::isReader($targetPageId))) {
758            return false;
759        }
760
761        // Change the id
762        global $ID;
763        global $INFO;
764        $sourceId = $ID;
765        $ID = $targetPageId;
766        if (isset($_REQUEST["id"])) {
767            $_REQUEST["id"] = $targetPageId;
768        }
769        if (isset($_GET["id"])) {
770            $_GET["id"] = $targetPageId;
771        }
772
773        /**
774         * Refresh the $INFO data
775         *
776         * the info attributes are used elsewhere
777         *   'id': for the sidebar
778         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
779         *   'rev' : for the edit button to be sure that the page is still the same
780         */
781        $INFO = pageinfo();
782
783        /**
784         * Not compatible with
785         * https://www.dokuwiki.org/config:send404 is enabled
786         *
787         * This check happens before that dokuwiki is started
788         * and send an header in doku.php
789         *
790         * We send a warning
791         */
792        global $conf;
793        if ($conf['send404'] == true) {
794            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
795        }
796
797        // Redirection
798        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
799
800        return true;
801
802    }
803
804    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
805    {
806        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
807    }
808
809    /**
810     * The general HTTP Redirect method to an internal page
811     * where the redirection method decide which type of redirection
812     * @param string $targetIdOrUrl - a dokuwiki id or an url
813     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
814     * @param string $method - the redirection method
815     */
816    private
817    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
818    {
819
820        global $ID;
821
822
823        // Log the redirections
824        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
825
826
827        // An http external url ?
828        try {
829            $isValid = Url::createFromString($targetIdOrUrl)->isHttpUrl();
830        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
831            $isValid = false;
832        }
833
834        // If there is a bug in the isValid function for an internal url
835        // We get a loop.
836        // The Url becomes the id, the id is unknown and we do a redirect again
837        //
838        // We check then if the target starts with the base url
839        // if this is the case, it's valid
840        if (!$isValid && strpos($targetIdOrUrl, DOKU_URL) === 0) {
841            $isValid = true;
842        }
843        if ($isValid) {
844
845            // defend against HTTP Response Splitting
846            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
847            $targetUrl = stripctl($targetIdOrUrl);
848
849        } else {
850
851
852            // Explode the page ID and the anchor (#)
853            $link = explode('#', $targetIdOrUrl, 2);
854
855            // Query String to pass the message
856            $urlParams = [];
857            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
858                $urlParams = array(
859                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
860                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
861                );
862            }
863
864            // if this is search engine redirect
865            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
866                $replacementPart = array(':', '_', '-');
867                $query = str_replace($replacementPart, ' ', $ID);
868                $urlParams["do"] = "search";
869                $urlParams["q"] = $query;
870            }
871
872            $targetUrl = wl($link[0], $urlParams, true, '&');
873            // %3A back to :
874            $targetUrl = str_replace("%3A", ":", $targetUrl);
875            if ($link[1]) {
876                $targetUrl .= '#' . rawurlencode($link[1]);
877            }
878
879        }
880
881        /**
882         * The dokuwiki function {@link send_redirect()}
883         * set the `Location header` and in php, the header function
884         * in this case change the status code to 302 Arghhhh.
885         * The code below is adapted from this function {@link send_redirect()}
886         */
887        global $MSG; // are there any undisplayed messages? keep them in session for display
888        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
889            //reopen session, store data and close session again
890            @session_start();
891            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
892        }
893        session_write_close(); // always close the session
894
895        switch ($method) {
896            case self::REDIRECT_PERMANENT_METHOD:
897                ExecutionContext::getActualOrCreateFromEnv()
898                    ->response()
899                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
900                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
901                    ->end();
902                return true;
903            case self::REDIRECT_NOTFOUND_METHOD:
904
905                // Empty 404 body to not get the standard 404 page of the browser
906                // but a blank page to avoid a sort of FOUC.
907                // ie the user see a page briefly
908                ExecutionContext::getActualOrCreateFromEnv()
909                    ->response()
910                    ->setStatus(HttpResponseStatus::NOT_FOUND)
911                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
912                    ->setBody(self::PAGE_404, Mime::getHtml())
913                    ->end();
914                return true;
915
916            default:
917                LogUtility::msg("The method ($method) is not an http redirection");
918                return false;
919        }
920
921
922    }
923
924    /**
925     * @param $id
926     * @return array
927     */
928    private
929    function getBestPage($id): array
930    {
931
932        // The return parameters
933        $bestPageId = null;
934        $scorePageName = null;
935
936        // Get Score from a page
937        $pageName = noNS($id);
938        $pagesWithSameName = ft_pageLookup($pageName);
939        if (count($pagesWithSameName) > 0) {
940
941            // Search same namespace in the page found than in the Id page asked.
942            $bestNbWordFound = 0;
943
944
945            $wordsInPageSourceId = explode(':', $id);
946            foreach ($pagesWithSameName as $targetPageId => $title) {
947
948                // Nb of word found in the target page id
949                // that are in the source page id
950                $nbWordFound = 0;
951                foreach ($wordsInPageSourceId as $word) {
952                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
953                }
954
955                if ($bestPageId == null) {
956
957                    $bestNbWordFound = $nbWordFound;
958                    $bestPageId = $targetPageId;
959
960                } else {
961
962                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
963
964                        $bestNbWordFound = $nbWordFound;
965                        $bestPageId = $targetPageId;
966
967                    }
968
969                }
970
971            }
972            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
973            return array(
974                'id' => $bestPageId,
975                'score' => $scorePageName);
976        }
977        return array(
978            'id' => $bestPageId,
979            'score' => $scorePageName
980        );
981
982    }
983
984
985    /**
986     * Redirect to the search engine
987     */
988    private
989    function redirectToSearchEngine()
990    {
991
992        global $ID;
993        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
994
995    }
996
997
998    /**
999     *
1000     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1001     *   * For a SQlite database, it will add a row into the log
1002     *
1003     * @param string $sourcePageId
1004     * @param $targetPageId
1005     * @param $algorithmic
1006     * @param $method - http or rewrite
1007     */
1008    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1009    {
1010
1011        $row = array(
1012            "TIMESTAMP" => date("c"),
1013            "SOURCE" => $sourcePageId,
1014            "TARGET" => $targetPageId,
1015            "REFERRER" => $_SERVER['HTTP_REFERER'],
1016            "TYPE" => $algorithmic,
1017            "METHOD" => $method
1018        );
1019        $request = Sqlite::createOrGetBackendSqlite()
1020            ->createRequest()
1021            ->setTableRow('redirections_log', $row);
1022        try {
1023            $request
1024                ->execute();
1025        } catch (ExceptionCompile $e) {
1026            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1027        } finally {
1028            $request->close();
1029        }
1030
1031
1032    }
1033
1034    /**
1035     * This function check if there is a redirection declared
1036     * in the redirection table
1037     * @return bool - true if a rewrite or redirection occurs
1038     * @throws Exception
1039     */
1040    private function processingPageRules(): bool
1041    {
1042        global $ID;
1043
1044        $calculatedTarget = null;
1045        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1046        // Known redirection in the table
1047        // Get the page from redirection data
1048        $rules = $this->pageRules->getRules();
1049        foreach ($rules as $rule) {
1050
1051            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1052            $ruleTarget = $rule[PageRules::TARGET_NAME];
1053
1054            // Glob to Rexgexp
1055            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1056
1057            // Match ?
1058            // https://www.php.net/manual/en/function.preg-match.php
1059            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1060            if ($pregMatchResult === false) {
1061                // The `if` to take into account this problem
1062                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1063                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1064                return false;
1065            }
1066            if ($pregMatchResult) {
1067                $calculatedTarget = $ruleTarget;
1068                foreach ($matches as $key => $match) {
1069                    if ($key == 0) {
1070                        continue;
1071                    } else {
1072                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1073                    }
1074                }
1075                break;
1076            }
1077        }
1078
1079        if ($calculatedTarget == null) {
1080            return false;
1081        }
1082
1083        // If this is an external redirect (other domain)
1084        try {
1085            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1086        } catch (ExceptionBadSyntax $e) {
1087            $isHttpUrl = false;
1088        }
1089        if ($isHttpUrl) {
1090            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1091            return true;
1092        }
1093
1094        // If the page exist
1095        if (page_exists($calculatedTarget)) {
1096
1097            // This is DokuWiki Id and should always be lowercase
1098            // The page rule may have change that
1099            $calculatedTarget = strtolower($calculatedTarget);
1100            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1101            if ($res) {
1102                return true;
1103            } else {
1104                return false;
1105            }
1106
1107        } else {
1108
1109            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1110            return false;
1111
1112        }
1113
1114    }
1115
1116    private function performNotFoundRedirect(string $targetId, string $origin): bool
1117    {
1118        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1119    }
1120
1121
1122}
1123