xref: /plugin/combo/action/router.php (revision 54743e42e98b8cc4f9d7df000ae3be1d8edf18ff)
1<?php
2
3
4use ComboStrap\DatabasePageRow;
5use ComboStrap\ExceptionBadArgument;
6use ComboStrap\ExceptionBadSyntax;
7use ComboStrap\ExceptionCompile;
8use ComboStrap\ExceptionSqliteNotAvailable;
9use ComboStrap\ExecutionContext;
10use ComboStrap\FileSystems;
11use ComboStrap\HttpResponse;
12use ComboStrap\HttpResponseStatus;
13use ComboStrap\Identity;
14use ComboStrap\LogUtility;
15use ComboStrap\MarkupPath;
16use ComboStrap\Meta\Field\AliasType;
17use ComboStrap\Mime;
18use ComboStrap\PageId;
19use ComboStrap\PageRules;
20use ComboStrap\PageUrlPath;
21use ComboStrap\PageUrlType;
22use ComboStrap\RouterBestEndPage;
23use ComboStrap\Site;
24use ComboStrap\SiteConfig;
25use ComboStrap\Sqlite;
26use ComboStrap\Web\Url;
27use ComboStrap\Web\UrlRewrite;
28use ComboStrap\WikiPath;
29
30require_once(__DIR__ . '/../vendor/autoload.php');
31
/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router)
 *
 *
 */
39class action_plugin_combo_router extends DokuWiki_Action_Plugin
40{
41
42    /**
43     * @deprecated
44     */
45    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
46    const ROUTER_ENABLE_CONF = "enableRouter";
47
48    // The redirect type
49    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
50    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
51    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
52    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)
53
54    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";
55
56    // Where the target id value comes from
57    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
58    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
59    /**
60     * Named Permalink (canonical)
61     */
62    const TARGET_ORIGIN_CANONICAL = 'canonical';
63    const TARGET_ORIGIN_ALIAS = 'alias';
64    /**
65     * Identifier Permalink (full page id)
66     */
67    const TARGET_ORIGIN_PERMALINK = "permalink";
68    /**
69     * Extended Permalink (abbreviated page id at the end)
70     */
71    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
72    const TARGET_ORIGIN_START_PAGE = 'startPage';
73    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
74    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
75    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
76    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
77    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";
78
79
80    // The constant parameters
81    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
82    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
83    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
84    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
85    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
86    const GO_TO_EDIT_MODE = 'GoToEditMode';
87    const NOTHING = 'Nothing';
88
89    /** @var string - a name used in log and other places */
90    const NAME = 'Url Manager';
91    const CANONICAL = 'router';
92    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
93    const REFRESH_HEADER_NAME = "Refresh";
94    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
95    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
96    public const URL_MANAGER_NAME = "Router";
97
98
99    /**
100     * @var PageRules
101     */
102    private $pageRules;
103
104
105    function __construct()
106    {
107        // enable direct access to language strings
108        // ie $this->lang
109        $this->setupLocale();
110
111    }
112
113    /**
114     * @param string $refreshHeader
115     * @return false|string
116     */
117    public static function getUrlFromRefresh(string $refreshHeader)
118    {
119        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
120    }
121
122    public static function getUrlFromLocation($refreshHeader)
123    {
124        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
125    }
126
127    /**
128     * @return array|mixed|string|string[]
129     *
130     * Unfortunately, DOKUWIKI_STARTED is not the first event
131     * The id may have been changed by
132     * {@link action_plugin_combo_metalang::load_lang()}
133     * function, that's why we have this function
134     * to get the original requested id
135     */
136    private static function getOriginalIdFromRequest()
137    {
138        $originalId = $_GET["id"];
139        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
140    }
141
142    /**
143     * Determine if the request should be banned based on the id
144     *
145     * @param string $id
146     * @return bool
147     *
148     * See also {@link https://perishablepress.com/7g-firewall/#features}
149     * for blocking rules on http request data such as:
150     *   * query_string
151     *   * user_agent,
152     *   * remote host
153     */
154    public static function isShadowBanned(string $id): bool
155    {
156        /**
157         * ie
158         * wp-json:api:flutter_woo:config_file
159         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
160         * wp-admin
161         * 2020:wp-includes:wlwmanifest.xml
162         * wp-content:start
163         * wp-admin:css:start
164         * sito:wp-includes:wlwmanifest.xml
165         * site:wp-includes:wlwmanifest.xml
166         * cms:wp-includes:wlwmanifest.xml
167         * test:wp-includes:wlwmanifest.xml
168         * media:wp-includes:wlwmanifest.xml
169         * wp2:wp-includes:wlwmanifest.xml
170         * 2019:wp-includes:wlwmanifest.xml
171         * shop:wp-includes:wlwmanifest.xml
172         * wp1:wp-includes:wlwmanifest.xml
173         * news:wp-includes:wlwmanifest.xml
174         * 2018:wp-includes:wlwmanifest.xml
175         */
176        if (strpos($id, 'wp-') !== false) {
177            return true;
178        }
179
180        /**
181         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
182         * db:oracle:999999.9:union:all:select_null:from_dual
183         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
184         */
185        if (preg_match('/_chr_|_0_0/', $id) === 1) {
186            return true;
187        }
188
189
190        /**
191         * ie
192         * git:objects:
193         * git:refs:heads:stable
194         * git:logs:refs:heads:main
195         * git:logs:refs:heads:stable
196         * git:hooks:pre-push.sample
197         * git:hooks:pre-receive.sample
198         */
199        if (strpos($id, "git:") === 0) {
200            return true;
201        }
202
203        return false;
204
205    }
206
207    /**
208     * @param string $id
209     * @return bool
210     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
211     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
212     * well-known:dnt-policy.txt
213     */
214    public static function isWellKnownFile(string $id): bool
215    {
216        return strpos($id, "well-known") === 0;
217    }
218
219
220    function register(Doku_Event_Handler $controller)
221    {
222
223        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
224
225            /**
226             * This will call the function {@link action_plugin_combo_router::_router()}
227             * The event is not DOKUWIKI_STARTED because this is not the first one
228             *
229             * https://www.dokuwiki.org/devel:event:init_lang_load
230             */
231            $controller->register_hook('DOKUWIKI_STARTED',
232                'BEFORE',
233                $this,
234                'router',
235                array());
236
237            /**
238             * This is the real first call of Dokuwiki
239             * Unfortunately, it does not create the environment
240             * We just ban to spare server resources
241             *
242             * https://www.dokuwiki.org/devel:event:init_lang_load
243             */
244            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
245
246        }
247
248
249    }
250
251    /**
252     *
253     * We have created a spacial ban function that is
254     * called before the first function
255     * {@link action_plugin_combo_metalang::load_lang()}
256     * to spare CPU.
257     *
258     * @param $event
259     * @throws Exception
260     */
261    function ban(&$event)
262    {
263
264        $id = self::getOriginalIdFromRequest();
265        $page = MarkupPath::createMarkupFromId($id);
266        if (!FileSystems::exists($page)) {
267            // Well known
268            if (self::isWellKnownFile($id)) {
269                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
270                ExecutionContext::getActualOrCreateFromEnv()
271                    ->response()
272                    ->setStatus(HttpResponseStatus::NOT_FOUND)
273                    ->end();
274                return;
275            }
276
277            // Shadow banned
278            if (self::isShadowBanned($id)) {
279                $webSiteHomePage = Site::getIndexPageName();
280                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
281            }
282        }
283    }
284
285    /**
286     * @param $event Doku_Event
287     * @param $param
288     * @return void
289     * @throws Exception
290     */
291    function router(&$event, $param)
292    {
293
294        /**
295         * Just the {@link ExecutionContext::SHOW_ACTION}
296         * may be redirected
297         */
298        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
299        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
300            return;
301        }
302
303        $urlRewrite = Site::getUrlRewrite();
304        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
305            UrlRewrite::sendErrorMessage();
306            return;
307        }
308
309        global $ID;
310
311        /**
312         * Without SQLite, this module does not work further
313         */
314        try {
315            Sqlite::createOrGetSqlite();
316        } catch (ExceptionSqliteNotAvailable $e) {
317            return;
318        }
319
320        $this->pageRules = new PageRules();
321
322
323        /**
324         * Unfortunately, DOKUWIKI_STARTED is not the first event
325         * The id may have been changed by
326         * {@link action_plugin_combo_lang::load_lang()}
327         * function, that's why we check against the {@link $_REQUEST}
328         * and not the global ID
329         */
330        $originalId = self::getOriginalIdFromRequest();
331
332        /**
333         * Page is an existing id ?
334         */
335        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
336        if (FileSystems::exists($requestedMarkupPath)) {
337
338            /**
339             * If this is not the root home page
340             * and if the canonical id is the not the same,
341             * and if this is not a historical page (revision)
342             * redirect
343             */
344            if (
345                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
346                && $ID != Site::getIndexPageName()
347                && !isset($_REQUEST["rev"])
348            ) {
349                /**
350                 * TODO: When saving for the first time, the page is not stored in the database
351                 *   but that's not the case actually
352                 */
353                $databasePageRow = $requestedMarkupPath->getDatabasePage();
354                if ($databasePageRow->exists()) {
355                    /**
356                     * A move may leave the database in a bad state,
357                     * unfortunately (ie page is not in index, unable to update, ...)
358                     * We test therefore if the database page id exists
359                     */
360                    $targetPageId = $databasePageRow->getFromRow("id");
361                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
362                    if (FileSystems::exists($targetPath)) {
363                        $this->executePermanentRedirect(
364                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
365                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
366                        );
367                    }
368                }
369            }
370            return;
371        }
372
373
374        $identifier = $ID;
375
376
377        /**
378         * Page Id Website / root Permalink ?
379         */
380        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
381        if ($shortPageId !== null) {
382            $pageId = PageUrlPath::decodePageId($shortPageId);
383            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
384                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
385                if ($page !== null && $page->exists()) {
386                    $this->executePermanentRedirect(
387                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
388                        self::TARGET_ORIGIN_PERMALINK
389                    );
390                }
391            }
392
393            /**
394             * Page Id Abbr ?
395             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
396             */
397            if (
398                $pageId !== null
399            ) {
400                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
401                if ($page === null) {
402                    // or the length of the abbr has changed
403                    $canonicalDatabasePage = new DatabasePageRow();
404                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
405                    if ($row !== null) {
406                        $canonicalDatabasePage->setRow($row);
407                        $page = $canonicalDatabasePage->getMarkupPath();
408                    }
409                }
410                if ($page !== null && $page->exists()) {
411                    /**
412                     * If the url canonical id has changed, we show it
413                     * to the writer by performing a permanent redirect
414                     */
415                    if ($identifier != $page->getUrlId()) {
416                        // Google asks for a redirect
417                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
418                        // People access your site through several different URLs.
419                        // If, for example, your home page can be reached in multiple ways
420                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
421                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
422                        // and use redirects to send traffic from the other URLs to your preferred URL.
423                        $this->executePermanentRedirect(
424                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
425                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
426                        );
427                        return;
428                    }
429
430                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
431                    return;
432
433                }
434                // permanent url not yet in the database
435                // Other permanent such as permanent canonical ?
436                // We let the process go with the new identifier
437
438            }
439
440        }
441
442        // Global variable needed in the process
443        global $conf;
444
445        /**
446         * Identifier is a Canonical ?
447         */
448        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
449        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
450        if ($canonicalPage !== null && $canonicalPage->exists()) {
451            /**
452             * Does the canonical url is canonical name based
453             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
454             */
455            if ($canonicalPage->getUrlId() === $identifier) {
456                $res = $this->executeTransparentRedirect(
457                    $canonicalPage->getWikiId(),
458                    self::TARGET_ORIGIN_CANONICAL
459                );
460            } else {
461                $res = $this->executePermanentRedirect(
462                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
463                    self::TARGET_ORIGIN_CANONICAL
464                );
465            }
466            if ($res) {
467                return;
468            }
469        }
470
471        /**
472         * Identifier is an alias
473         */
474        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
475        if (
476            $aliasRequestedPage !== null
477            && $aliasRequestedPage->exists()
478            // The build alias is the file system metadata alias
479            // it may be null if the replication in the database was not successful
480            && $aliasRequestedPage->getBuildAlias() !== null
481        ) {
482            $buildAlias = $aliasRequestedPage->getBuildAlias();
483            switch ($buildAlias->getType()) {
484                case AliasType::REDIRECT:
485                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
486                    if ($res) {
487                        return;
488                    }
489                    break;
490                case AliasType::SYNONYM:
491                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
492                    if ($res) {
493                        return;
494                    }
495                    break;
496                default:
497                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
498                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
499                    if ($res) {
500                        return;
501                    }
502                    break;
503            }
504        }
505
506
507        // If there is a redirection defined in the page rules
508        $result = $this->processingPageRules();
509        if ($result) {
510            // A redirection has occurred
511            // finish the process
512            return;
513        }
514
515        /**
516         *
517         * There was no redirection found, redirect to edit mode if writer
518         *
519         */
520        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
521
522            $this->gotToEditMode($event);
523            // Stop here
524            return;
525
526        }
527
528        /**
529         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
530         */
531        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
532            return;
533        }
534
535        // We are reader and their is no redirection set, we apply the algorithm
536        $readerAlgorithms = array();
537        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
538        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
539        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
540
541        while (
542            ($algorithm = array_shift($readerAlgorithms)) != null
543        ) {
544
545            switch ($algorithm) {
546
547                case self::NOTHING:
548                    return;
549
550                case self::GO_TO_BEST_END_PAGE_NAME:
551
552                    /**
553                     * @var MarkupPath $bestEndPage
554                     */
555                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
556                    if ($bestEndPage != null) {
557                        $res = false;
558                        switch ($method) {
559                            case self::REDIRECT_PERMANENT_METHOD:
560                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
561                                break;
562                            case self::REDIRECT_NOTFOUND_METHOD:
563                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
564                                break;
565                            default:
566                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
567                        }
568                        if ($res) {
569                            // Redirection has succeeded
570                            return;
571                        }
572                    }
573                    break;
574
575                case self::GO_TO_NS_START_PAGE:
576
577                    // Start page with the conf['start'] parameter
578                    $startPage = getNS($identifier) . ':' . $conf['start'];
579                    if (page_exists($startPage)) {
580                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
581                        if ($res) {
582                            return;
583                        }
584                    }
585
586                    // Start page with the same name than the namespace
587                    $startPage = getNS($identifier) . ':' . curNS($identifier);
588                    if (page_exists($startPage)) {
589                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
590                        if ($res) {
591                            return;
592                        }
593                    }
594                    break;
595
596                case self::GO_TO_BEST_PAGE_NAME:
597
598                    $bestPageId = null;
599
600                    $bestPage = $this->getBestPage($identifier);
601                    $bestPageId = $bestPage['id'];
602                    $scorePageName = $bestPage['score'];
603
604                    // Get Score from a Namespace
605                    $bestNamespace = $this->scoreBestNamespace($identifier);
606                    $bestNamespaceId = $bestNamespace['namespace'];
607                    $namespaceScore = $bestNamespace['score'];
608
609                    // Compare the two score
610                    if ($scorePageName > 0 or $namespaceScore > 0) {
611                        if ($scorePageName > $namespaceScore) {
612                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
613                        } else {
614                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
615                        }
616                        return;
617                    }
618                    break;
619
620                case self::GO_TO_BEST_NAMESPACE:
621
622                    $scoreNamespace = $this->scoreBestNamespace($identifier);
623                    $bestNamespaceId = $scoreNamespace['namespace'];
624                    $score = $scoreNamespace['score'];
625
626                    if ($score > 0) {
627                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
628                        return;
629                    }
630                    break;
631
632                case self::GO_TO_SEARCH_ENGINE:
633
634                    $this->redirectToSearchEngine();
635
636                    return;
637
638                // End Switch Action
639            }
640
641            // End While Action
642        }
643
644
645    }
646
647
648    /**
649     * getBestNamespace
650     * Return a list with 'BestNamespaceId Score'
651     * @param $id
652     * @return array
653     */
654    private
655    function scoreBestNamespace($id)
656    {
657
658        global $conf;
659
660        // Parameters
661        $pageNameSpace = getNS($id);
662
663        // If the page has an existing namespace start page take it, other search other namespace
664        $startPageNameSpace = $pageNameSpace . ":";
665        $dateAt = '';
666        // $startPageNameSpace will get a full path (ie with start or the namespace
667        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
668        if (page_exists($startPageNameSpace)) {
669            $nameSpaces = array($startPageNameSpace);
670        } else {
671            $nameSpaces = ft_pageLookup($conf['start']);
672        }
673
674        // Parameters and search the best namespace
675        $pathNames = explode(':', $pageNameSpace);
676        $bestNbWordFound = 0;
677        $bestNamespaceId = '';
678        foreach ($nameSpaces as $nameSpace) {
679
680            $nbWordFound = 0;
681            foreach ($pathNames as $pathName) {
682                if (strlen($pathName) > 2) {
683                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
684                }
685            }
686            if ($nbWordFound > $bestNbWordFound) {
687                // Take only the smallest namespace
688                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
689                    $bestNbWordFound = $nbWordFound;
690                    $bestNamespaceId = $nameSpace;
691                }
692            }
693        }
694
695        $startPageFactor = $this->getConf('WeightFactorForStartPage');
696        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
697        if ($bestNbWordFound > 0) {
698            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
699        } else {
700            $bestNamespaceScore = 0;
701        }
702
703
704        return array(
705            'namespace' => $bestNamespaceId,
706            'score' => $bestNamespaceScore
707        );
708
709    }
710
711    /**
712     * @param $event
713     */
714    private
715    function gotToEditMode(&$event)
716    {
717        global $ACT;
718        $ACT = 'edit';
719
720    }
721
722
723    /**
724     * Redirect to an internal page ie:
725     *   * on the same domain
726     *   * no HTTP redirect
727     *   * id rewrite
728     * @param string $targetPageId - target page id
729     * @param string $targetOriginId - the source of the target (redirect)
730     * @return bool - return true if the user has the permission and that the redirect was done
731     * @throws Exception
732     */
733    private
734    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
735    {
736        /**
737         * Because we set the ID globally for the ID redirect
738         * we make sure that this is not a {@link MarkupPath}
739         * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
740         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
741         */
742        if (is_object($targetPageId)) {
743            $class = get_class($targetPageId);
744            LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
745        }
746
747        if (is_object($targetOriginId)) {
748            $class = get_class($targetOriginId);
749            LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
750        }
751
752        // If the user does not have the right to see the target page
753        // don't do anything
754        if (!(Identity::isReader($targetPageId))) {
755            return false;
756        }
757
758        // Change the id
759        global $ID;
760        global $INFO;
761        $sourceId = $ID;
762        $ID = $targetPageId;
763        if (isset($_REQUEST["id"])) {
764            $_REQUEST["id"] = $targetPageId;
765        }
766        if (isset($_GET["id"])) {
767            $_GET["id"] = $targetPageId;
768        }
769
770        /**
771         * Refresh the $INFO data
772         *
773         * the info attributes are used elsewhere
774         *   'id': for the sidebar
775         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
776         *   'rev' : for the edit button to be sure that the page is still the same
777         */
778        $INFO = pageinfo();
779
780        /**
781         * Not compatible with
782         * https://www.dokuwiki.org/config:send404 is enabled
783         *
784         * This check happens before that dokuwiki is started
785         * and send an header in doku.php
786         *
787         * We send a warning
788         */
789        global $conf;
790        if ($conf['send404'] == true) {
791            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
792        }
793
794        // Redirection
795        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
796
797        return true;
798
799    }
800
801    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
802    {
803        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
804    }
805
806    /**
807     * The general HTTP Redirect method to an internal page
808     * where the redirection method decide which type of redirection
809     * @param string $targetIdOrUrl - a dokuwiki id or an url
810     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
811     * @param string $method - the redirection method
812     */
813    private
814    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
815    {
816
817        global $ID;
818
819
820        // Log the redirections
821        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
822
823
824        // An http external url ?
825        try {
826            $isValid = Url::createFromString($targetIdOrUrl)->isHttpUrl();
827        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
828            $isValid = false;
829        }
830
831        // If there is a bug in the isValid function for an internal url
832        // We get a loop.
833        // The Url becomes the id, the id is unknown and we do a redirect again
834        //
835        // We check then if the target starts with the base url
836        // if this is the case, it's valid
837        if (!$isValid && strpos($targetIdOrUrl, DOKU_URL) === 0) {
838            $isValid = true;
839        }
840        if ($isValid) {
841
842            // defend against HTTP Response Splitting
843            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
844            $targetUrl = stripctl($targetIdOrUrl);
845
846        } else {
847
848
849            // Explode the page ID and the anchor (#)
850            $link = explode('#', $targetIdOrUrl, 2);
851
852            // Query String to pass the message
853            $urlParams = [];
854            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
855                $urlParams = array(
856                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
857                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
858                );
859            }
860
861            // if this is search engine redirect
862            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
863                $replacementPart = array(':', '_', '-');
864                $query = str_replace($replacementPart, ' ', $ID);
865                $urlParams["do"] = "search";
866                $urlParams["q"] = $query;
867            }
868
869            $targetUrl = wl($link[0], $urlParams, true, '&');
870            // %3A back to :
871            $targetUrl = str_replace("%3A", ":", $targetUrl);
872            if ($link[1]) {
873                $targetUrl .= '#' . rawurlencode($link[1]);
874            }
875
876        }
877
878        /**
879         * The dokuwiki function {@link send_redirect()}
880         * set the `Location header` and in php, the header function
881         * in this case change the status code to 302 Arghhhh.
882         * The code below is adapted from this function {@link send_redirect()}
883         */
884        global $MSG; // are there any undisplayed messages? keep them in session for display
885        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
886            //reopen session, store data and close session again
887            @session_start();
888            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
889        }
890        session_write_close(); // always close the session
891
892        switch ($method) {
893            case self::REDIRECT_PERMANENT_METHOD:
894                ExecutionContext::getActualOrCreateFromEnv()
895                    ->response()
896                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
897                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
898                    ->end();
899                return true;
900            case self::REDIRECT_NOTFOUND_METHOD:
901
902                // Empty 404 body to not get the standard 404 page of the browser
903                // but a blank page to avoid a sort of FOUC.
904                // ie the user see a page briefly
905                ExecutionContext::getActualOrCreateFromEnv()
906                    ->response()
907                    ->setStatus(HttpResponseStatus::NOT_FOUND)
908                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
909                    ->setBody(self::PAGE_404, Mime::getHtml())
910                    ->end();
911                return true;
912
913            default:
914                LogUtility::msg("The method ($method) is not an http redirection");
915                return false;
916        }
917
918
919    }
920
921    /**
922     * @param $id
923     * @return array
924     */
925    private
926    function getBestPage($id): array
927    {
928
929        // The return parameters
930        $bestPageId = null;
931        $scorePageName = null;
932
933        // Get Score from a page
934        $pageName = noNS($id);
935        $pagesWithSameName = ft_pageLookup($pageName);
936        if (count($pagesWithSameName) > 0) {
937
938            // Search same namespace in the page found than in the Id page asked.
939            $bestNbWordFound = 0;
940
941
942            $wordsInPageSourceId = explode(':', $id);
943            foreach ($pagesWithSameName as $targetPageId => $title) {
944
945                // Nb of word found in the target page id
946                // that are in the source page id
947                $nbWordFound = 0;
948                foreach ($wordsInPageSourceId as $word) {
949                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
950                }
951
952                if ($bestPageId == null) {
953
954                    $bestNbWordFound = $nbWordFound;
955                    $bestPageId = $targetPageId;
956
957                } else {
958
959                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
960
961                        $bestNbWordFound = $nbWordFound;
962                        $bestPageId = $targetPageId;
963
964                    }
965
966                }
967
968            }
969            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
970            return array(
971                'id' => $bestPageId,
972                'score' => $scorePageName);
973        }
974        return array(
975            'id' => $bestPageId,
976            'score' => $scorePageName
977        );
978
979    }
980
981
982    /**
983     * Redirect to the search engine
984     */
985    private
986    function redirectToSearchEngine()
987    {
988
989        global $ID;
990        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
991
992    }
993
994
995    /**
996     *
997     *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
998     *   * For a SQlite database, it will add a row into the log
999     *
1000     * @param string $sourcePageId
1001     * @param $targetPageId
1002     * @param $algorithmic
1003     * @param $method - http or rewrite
1004     */
1005    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1006    {
1007
1008        $row = array(
1009            "TIMESTAMP" => date("c"),
1010            "SOURCE" => $sourcePageId,
1011            "TARGET" => $targetPageId,
1012            "REFERRER" => $_SERVER['HTTP_REFERER'],
1013            "TYPE" => $algorithmic,
1014            "METHOD" => $method
1015        );
1016        $request = Sqlite::createOrGetBackendSqlite()
1017            ->createRequest()
1018            ->setTableRow('redirections_log', $row);
1019        try {
1020            $request
1021                ->execute();
1022        } catch (ExceptionCompile $e) {
1023            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1024        } finally {
1025            $request->close();
1026        }
1027
1028
1029    }
1030
1031    /**
1032     * This function check if there is a redirection declared
1033     * in the redirection table
1034     * @return bool - true if a rewrite or redirection occurs
1035     * @throws Exception
1036     */
1037    private function processingPageRules(): bool
1038    {
1039        global $ID;
1040
1041        $calculatedTarget = null;
1042        $ruleMatcher = null; // Used in a warning message if the target page does not exist
1043        // Known redirection in the table
1044        // Get the page from redirection data
1045        $rules = $this->pageRules->getRules();
1046        foreach ($rules as $rule) {
1047
1048            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1049            $ruleTarget = $rule[PageRules::TARGET_NAME];
1050
1051            // Glob to Rexgexp
1052            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1053
1054            // Match ?
1055            // https://www.php.net/manual/en/function.preg-match.php
1056            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1057            if ($pregMatchResult === false) {
1058                // The `if` to take into account this problem
1059                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1060                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1061                return false;
1062            }
1063            if ($pregMatchResult) {
1064                $calculatedTarget = $ruleTarget;
1065                foreach ($matches as $key => $match) {
1066                    if ($key == 0) {
1067                        continue;
1068                    } else {
1069                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1070                    }
1071                }
1072                break;
1073            }
1074        }
1075
1076        if ($calculatedTarget == null) {
1077            return false;
1078        }
1079
1080        // If this is an external redirect (other domain)
1081        try {
1082            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1083        } catch (ExceptionBadSyntax $e) {
1084            $isHttpUrl = false;
1085        }
1086        if ($isHttpUrl) {
1087            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1088            return true;
1089        }
1090
1091        // If the page exist
1092        if (page_exists($calculatedTarget)) {
1093
1094            // This is DokuWiki Id and should always be lowercase
1095            // The page rule may have change that
1096            $calculatedTarget = strtolower($calculatedTarget);
1097            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1098            if ($res) {
1099                return true;
1100            } else {
1101                return false;
1102            }
1103
1104        } else {
1105
1106            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1107            return false;
1108
1109        }
1110
1111    }
1112
1113    private function performNotFoundRedirect(string $targetId, string $origin): bool
1114    {
1115        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1116    }
1117
1118
1119}
1120