1 <?php
2 
3 
4 use ComboStrap\DatabasePageRow;
5 use ComboStrap\DokuwikiId;
6 use ComboStrap\ExceptionBadArgument;
7 use ComboStrap\ExceptionBadSyntax;
8 use ComboStrap\ExceptionCompile;
9 use ComboStrap\ExceptionSqliteNotAvailable;
10 use ComboStrap\ExecutionContext;
11 use ComboStrap\FileSystems;
12 use ComboStrap\HttpResponse;
13 use ComboStrap\HttpResponseStatus;
14 use ComboStrap\Identity;
15 use ComboStrap\LogUtility;
16 use ComboStrap\MarkupPath;
17 use ComboStrap\Meta\Field\AliasType;
18 use ComboStrap\Mime;
19 use ComboStrap\PageId;
20 use ComboStrap\PageRules;
21 use ComboStrap\PageUrlPath;
22 use ComboStrap\PageUrlType;
23 use ComboStrap\RouterBestEndPage;
24 use ComboStrap\Site;
25 use ComboStrap\SiteConfig;
26 use ComboStrap\Sqlite;
27 use ComboStrap\Web\Url;
28 use ComboStrap\Web\UrlEndpoint;
29 use ComboStrap\Web\UrlRewrite;
30 use ComboStrap\WikiPath;
31 
32 require_once(__DIR__ . '/../vendor/autoload.php');
33 
34 /**
 * Class action_plugin_combo_router
36  *
37  * The actual URL manager
38  *
39  *
40  */
41 class action_plugin_combo_router extends DokuWiki_Action_Plugin
42 {
43 
    /**
     * Older configuration key name (kept for reference)
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    /**
     * Configuration key read in {@link action_plugin_combo_router::register()}:
     * the event hooks are registered only when its value is truthy
     */
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type (ie how a redirection is performed)
    // Transparent: the requested id is rewritten, no HTTP redirect is sent
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    // Used when the target is calculated (best page name, start page, ...)
    // NOTE(review): `See other` is the HTTP 303 reason phrase, not 404 - confirm the status that is really sent
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from (ie the algorithm that has found the target)
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    // The `GoTo*` and `Nothing` values are the possible values of the reader algorithm
    // configurations (`ActionReaderFirst`, `ActionReaderSecond`, `ActionReaderThird`)
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    // Unlike its siblings, this one is used as a configuration key (a writer goes to edit mode when it is set to 1)
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    // Header names and the prefixes that are cut off by getUrlFromRefresh() and getUrlFromLocation()
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";
99 
100 
101     /**
102      * @var PageRules
103      */
104     private $pageRules;
105 
106 
107     function __construct()
108     {
109         // enable direct access to language strings
110         // ie $this->lang
111         $this->setupLocale();
112 
113     }
114 
115     /**
116      * @param string $refreshHeader
117      * @return false|string
118      */
119     public static function getUrlFromRefresh(string $refreshHeader)
120     {
121         return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
122     }
123 
124     public static function getUrlFromLocation($refreshHeader)
125     {
126         return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
127     }
128 
129     /**
130      * @return array|mixed|string|string[]
131      *
132      * Unfortunately, DOKUWIKI_STARTED is not the first event
133      * The id may have been changed by
134      * {@link action_plugin_combo_metalang::load_lang()}
135      * function, that's why we have this function
136      * to get the original requested id
137      */
138     private static function getOriginalIdFromRequest()
139     {
140         $originalId = $_GET["id"] ?? null;
141         if ($originalId === null) {
142             return null;
143         }
144         return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
145     }
146 
147     /**
148      * Determine if the request should be banned based on the id
149      *
150      * @param string $id
151      * @return bool
152      *
153      * See also {@link https://perishablepress.com/7g-firewall/#features}
154      * for blocking rules on http request data such as:
155      *   * query_string
156      *   * user_agent,
157      *   * remote host
158      */
159     public static function isShadowBanned(string $id): bool
160     {
161         /**
162          * ie
163          * wp-json:api:flutter_woo:config_file
164          * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
165          * wp-admin
166          * 2020:wp-includes:wlwmanifest.xml
167          * wp-content:start
168          * wp-admin:css:start
169          * sito:wp-includes:wlwmanifest.xml
170          * site:wp-includes:wlwmanifest.xml
171          * cms:wp-includes:wlwmanifest.xml
172          * test:wp-includes:wlwmanifest.xml
173          * media:wp-includes:wlwmanifest.xml
174          * wp2:wp-includes:wlwmanifest.xml
175          * 2019:wp-includes:wlwmanifest.xml
176          * shop:wp-includes:wlwmanifest.xml
177          * wp1:wp-includes:wlwmanifest.xml
178          * news:wp-includes:wlwmanifest.xml
179          * 2018:wp-includes:wlwmanifest.xml
180          */
181         if (strpos($id, 'wp-') !== false) {
182             return true;
183         }
184 
185         /**
186          * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
187          * db:oracle:999999.9:union:all:select_null:from_dual
188          * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
189          */
190         if (preg_match('/_chr_|_0_0/', $id) === 1) {
191             return true;
192         }
193 
194 
195         /**
196          * ie
197          * git:objects:
198          * git:refs:heads:stable
199          * git:logs:refs:heads:main
200          * git:logs:refs:heads:stable
201          * git:hooks:pre-push.sample
202          * git:hooks:pre-receive.sample
203          */
204         if (strpos($id, "git:") === 0) {
205             return true;
206         }
207 
208         return false;
209 
210     }
211 
212     /**
213      * @param string $id
214      * @return bool
215      * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
216      * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
217      * well-known:dnt-policy.txt
218      */
219     public static function isWellKnownFile(string $id): bool
220     {
221         return strpos($id, "well-known") === 0;
222     }
223 
224 
225     function register(Doku_Event_Handler $controller)
226     {
227 
228         if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
229 
230             /**
231              * This will call the function {@link action_plugin_combo_router::_router()}
232              * The event is not DOKUWIKI_STARTED because this is not the first one
233              *
234              * https://www.dokuwiki.org/devel:event:init_lang_load
235              */
236             $controller->register_hook('DOKUWIKI_STARTED',
237                 'BEFORE',
238                 $this,
239                 'router',
240                 array());
241 
242             /**
243              * This is the real first call of Dokuwiki
244              * Unfortunately, it does not create the environment
245              * We just ban to spare server resources
246              *
247              * https://www.dokuwiki.org/devel:event:init_lang_load
248              */
249             $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());
250 
251         }
252 
253 
254     }
255 
256     /**
257      *
258      * We have created a spacial ban function that is
259      * called before the first function
260      * {@link action_plugin_combo_metalang::load_lang()}
261      * to spare CPU.
262      *
263      * @param $event
264      * @throws Exception
265      */
266     function ban(&$event)
267     {
268 
269         $id = self::getOriginalIdFromRequest();
270         if ($id === null) {
271             return;
272         }
273         $page = MarkupPath::createMarkupFromId($id);
274         if (!FileSystems::exists($page)) {
275             // Well known
276             if (self::isWellKnownFile($id)) {
277                 $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
278                 ExecutionContext::getActualOrCreateFromEnv()
279                     ->response()
280                     ->setStatus(HttpResponseStatus::NOT_FOUND)
281                     ->end();
282                 return;
283             }
284 
285             // Shadow banned
286             if (self::isShadowBanned($id)) {
287                 $webSiteHomePage = Site::getIndexPageName();
288                 $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
289             }
290         }
291     }
292 
293     /**
294      * @param $event Doku_Event
295      * @param $param
296      * @return void
297      * @throws Exception
298      */
299     function router(&$event, $param)
300     {
301 
302         /**
303          * Just the {@link ExecutionContext::SHOW_ACTION}
304          * may be redirected
305          */
306         $executionContext = ExecutionContext::getActualOrCreateFromEnv();
307         if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
308             return;
309         }
310 
311         $urlRewrite = Site::getUrlRewrite();
312         if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
313             UrlRewrite::sendErrorMessage();
314             return;
315         }
316 
317         global $ID;
318 
319         /**
320          * Without SQLite, this module does not work further
321          */
322         try {
323             Sqlite::createOrGetSqlite();
324         } catch (ExceptionSqliteNotAvailable $e) {
325             return;
326         }
327 
328         $this->pageRules = new PageRules();
329 
330 
331         /**
332          * Unfortunately, DOKUWIKI_STARTED is not the first event
333          * The id may have been changed by
334          * {@link action_plugin_combo_lang::load_lang()}
335          * function, that's why we check against the {@link $_REQUEST}
336          * and not the global ID
337          */
338         $originalId = self::getOriginalIdFromRequest();
339 
340         /**
341          * Page is an existing id ?
342          */
343         $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
344         if (FileSystems::exists($requestedMarkupPath)) {
345 
346             /**
347              * If this is not the root home page
348              * and if the canonical id is the not the same,
349              * and if this is not a historical page (revision)
350              * redirect
351              */
352             if (
353                 $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
354                 && $ID != Site::getIndexPageName()
355                 && !isset($_REQUEST["rev"])
356             ) {
357                 /**
358                  * TODO: When saving for the first time, the page is not stored in the database
359                  *   but that's not the case actually
360                  */
361                 $databasePageRow = $requestedMarkupPath->getDatabasePage();
362                 if ($databasePageRow->exists()) {
363                     /**
364                      * A move may leave the database in a bad state,
365                      * unfortunately (ie page is not in index, unable to update, ...)
366                      * We test therefore if the database page id exists
367                      */
368                     $targetPageId = $databasePageRow->getFromRow("id");
369                     $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
370                     if (FileSystems::exists($targetPath)) {
371                         $this->executePermanentRedirect(
372                             $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
373                             self::TARGET_ORIGIN_PERMALINK_EXTENDED
374                         );
375                     }
376                 }
377             }
378             return;
379         }
380 
381 
382         $identifier = $ID;
383 
384 
385         /**
386          * Page Id in the url
387          */
388         $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
389         if ($shortPageId != null) {
390             $pageId = PageUrlPath::decodePageId($shortPageId);
391         } else {
392             /**
393              * Permalink with id
394              */
395             $pageId = PageUrlPath::decodePageId($identifier);
396         }
397         if ($pageId !== null) {
398 
399             if ($requestedMarkupPath->getParent() === null) {
400                 $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
401                 if ($page !== null && $page->exists()) {
402                     $this->executePermanentRedirect(
403                         $page->getCanonicalUrl()->toAbsoluteUrlString(),
404                         self::TARGET_ORIGIN_PERMALINK
405                     );
406                     return;
407                 }
408             }
409 
410             /**
411              * Page Id Abbr ?
412              * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
413              */
414             $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
415             if ($page === null) {
416                 // or the length of the abbr has changed
417                 $canonicalDatabasePage = new DatabasePageRow();
418                 $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
419                 if ($row !== null) {
420                     $canonicalDatabasePage->setRow($row);
421                     $page = $canonicalDatabasePage->getMarkupPath();
422                 }
423             }
424             if ($page !== null && $page->exists()) {
425                 /**
426                  * If the url canonical id has changed, we show it
427                  * to the writer by performing a permanent redirect
428                  */
429                 if ($identifier != $page->getUrlId()) {
430                     // Google asks for a redirect
431                     // https://developers.google.com/search/docs/advanced/crawling/301-redirects
432                     // People access your site through several different URLs.
433                     // If, for example, your home page can be reached in multiple ways
434                     // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
435                     // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
436                     // and use redirects to send traffic from the other URLs to your preferred URL.
437                     $this->executePermanentRedirect(
438                         $page->getCanonicalUrl()->toAbsoluteUrlString(),
439                         self::TARGET_ORIGIN_PERMALINK_EXTENDED
440                     );
441                     return;
442                 }
443 
444                 $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
445                 return;
446 
447             }
448             // permanent url not yet in the database
449             // Other permanent such as permanent canonical ?
450             // We let the process go with the new identifier
451 
452         }
453 
454         // Global variable needed in the process
455         global $conf;
456 
457         /**
458          * Identifier is a Canonical ?
459          */
460         $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
461         $canonicalPage = $canonicalDatabasePage->getMarkupPath();
462         if ($canonicalPage !== null && $canonicalPage->exists()) {
463             /**
464              * Does the canonical url is canonical name based
465              * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
466              */
467             if ($canonicalPage->getUrlId() === $identifier) {
468                 $res = $this->executeTransparentRedirect(
469                     $canonicalPage->getWikiId(),
470                     self::TARGET_ORIGIN_CANONICAL
471                 );
472             } else {
473                 $res = $this->executePermanentRedirect(
474                     $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
475                     self::TARGET_ORIGIN_CANONICAL
476                 );
477             }
478             if ($res) {
479                 return;
480             }
481         }
482 
483         /**
484          * Identifier is an alias
485          */
486         $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
487         if (
488             $aliasRequestedPage !== null
489             && $aliasRequestedPage->exists()
490             // The build alias is the file system metadata alias
491             // it may be null if the replication in the database was not successful
492             && $aliasRequestedPage->getBuildAlias() !== null
493         ) {
494             $buildAlias = $aliasRequestedPage->getBuildAlias();
495             switch ($buildAlias->getType()) {
496                 case AliasType::REDIRECT:
497                     $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
498                     if ($res) {
499                         return;
500                     }
501                     break;
502                 case AliasType::SYNONYM:
503                     $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
504                     if ($res) {
505                         return;
506                     }
507                     break;
508                 default:
509                     LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
510                     $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
511                     if ($res) {
512                         return;
513                     }
514                     break;
515             }
516         }
517 
518 
519         // If there is a redirection defined in the page rules
520         $result = $this->processingPageRules();
521         if ($result) {
522             // A redirection has occurred
523             // finish the process
524             return;
525         }
526 
527         /**
528          *
529          * There was no redirection found, redirect to edit mode if writer
530          *
531          */
532         if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {
533 
534             $this->gotToEditMode($event);
535             // Stop here
536             return;
537 
538         }
539 
540         /**
541          *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
542          */
543         if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
544             return;
545         }
546 
547         // We are reader and their is no redirection set, we apply the algorithm
548         $readerAlgorithms = array();
549         $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
550         $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
551         $readerAlgorithms[2] = $this->getConf('ActionReaderThird');
552 
553         while (
554             ($algorithm = array_shift($readerAlgorithms)) != null
555         ) {
556 
557             switch ($algorithm) {
558 
559                 case self::NOTHING:
560                     return;
561 
562                 case self::GO_TO_BEST_END_PAGE_NAME:
563 
564                     /**
565                      * @var MarkupPath $bestEndPage
566                      */
567                     list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
568                     if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
569                         $res = false;
570                         switch ($method) {
571                             case self::REDIRECT_PERMANENT_METHOD:
572                                 $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
573                                 break;
574                             case self::REDIRECT_NOTFOUND_METHOD:
575                                 $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
576                                 break;
577                             default:
578                                 LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
579                         }
580                         if ($res) {
581                             // Redirection has succeeded
582                             return;
583                         }
584                     }
585                     break;
586 
587                 case self::GO_TO_NS_START_PAGE:
588 
589                     // Start page with the conf['start'] parameter
590                     $startPage = getNS($identifier) . ':' . $conf['start'];
591                     if (page_exists($startPage)) {
592                         $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
593                         if ($res) {
594                             return;
595                         }
596                     }
597 
598                     // Start page with the same name than the namespace
599                     $startPage = getNS($identifier) . ':' . curNS($identifier);
600                     if (page_exists($startPage)) {
601                         $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
602                         if ($res) {
603                             return;
604                         }
605                     }
606                     break;
607 
608                 case self::GO_TO_BEST_PAGE_NAME:
609 
610                     $bestPageId = null;
611 
612                     $bestPage = $this->getBestPage($identifier);
613                     $bestPageId = $bestPage['id'];
614                     $scorePageName = $bestPage['score'];
615 
616                     // Get Score from a Namespace
617                     $bestNamespace = $this->scoreBestNamespace($identifier);
618                     $bestNamespaceId = $bestNamespace['namespace'];
619                     $namespaceScore = $bestNamespace['score'];
620 
621                     // Compare the two score
622                     if ($scorePageName > 0 or $namespaceScore > 0) {
623                         if ($scorePageName > $namespaceScore) {
624                             $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
625                         } else {
626                             $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
627                         }
628                         return;
629                     }
630                     break;
631 
632                 case self::GO_TO_BEST_NAMESPACE:
633 
634                     $scoreNamespace = $this->scoreBestNamespace($identifier);
635                     $bestNamespaceId = $scoreNamespace['namespace'];
636                     $score = $scoreNamespace['score'];
637 
638                     if ($score > 0) {
639                         $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
640                         return;
641                     }
642                     break;
643 
644                 case self::GO_TO_SEARCH_ENGINE:
645 
646                     $this->redirectToSearchEngine();
647 
648                     return;
649 
650                 // End Switch Action
651             }
652 
653             // End While Action
654         }
655 
656 
657     }
658 
659 
660     /**
661      * getBestNamespace
662      * Return a list with 'BestNamespaceId Score'
663      * @param $id
664      * @return array
665      */
666     private
667     function scoreBestNamespace($id)
668     {
669 
670         global $conf;
671 
672         // Parameters
673         $pageNameSpace = getNS($id);
674 
675         // If the page has an existing namespace start page take it, other search other namespace
676         $startPageNameSpace = $pageNameSpace . ":";
677         $dateAt = '';
678         // $startPageNameSpace will get a full path (ie with start or the namespace
679         resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
680         if (page_exists($startPageNameSpace)) {
681             $nameSpaces = array($startPageNameSpace);
682         } else {
683             $nameSpaces = ft_pageLookup($conf['start']);
684         }
685 
686         // Parameters and search the best namespace
687         $pathNames = explode(':', $pageNameSpace);
688         $bestNbWordFound = 0;
689         $bestNamespaceId = '';
690         foreach ($nameSpaces as $nameSpace) {
691 
692             $nbWordFound = 0;
693             foreach ($pathNames as $pathName) {
694                 if (strlen($pathName) > 2) {
695                     $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
696                 }
697             }
698             if ($nbWordFound > $bestNbWordFound) {
699                 // Take only the smallest namespace
700                 if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
701                     $bestNbWordFound = $nbWordFound;
702                     $bestNamespaceId = $nameSpace;
703                 }
704             }
705         }
706 
707         $startPageFactor = $this->getConf('WeightFactorForStartPage');
708         $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
709         if ($bestNbWordFound > 0) {
710             $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
711         } else {
712             $bestNamespaceScore = 0;
713         }
714 
715 
716         return array(
717             'namespace' => $bestNamespaceId,
718             'score' => $bestNamespaceScore
719         );
720 
721     }
722 
723     /**
724      * @param $event
725      */
726     private
727     function gotToEditMode(&$event)
728     {
729         global $ACT;
730         $ACT = 'edit';
731 
732     }
733 
734 
735     /**
736      * Redirect to an internal page ie:
737      *   * on the same domain
738      *   * no HTTP redirect
739      *   * id rewrite
740      * @param string $targetPageId - target page id
741      * @param string $targetOriginId - the source of the target (redirect)
742      * @return bool - return true if the user has the permission and that the redirect was done
743      * @throws Exception
744      */
745     private
746     function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
747     {
748         /**
749          * Because we set the ID globally for the ID redirect
750          * we make sure that this is not a {@link MarkupPath}
751          * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
752          * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
753          */
754         if (is_object($targetPageId)) {
755             $class = get_class($targetPageId);
756             LogUtility::msg("The parameters targetPageId ($targetPageId) is an object of the class ($class) and it should be a page id");
757         }
758 
759         if (is_object($targetOriginId)) {
760             $class = get_class($targetOriginId);
761             LogUtility::msg("The parameters targetOriginId ($targetOriginId) is an object of the class ($class) and it should be a page id");
762         }
763 
764         // If the user does not have the right to see the target page
765         // don't do anything
766         if (!(Identity::isReader($targetPageId))) {
767             return false;
768         }
769 
770         // Change the id
771         global $ID;
772         global $INFO;
773         $sourceId = $ID;
774         $ID = $targetPageId;
775         if (isset($_REQUEST["id"])) {
776             $_REQUEST["id"] = $targetPageId;
777         }
778         if (isset($_GET["id"])) {
779             $_GET["id"] = $targetPageId;
780         }
781 
782         /**
783          * Refresh the $INFO data
784          *
785          * the info attributes are used elsewhere
786          *   'id': for the sidebar
787          *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
788          *   'rev' : for the edit button to be sure that the page is still the same
789          */
790         $INFO = pageinfo();
791 
792         /**
793          * Not compatible with
794          * https://www.dokuwiki.org/config:send404 is enabled
795          *
796          * This check happens before that dokuwiki is started
797          * and send an header in doku.php
798          *
799          * We send a warning
800          */
801         global $conf;
802         if ($conf['send404'] == true) {
803             LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
804         }
805 
806         // Redirection
807         $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);
808 
809         return true;
810 
811     }
812 
813     private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
814     {
815         return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
816     }
817 
818     /**
819      * The general HTTP Redirect method to an internal page
820      * where the redirection method decide which type of redirection
821      * @param string $targetIdOrUrl - a dokuwiki id or an url
822      * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
823      * @param string $method - the redirection method
824      */
825     private
826     function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
827     {
828 
829         global $ID;
830 
831 
832         // Log the redirections
833         $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);
834 
835 
836         // An http external url ?
837         try {
838             $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
839         } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
840             $isHttpUrl = false;
841         }
842 
843         // If there is a bug in the isValid function for an internal url
844         // We get a loop.
845         // The Url becomes the id, the id is unknown and we do a redirect again
846         //
847         // We check then if the target starts with the base url
848         // if this is the case, it's valid
849         if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
850             $isHttpUrl = true;
851         }
852         if ($isHttpUrl) {
853 
854             // defend against HTTP Response Splitting
855             // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
856             $targetUrl = stripctl($targetIdOrUrl);
857 
858         } else {
859 
860 
861             // Explode the page ID and the anchor (#)
862             $link = explode('#', $targetIdOrUrl, 2);
863 
864             $url = UrlEndpoint::createDokuUrl();
865 
866             $urlParams = [];
867             // if this is search engine redirect
868             if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
869                 $replacementPart = array(':', '_', '-');
870                 $query = str_replace($replacementPart, ' ', $ID);
871                 $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
872                 $url->setQueryParameter("q", $query);
873             }
874 
875             /**
876              * Doing a permanent redirect with a added query string
877              * create a new page url on the search engine
878              *
879              * ie
880              * http://host/page
881              * is not the same
882              * than
883              * http://host/page?whatever
884              *
885              * We can't pass query string otherwise, we get
886              * the SEO warning / error
887              * `Alternative page with proper canonical tag`
888              *
889              * Use HTTP X header for debug
890              */
891             if ($method !== self::REDIRECT_PERMANENT_METHOD) {
892                 $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
893                 $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
894             }
895 
896             $id = $link[0];
897             $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
898             if (array_key_exists(1, $link)) {
899                 $url->setFragment($link[1]);
900             }
901             $targetUrl = $url->toAbsoluteUrlString();
902 
903         }
904 
905         /**
906          * The dokuwiki function {@link send_redirect()}
907          * set the `Location header` and in php, the header function
908          * in this case change the status code to 302 Arghhhh.
909          * The code below is adapted from this function {@link send_redirect()}
910          */
911         global $MSG; // are there any undisplayed messages? keep them in session for display
912         if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
913             //reopen session, store data and close session again
914             @session_start();
915             $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
916         }
917         session_write_close(); // always close the session
918 
919         switch ($method) {
920 
921             case self::REDIRECT_PERMANENT_METHOD:
922                 ExecutionContext::getActualOrCreateFromEnv()
923                     ->response()
924                     ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
925                     ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
926                     ->end();
927                 return true;
928 
929             case self::REDIRECT_NOTFOUND_METHOD:
930 
931 
932                 // Empty 404 body to not get the standard 404 page of the browser
933                 // but a blank page to avoid a sort of FOUC.
934                 // ie the user see a page briefly
935                 ExecutionContext::getActualOrCreateFromEnv()
936                     ->response()
937                     ->setStatus(HttpResponseStatus::NOT_FOUND)
938                     ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
939                     ->setBody(self::PAGE_404, Mime::getHtml())
940                     ->end();
941                 return true;
942 
943             default:
944                 LogUtility::msg("The method ($method) is not an http redirection");
945                 return false;
946         }
947 
948 
949     }
950 
951     /**
952      * @param $id
953      * @return array
954      */
955     private
956     function getBestPage($id): array
957     {
958 
959         // The return parameters
960         $bestPageId = null;
961         $scorePageName = null;
962 
963         // Get Score from a page
964         $pageName = noNS($id);
965         $pagesWithSameName = ft_pageLookup($pageName);
966         if (count($pagesWithSameName) > 0) {
967 
968             // Search same namespace in the page found than in the Id page asked.
969             $bestNbWordFound = 0;
970 
971 
972             $wordsInPageSourceId = explode(':', $id);
973             foreach ($pagesWithSameName as $targetPageId => $title) {
974 
975                 // Nb of word found in the target page id
976                 // that are in the source page id
977                 $nbWordFound = 0;
978                 foreach ($wordsInPageSourceId as $word) {
979                     $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
980                 }
981 
982                 if ($bestPageId == null) {
983 
984                     $bestNbWordFound = $nbWordFound;
985                     $bestPageId = $targetPageId;
986 
987                 } else {
988 
989                     if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
990 
991                         $bestNbWordFound = $nbWordFound;
992                         $bestPageId = $targetPageId;
993 
994                     }
995 
996                 }
997 
998             }
999             $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
1000             return array(
1001                 'id' => $bestPageId,
1002                 'score' => $scorePageName);
1003         }
1004         return array(
1005             'id' => $bestPageId,
1006             'score' => $scorePageName
1007         );
1008 
1009     }
1010 
1011 
1012     /**
1013      * Redirect to the search engine
1014      */
1015     private
1016     function redirectToSearchEngine()
1017     {
1018 
1019         global $ID;
1020         $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);
1021 
1022     }
1023 
1024 
1025     /**
1026      *
1027      *   * For a conf file, it will update the Redirection Action Data as Referrer, Count Of Redirection, Redirection Date
1028      *   * For a SQlite database, it will add a row into the log
1029      *
1030      * @param string $sourcePageId
1031      * @param $targetPageId
1032      * @param $algorithmic
1033      * @param $method - http or rewrite
1034      */
1035     function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
1036     {
1037 
1038         $row = array(
1039             "TIMESTAMP" => date("c"),
1040             "SOURCE" => $sourcePageId,
1041             "TARGET" => $targetPageId,
1042             "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
1043             "TYPE" => $algorithmic,
1044             "METHOD" => $method
1045         );
1046         $request = Sqlite::createOrGetBackendSqlite()
1047             ->createRequest()
1048             ->setTableRow('redirections_log', $row);
1049         try {
1050             $request
1051                 ->execute();
1052         } catch (ExceptionCompile $e) {
1053             LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
1054         } finally {
1055             $request->close();
1056         }
1057 
1058 
1059     }
1060 
1061     /**
1062      * This function check if there is a redirection declared
1063      * in the redirection table
1064      * @return bool - true if a rewrite or redirection occurs
1065      * @throws Exception
1066      */
1067     private function processingPageRules(): bool
1068     {
1069         global $ID;
1070 
1071         $calculatedTarget = null;
1072         $ruleMatcher = null; // Used in a warning message if the target page does not exist
1073         // Known redirection in the table
1074         // Get the page from redirection data
1075         $rules = $this->pageRules->getRules();
1076         foreach ($rules as $rule) {
1077 
1078             $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
1079             $ruleTarget = $rule[PageRules::TARGET_NAME];
1080 
1081             // Glob to Rexgexp
1082             $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
1083 
1084             // Match ?
1085             // https://www.php.net/manual/en/function.preg-match.php
1086             $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
1087             if ($pregMatchResult === false) {
1088                 // The `if` to take into account this problem
1089                 // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
1090                 LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
1091                 return false;
1092             }
1093             if ($pregMatchResult) {
1094                 $calculatedTarget = $ruleTarget;
1095                 foreach ($matches as $key => $match) {
1096                     if ($key == 0) {
1097                         continue;
1098                     } else {
1099                         $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
1100                     }
1101                 }
1102                 break;
1103             }
1104         }
1105 
1106         if ($calculatedTarget == null) {
1107             return false;
1108         }
1109 
1110         // If this is an external redirect (other domain)
1111         try {
1112             $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
1113         } catch (ExceptionBadSyntax $e) {
1114             $isHttpUrl = false;
1115         }
1116         if ($isHttpUrl) {
1117             $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1118             return true;
1119         }
1120 
1121         // If the page exist
1122         if (page_exists($calculatedTarget)) {
1123 
1124             // This is DokuWiki Id and should always be lowercase
1125             // The page rule may have change that
1126             $calculatedTarget = strtolower($calculatedTarget);
1127             $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
1128             if ($res) {
1129                 return true;
1130             } else {
1131                 return false;
1132             }
1133 
1134         } else {
1135 
1136             LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
1137             return false;
1138 
1139         }
1140 
1141     }
1142 
1143     private function performNotFoundRedirect(string $targetId, string $origin): bool
1144     {
1145         return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
1146     }
1147 
1148 
1149 }
1150