1<?php
2
3namespace ComboStrap;
4
5use ComboStrap\Meta\Field\AliasType;
6use ComboStrap\Web\Url;
7
/**
 * Finds where a request should be redirected.
 *
 * The entry point is {@link Router::getRedirection()}. It works on the
 * DokuWiki global `$ID` and tries, in order: permalinks / page ids,
 * canonical, aliases, the page rules and finally the configured
 * "reader" algorithms (best page name, best namespace, start page,
 * search engine, ...).
 */
class Router
{

    /**
     * Names of the reader algorithms. They are the values expected in the
     * `ActionReaderFirst`, `ActionReaderSecond` and `ActionReaderThird`
     * configurations (see the `switch` in {@link Router::getRedirection()}).
     */
    public const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    public const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    /**
     * Used both as the name of the boolean configuration that sends
     * writers to the edit mode and as the origin of the resulting redirection
     */
    public const GO_TO_EDIT_MODE = 'GoToEditMode';
    public const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    public const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    public const NOTHING = 'Nothing';
    public const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';

    /**
     * The page rules, (re)created at each call of {@link Router::getRedirection()}
     */
    private PageRules $pageRules;

    /**
     * Search a redirection for the requested page (global `$ID`).
     *
     * The first successful step wins:
     *   1. the requested page exists but was reached with another id than its url id
     *   2. the identifier holds a page id (permalink, page id abbreviation)
     *   3. the identifier is a canonical
     *   4. the identifier is an alias
     *   5. a page rule matches
     *   6. the user is a writer and the edit mode redirection is enabled
     *   7. the reader algorithms of the configuration, in their configured order
     *
     * @return RouterRedirection the redirection to apply
     * @throws ExceptionSqliteNotAvailable
     * @throws ExceptionNotFound - no redirection found
     */
    public function getRedirection(): RouterRedirection
    {

        global $ID;
        // The DokuWiki configuration array (not to be confused with the combo `$config` object below)
        global $conf;

        /**
         * Without SQLite, this module does not work further
         * It throws
         */
        Sqlite::createOrGetSqlite();

        /**
         * Initiate Page Rules
         */
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the id of the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * The requested page exists.
         *
         * If this is not the root home page
         * and if the requested id is not the url id (the id has changed)
         * and if this is not a historical page (revision)
         * redirect
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (
            FileSystems::exists($requestedMarkupPath)
            && $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
            && $ID != Site::getIndexPageName()
            && !isset($_REQUEST["rev"])
        ) {
            /**
             * TODO: When saving for the first time, the page is not stored in the database
             *   but that's not the case actually
             */
            $databasePageRow = $requestedMarkupPath->getDatabasePage();
            if ($databasePageRow->exists()) {
                /**
                 * A move may leave the database in a bad state,
                 * unfortunately (ie page is not in index, unable to update, ...)
                 * We test therefore if the database page id exists
                 */
                $targetPageId = $databasePageRow->getFromRow("id");
                $targetPath = MarkupPath::createMarkupFromId($targetPageId);
                if (FileSystems::exists($targetPath)) {
                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                        ->setTargetMarkupPath($targetPath)
                        ->build();
                }
            }
        }

        $identifier = $ID;

        /**
         * Page Id in the url
         * Note that if the ID is a permalink, global $ID has already the real id
         * Why? because unfortunately, DOKUWIKI_STARTED is not the first event
         * {@link action_plugin_combo_lang::load_lang()} may have already
         * transformed a permalink into a real dokuwiki id
         *
         * We let it here because we don't know for sure that it will stay this way
         */
        $lastName = $requestedMarkupPath->getPathObject()->getLastNameWithoutExtension();
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($lastName);
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            // NOTE(review): `scoreBestNamespace` handles a missing parent as an
            // `ExceptionNotFound`. If `getParent()` never returns null, this
            // branch is unreachable - to confirm against `MarkupPath::getParent()`.
            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK)
                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                        ->setTargetMarkupPath($page)
                        ->build();
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                try {
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                } catch (ExceptionNotFound $e) {
                    // nothing to do
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 *
                 * Google asks for a redirect
                 * https://developers.google.com/search/docs/advanced/crawling/301-redirects
                 * People access your site through several different URLs.
                 * If, for example, your home page can be reached in multiple ways
                 * (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                 * it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                 * and use redirects to send traffic from the other URLs to your preferred URL.
                 *
                 * Otherwise, the page is served transparently.
                 */
                $permalinkType = $identifier != $page->getUrlId()
                    ? RouterRedirection::REDIRECT_PERMANENT_METHOD
                    : RouterRedirection::REDIRECT_TRANSPARENT_METHOD;
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
                    ->setType($permalinkType)
                    ->setTargetMarkupPath($page)
                    ->build();
            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        /**
         * Identifier is a Canonical ?
         */
        $canonicalPage = DatabasePageRow::createFromCanonical($identifier)->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_CANONICAL)
                ->setTargetMarkupPath($canonicalPage);
            /**
             * Is the canonical url canonical name based
             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD);
            } else {
                $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD);
            }
            return $builder->build();
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_ALIAS)
                ->setTargetMarkupPath($aliasRequestedPage);
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
                case AliasType::SYNONYM:
                    return $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD)->build();
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
            }
        }

        /**
         * Do we have a page rules
         * If there is a redirection defined in the page rules
         */
        try {
            return $this->getRedirectionFromPageRules();
        } catch (ExceptionNotFound $e) {
            // no pages rules redirection
        }

        /**
         * No redirection found in the database by id
         */

        /**
         * Edit mode
         */
        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        if (Identity::isWriter() && $config->getBooleanValue(self::GO_TO_EDIT_MODE, true)) {

            // Stop here
            return RouterRedirectionBuilder::createFromOrigin(self::GO_TO_EDIT_MODE)
                ->build();

        }

        /**
         *  We are still a reader, the redirection does not exist and the user is not allowed to edit the page (public of other)
         */
        $actionReaderFirst = $config->getValue('ActionReaderFirst');
        if ($actionReaderFirst == self::NOTHING) {
            throw new ExceptionNotFound();
        }

        // We are reader and there is no redirection set, we apply the algorithms in their configured order
        $readerAlgorithms = [
            $actionReaderFirst,
            $config->getValue('ActionReaderSecond'),
            $config->getValue('ActionReaderThird')
        ];

        foreach ($readerAlgorithms as $algorithm) {

            // An empty algorithm stops the chain (loose comparison on purpose: null or empty value)
            if ($algorithm == null) {
                break;
            }

            switch ($algorithm) {

                case self::NOTHING:
                    throw new ExceptionNotFound();

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    [$bestEndPage, $method] = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null) {
                        try {
                            $notSamePage = $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId();
                        } catch (ExceptionBadArgument $e) {
                            LogUtility::error("The path should be wiki markup path", LogUtility::SUPPORT_CANONICAL, $e);
                            $notSamePage = false;
                        }
                        if ($notSamePage) {
                            $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_END_PAGE_NAME)
                                ->setTargetMarkupPath($bestEndPage);
                            switch ($method) {
                                case RouterRedirection::REDIRECT_PERMANENT_METHOD:
                                    return $redirectionBuilder
                                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                                        ->build();
                                case RouterRedirection::REDIRECT_NOTFOUND_METHOD:
                                    return $redirectionBuilder
                                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                                        ->build();
                                default:
                                    LogUtility::error("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                            }
                        }

                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    $redirectBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_START_PAGE)
                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);

                    // Start page with the DokuWiki conf['start'] parameter
                    // (read from the global DokuWiki array, not from the combo config object)
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    $startPath = MarkupPath::createMarkupFromId($startPage);
                    if (FileSystems::exists($startPath)) {
                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    $startPath = MarkupPath::createMarkupFromId($startPage);
                    if (FileSystems::exists($startPath)) {
                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
                    }

                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Score of the best page with the same name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Score of the best namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two scores, the page wins only with a strictly higher score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_PAGE_NAME)
                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);
                        if ($scorePageName > $namespaceScore) {
                            return $redirectionBuilder
                                ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestPageId))
                                ->build();
                        }
                        return $redirectionBuilder
                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
                            ->build();
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_NAMESPACE)
                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
                            ->build();
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_SEARCH_ENGINE)
                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                        ->build();

            }

        }

        throw new ExceptionNotFound();

    }


    /**
     * Return the original id from the request
     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string|null the id with `:` as separator and without any leading `/`,
     *                     or null when the `id` query parameter is missing or is not a string
     */
    public static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        // The query string is untrusted: `?id[]=x` yields an array
        // that would make the string functions below fail
        if (!is_string($originalId)) {
            return null;
        }
        // We may get a `/` as first character
        // because we return an id, we need to delete it
        if (substr($originalId, 0, 1) === "/") {
            $originalId = substr($originalId, 1);
        }
        // transform / to :
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Return the redirection declared in the page rules for the global `$ID`.
     *
     * The rules are evaluated in the order returned by {@link PageRules::getRules()},
     * the first rule whose matcher matches wins. The matcher is a glob where `*`
     * matches any characters, the text matched by the n-th `*` replaces `$n`
     * in the rule target. The match is case-insensitive and not anchored.
     *
     * The target is either an external `http(s)` url or a page id that must exist.
     *
     * @return RouterRedirection a permanent redirection
     * @throws ExceptionNotFound - when no rule matches or when the target page does not exist
     */
    private function getRedirectionFromPageRules(): RouterRedirection
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        foreach ($this->pageRules->getRules() as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            // The matcher is quoted first, otherwise a `/` or any other regexp
            // character in the matcher corrupts the pattern
            // (ie PHP Warning:  preg_match(): Unknown modifier 'd').
            // `preg_quote` has escaped the star, hence the `\*` search.
            $regexpPattern = '/' . str_replace('\*', '(.*)', preg_quote($ruleMatcher, '/')) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            // `@` because the failure is reported below via the log
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // A failing rule should not disable the following ones: log and go on
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                continue;
            }
            if ($pregMatchResult === 1) {
                // One pass replacement with `strtr`: the longest placeholder is
                // tried first (`$10` is not eaten by `$1`) and a captured text
                // that contains a placeholder is not substituted again
                $replacements = [];
                foreach ($matches as $key => $match) {
                    if ($key === 0) {
                        // the whole match is not a placeholder
                        continue;
                    }
                    $replacements['$' . $key] = $match;
                }
                $calculatedTarget = $replacements === [] ? $ruleTarget : strtr($ruleTarget, $replacements);
                break;
            }
        }

        if ($calculatedTarget === null || $calculatedTarget === '') {
            throw new ExceptionNotFound();
        }

        // If this is an external redirect (other domain)
        try {
            $url = Url::createFromString($calculatedTarget);
            // Unfortunately, the page id `my:page` is a valid url after parsing with the scheme `my`
            try {
                $isHttp = strpos($url->getScheme(), "http") === 0;
            } catch (ExceptionNotFound $e) {
                $isHttp = false;
            }
            if ($isHttp) {
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                    ->setTargetUrl($url)
                    ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                    ->build();
            }
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // not an URL
        }

        // If the page exist
        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        $markupPath = MarkupPath::createMarkupFromId($calculatedTarget);
        if (FileSystems::exists($markupPath)) {

            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                ->setTargetMarkupPath($markupPath)
                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                ->build();

        }

        LogUtility::error("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist");
        throw new ExceptionNotFound();

    }


    /**
     * Search the best page among the pages that have the same name than the given id.
     *
     * The candidates come from `ft_pageLookup` on the page name (last part of the id).
     * For each candidate, the parts of the requested id (split on `:`) are counted
     * in the candidate id.
     *
     * The score is `WeightFactorForSamePageName + (count - 1) * WeightFactorForSameNamespace`.
     *
     * @param string $id - the requested page id
     * @return array with the keys `id` (the best page id or null) and `score` (null when no page was found)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pagesWithSameName = ft_pageLookup(noNS($id));
        if (count($pagesWithSameName) === 0) {
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName
            );
        }

        // Search same namespace in the page found than in the Id page asked.
        $bestNbWordFound = 0;
        $wordsInPageSourceId = explode(':', $id);
        foreach ($pagesWithSameName as $targetPageId => $title) {

            // PHP casts integer-like keys to int
            $targetPageId = (string)$targetPageId;

            // Nb of word found in the target page id
            // that are in the source page id
            $nbWordFound = 0;
            foreach ($wordsInPageSourceId as $word) {
                if ($word === '') {
                    // `a::b` or a leading `:` gives an empty part and
                    // `substr_count` does not accept an empty needle
                    continue;
                }
                $nbWordFound += substr_count($targetPageId, $word);
            }

            if ($bestPageId === null) {
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;
                continue;
            }

            // NOTE(review): a candidate replaces the best one only if its id is
            // also shorter, a longer id with more words found never wins.
            // Historical behavior, kept as is - to confirm.
            if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;
            }

        }

        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $weightFactorForSamePageName = $config->getValue('WeightFactorForSamePageName');
        $weightFactorForSameNamespace = $config->getValue('WeightFactorForSameNamespace');
        $scorePageName = $weightFactorForSamePageName + ($bestNbWordFound - 1) * $weightFactorForSameNamespace;

        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }

    /**
     * Search the best namespace for the given id and score it.
     *
     * The candidate is the parent namespace of the id when it exists, otherwise
     * the candidates are the ids of the pages returned by `ft_pageLookup` for the
     * DokuWiki start page name. Each name of the requested path longer than
     * 2 characters is counted in the candidate.
     *
     * The score is `count * WeightFactorForSameNamespace + WeightFactorForStartPage`
     * or 0 when nothing was found.
     *
     * @param string $id - the requested page id
     * @return array with the keys `namespace` (the best id or null) and `score`
     */
    private function scoreBestNamespace($id): array
    {

        $nameSpaces = array();
        $pathNames = array();

        // Parameters
        $requestedPath = MarkupPath::createMarkupFromId($id);
        try {
            $pageNameSpace = $requestedPath->getParent();
            $pathNames = array_slice($pageNameSpace->getNames(), 0, -1);
            if (FileSystems::exists($pageNameSpace)) {
                $nameSpaces = array($pageNameSpace->toAbsoluteId());
            } else {
                global $conf;
                // `ft_pageLookup` returns the page ids as keys (the values are
                // the titles, see `getBestPage`): the ids are the candidates
                $nameSpaces = array_keys(ft_pageLookup($conf['start']));
            }
        } catch (ExceptionNotFound $e) {
            // no parent, root
        }

        // Search the best namespace
        $bestNbWordFound = 0;
        $bestNamespaceId = null;
        foreach ($nameSpaces as $nameSpace) {

            // PHP casts integer-like keys to int
            $nameSpace = (string)$nameSpace;

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // Short names are not significant
                if (strlen($pathName) > 2) {
                    $nbWordFound += substr_count($nameSpace, $pathName);
                }
            }
            if ($nbWordFound <= $bestNbWordFound) {
                continue;
            }
            // Take only the smallest namespace
            if ($bestNamespaceId === null || strlen($nameSpace) < strlen($bestNamespaceId)) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $startPageFactor = $config->getValue('WeightFactorForStartPage');
        $nameSpaceFactor = $config->getValue('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }

        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }


}
623