xref: /plugin/combo/ComboStrap/Router.php (revision 45a874f4355f8bee7459e5d3b79e86e68468b316)
1<?php
2
3namespace ComboStrap;
4
5use ComboStrap\Meta\Field\AliasType;
6use ComboStrap\Web\Url;
7
8class Router
9{
10
11
12    public const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
13    public const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
14    public const GO_TO_EDIT_MODE = 'GoToEditMode';
15    public const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
16    public const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
17    public const NOTHING = 'Nothing';
18    public const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
19    private PageRules $pageRules;
20
21    /**
22     * @throws ExceptionSqliteNotAvailable
23     * @throws ExceptionNotFound - no redirection found
24     */
25    public function getRedirection(): RouterRedirection
26    {
27
28        /**
29         * Without SQLite, this module does not work further
30         * It throws
31         */
32        Sqlite::createOrGetSqlite();
33
34        /**
35         * Initiate Page Rules
36         */
37        $this->pageRules = new PageRules();
38
39
40        /**
41         * Unfortunately, DOKUWIKI_STARTED is not the first event
42         * The id may have been changed by
43         * {@link action_plugin_combo_lang::load_lang()}
44         * function, that's why we check against the {@link $_REQUEST}
45         * and not the global ID
46         */
47        $originalId = self::getOriginalIdFromRequest();
48
49        /**
50         * Page is an existing id
51         * in the database ?
52         */
53        global $ID;
54        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
55        if (FileSystems::exists($requestedMarkupPath)) {
56
57            /**
58             * If this is not the root home page
59             * and if the canonical id is the not the same (the id has changed)
60             * and if this is not a historical page (revision)
61             * redirect
62             */
63            if (
64                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
65                && $ID != Site::getIndexPageName()
66                && !isset($_REQUEST["rev"])
67            ) {
68                /**
69                 * TODO: When saving for the first time, the page is not stored in the database
70                 *   but that's not the case actually
71                 */
72                $databasePageRow = $requestedMarkupPath->getDatabasePage();
73                if ($databasePageRow->exists()) {
74                    /**
75                     * A move may leave the database in a bad state,
76                     * unfortunately (ie page is not in index, unable to update, ...)
77                     * We test therefore if the database page id exists
78                     */
79                    $targetPageId = $databasePageRow->getFromRow("id");
80                    $targetPath = MarkupPath::createMarkupFromId($targetPageId);
81                    if (FileSystems::exists($targetPath)) {
82                        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
83                            ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
84                            ->setTargetMarkupPath($targetPath)
85                            ->build();
86                    }
87
88                }
89            }
90        }
91
92        $identifier = $ID;
93
94        /**
95         * Page Id in the url
96         */
97        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
98        if ($shortPageId != null) {
99            $pageId = PageUrlPath::decodePageId($shortPageId);
100        } else {
101            /**
102             * Permalink with id
103             */
104            $pageId = PageUrlPath::decodePageId($identifier);
105        }
106        if ($pageId !== null) {
107
108            if ($requestedMarkupPath->getParent() === null) {
109                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
110                if ($page !== null && $page->exists()) {
111                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK)
112                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
113                        ->setTargetMarkupPath($page)
114                        ->build();
115                }
116            }
117
118            /**
119             * Page Id Abbr ?
120             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
121             */
122            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
123            if ($page === null) {
124                // or the length of the abbr has changed
125                $canonicalDatabasePage = new DatabasePageRow();
126                try {
127                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
128                    $canonicalDatabasePage->setRow($row);
129                    $page = $canonicalDatabasePage->getMarkupPath();
130                } catch (ExceptionNotFound $e) {
131                    // nothing to do
132                }
133            }
134            if ($page !== null && $page->exists()) {
135                /**
136                 * If the url canonical id has changed, we show it
137                 * to the writer by performing a permanent redirect
138                 */
139                if ($identifier != $page->getUrlId()) {
140                    // Google asks for a redirect
141                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
142                    // People access your site through several different URLs.
143                    // If, for example, your home page can be reached in multiple ways
144                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
145                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
146                    // and use redirects to send traffic from the other URLs to your preferred URL.
147                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
148                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
149                        ->setTargetMarkupPath($page)
150                        ->build();
151
152                }
153
154                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
155                    ->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD)
156                    ->setTargetMarkupPath($page)
157                    ->build();
158
159            }
160            // permanent url not yet in the database
161            // Other permanent such as permanent canonical ?
162            // We let the process go with the new identifier
163
164        }
165
166        /**
167         * Identifier is a Canonical ?
168         */
169        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
170        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
171        if ($canonicalPage !== null && $canonicalPage->exists()) {
172            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_CANONICAL)
173                ->setTargetMarkupPath($canonicalPage);
174            /**
175             * Does the canonical url is canonical name based
176             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
177             */
178            if ($canonicalPage->getUrlId() === $identifier) {
179                $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD);
180            } else {
181                $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD);
182            }
183            return $builder->build();
184
185        }
186
187        /**
188         * Identifier is an alias
189         */
190        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
191        if (
192            $aliasRequestedPage !== null
193            && $aliasRequestedPage->exists()
194            // The build alias is the file system metadata alias
195            // it may be null if the replication in the database was not successful
196            && $aliasRequestedPage->getBuildAlias() !== null
197        ) {
198            $buildAlias = $aliasRequestedPage->getBuildAlias();
199            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_ALIAS)
200                ->setTargetMarkupPath($aliasRequestedPage);
201            switch ($buildAlias->getType()) {
202                case AliasType::REDIRECT:
203                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
204                case AliasType::SYNONYM:
205                    return $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD)->build();
206                default:
207                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
208                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
209            }
210        }
211
212        /**
213         * Do we have a page rules
214         * If there is a redirection defined in the page rules
215         */
216        try {
217            return $this->getRedirectionFromPageRules();
218        } catch (ExceptionNotFound $e) {
219            // no pages rules redirection
220        }
221
222        /**
223         * No redirection found in the database by id
224         */
225
226        /**
227         * Edit mode
228         */
229        $conf = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
230        if (Identity::isWriter() && $conf->getBooleanValue(self::GO_TO_EDIT_MODE, true)) {
231
232            // Stop here
233            return RouterRedirectionBuilder::createFromOrigin(self::GO_TO_EDIT_MODE)
234                ->build();
235
236        }
237
238        /**
239         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
240         */
241        $actionReaderFirst = $conf->getValue('ActionReaderFirst');
242        if ($actionReaderFirst == self::NOTHING) {
243            throw new ExceptionNotFound();
244        }
245
246        // We are reader and their is no redirection set, we apply the algorithm
247        $readerAlgorithms = array();
248        $readerAlgorithms[0] = $actionReaderFirst;
249        $readerAlgorithms[1] = $conf->getValue('ActionReaderSecond');
250        $readerAlgorithms[2] = $conf->getValue('ActionReaderThird');
251
252        while (
253            ($algorithm = array_shift($readerAlgorithms)) != null
254        ) {
255
256            switch ($algorithm) {
257
258                case self::NOTHING:
259                    throw new ExceptionNotFound();
260
261                case self::GO_TO_BEST_END_PAGE_NAME:
262
263                    /**
264                     * @var MarkupPath $bestEndPage
265                     */
266                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
267                    if ($bestEndPage != null) {
268                        try {
269                            $notSamePage = $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId();
270                        } catch (ExceptionBadArgument $e) {
271                            LogUtility::error("The path should be wiki markup path", LogUtility::SUPPORT_CANONICAL, $e);
272                            $notSamePage = false;
273                        }
274                        if ($notSamePage) {
275                            $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_END_PAGE_NAME)
276                                ->setTargetMarkupPath($bestEndPage);
277                            switch ($method) {
278                                case RouterRedirection::REDIRECT_PERMANENT_METHOD:
279                                    return $redirectionBuilder
280                                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
281                                        ->build();
282                                case RouterRedirection::REDIRECT_NOTFOUND_METHOD:
283                                    return $redirectionBuilder
284                                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
285                                        ->build();
286                                default:
287                                    LogUtility::error("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
288                            }
289                        }
290
291                    }
292                    break;
293
294                case self::GO_TO_NS_START_PAGE:
295
296                    $redirectBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_START_PAGE)
297                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);
298
299                    // Start page with the conf['start'] parameter
300                    $startPage = getNS($identifier) . ':' . $conf['start'];
301                    $startPath = MarkupPath::createMarkupFromId($startPage);
302                    if (FileSystems::exists($startPath)) {
303                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
304                    }
305
306                    // Start page with the same name than the namespace
307                    $startPage = getNS($identifier) . ':' . curNS($identifier);
308                    $startPath = MarkupPath::createMarkupFromId($startPage);
309                    if (FileSystems::exists($startPath)) {
310                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
311                    }
312
313                    break;
314
315                case self::GO_TO_BEST_PAGE_NAME:
316
317                    $bestPageId = null;
318
319                    $bestPage = $this->getBestPage($identifier);
320                    $bestPageId = $bestPage['id'];
321                    $scorePageName = $bestPage['score'];
322
323                    // Get Score from a Namespace
324                    $bestNamespace = $this->scoreBestNamespace($identifier);
325                    $bestNamespaceId = $bestNamespace['namespace'];
326                    $namespaceScore = $bestNamespace['score'];
327
328                    // Compare the two score
329                    if ($scorePageName > 0 or $namespaceScore > 0) {
330                        $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_PAGE_NAME)
331                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);
332                        if ($scorePageName > $namespaceScore) {
333                            return $redirectionBuilder
334                                ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestPageId))
335                                ->build();
336                        }
337                        return $redirectionBuilder
338                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
339                            ->build();
340                    }
341                    break;
342
343                case self::GO_TO_BEST_NAMESPACE:
344
345                    $scoreNamespace = $this->scoreBestNamespace($identifier);
346                    $bestNamespaceId = $scoreNamespace['namespace'];
347                    $score = $scoreNamespace['score'];
348
349                    if ($score > 0) {
350                        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_NAMESPACE)
351                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
352                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
353                            ->build();
354                    }
355                    break;
356
357                case self::GO_TO_SEARCH_ENGINE:
358
359                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_SEARCH_ENGINE)
360                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
361                        ->build();
362
363            }
364
365        }
366
367        throw new ExceptionNotFound();
368
369    }
370
371
372    /**
373     * @return string|null
374     *
375     * Return the original id from the request
376     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
377     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
378     *
379     * Unfortunately, DOKUWIKI_STARTED is not the first event
380     * The id may have been changed by
381     * {@link action_plugin_combo_lang::load_lang()}
382     * function, that's why we have this function
383     * to get the original requested id
384     */
385    static function getOriginalIdFromRequest(): ?string
386    {
387        $originalId = $_GET["id"] ?? null;
388        if ($originalId === null) {
389            return null;
390        }
391        // We may get a `/` as first character
392        // because we return an id, we need to delete it
393        if (substr($originalId, 0, 1) === "/") {
394            $originalId = substr($originalId, 1);
395        }
396        // transform / to :
397        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
398    }
399
400    /**
401     * Return a redirection declared in the redirection table or throw if not found
402     * @throws ExceptionNotFound
403     */
404    private function getRedirectionFromPageRules(): RouterRedirection
405    {
406        global $ID;
407
408        $calculatedTarget = null;
409        $ruleMatcher = null; // Used in a warning message if the target page does not exist
410        // Known redirection in the table
411        // Get the page from redirection data
412        $rules = $this->pageRules->getRules();
413        foreach ($rules as $rule) {
414
415            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
416            $ruleTarget = $rule[PageRules::TARGET_NAME];
417
418            // Glob to Rexgexp
419            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';
420
421            // Match ?
422            // https://www.php.net/manual/en/function.preg-match.php
423            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
424            if ($pregMatchResult === false) {
425                // The `if` to take into account this problem
426                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
427                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
428                throw new ExceptionNotFound();
429            }
430            if ($pregMatchResult) {
431                $calculatedTarget = $ruleTarget;
432                foreach ($matches as $key => $match) {
433                    if ($key == 0) {
434                        continue;
435                    } else {
436                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
437                    }
438                }
439                break;
440            }
441        }
442
443        if ($calculatedTarget == null) {
444            throw new ExceptionNotFound();
445        }
446
447        // If this is an external redirect (other domain)
448        try {
449            $url = Url::createFromString($calculatedTarget);
450            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
451                ->setTargetUrl($url)
452                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
453                ->build();
454        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
455            // not an URL
456        }
457
458
459        // If the page exist
460        // This is DokuWiki Id and should always be lowercase
461        // The page rule may have change that
462        $calculatedTarget = strtolower($calculatedTarget);
463        $markupPath = MarkupPath::createMarkupFromId($calculatedTarget);
464        if (FileSystems::exists($markupPath)) {
465
466            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
467                ->setTargetMarkupPath($markupPath)
468                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
469                ->build();
470
471        }
472
473        LogUtility::error("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist");
474        throw new ExceptionNotFound();
475
476    }
477
478
479    /**
480     * @param $id
481     * @return array
482     */
483    private
484    function getBestPage($id): array
485    {
486
487        // The return parameters
488        $bestPageId = null;
489        $scorePageName = null;
490
491        // Get Score from a page
492        $pageName = noNS($id);
493        $pagesWithSameName = ft_pageLookup($pageName);
494        if (count($pagesWithSameName) > 0) {
495
496            // Search same namespace in the page found than in the Id page asked.
497            $bestNbWordFound = 0;
498
499
500            $wordsInPageSourceId = explode(':', $id);
501            foreach ($pagesWithSameName as $targetPageId => $title) {
502
503                // Nb of word found in the target page id
504                // that are in the source page id
505                $nbWordFound = 0;
506                foreach ($wordsInPageSourceId as $word) {
507                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
508                }
509
510                if ($bestPageId == null) {
511
512                    $bestNbWordFound = $nbWordFound;
513                    $bestPageId = $targetPageId;
514
515                } else {
516
517                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {
518
519                        $bestNbWordFound = $nbWordFound;
520                        $bestPageId = $targetPageId;
521
522                    }
523
524                }
525
526            }
527            $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
528            $weightFactorForSamePageName = $config->getValue('WeightFactorForSamePageName');
529            $weightFactorForSameNamespace = $config->getValue('WeightFactorForSameNamespace');
530            $scorePageName = $weightFactorForSamePageName + ($bestNbWordFound - 1) * $weightFactorForSameNamespace;
531            return array(
532                'id' => $bestPageId,
533                'score' => $scorePageName);
534        }
535        return array(
536            'id' => $bestPageId,
537            'score' => $scorePageName
538        );
539
540    }
541
542    /**
543     * getBestNamespace
544     * Return a list with 'BestNamespaceId Score'
545     * @param $id
546     * @return array
547     */
548    private
549    function scoreBestNamespace($id): array
550    {
551
552        $nameSpaces = array();
553        $pathNames = array();
554
555        // Parameters
556        $requestedPath = MarkupPath::createMarkupFromId($id);
557        try {
558            $pageNameSpace = $requestedPath->getParent();
559            $pathNames = array_slice($pageNameSpace->getNames(), 0, -1);
560            if (FileSystems::exists($pageNameSpace)) {
561                $nameSpaces = array($pageNameSpace->toAbsoluteId());
562            } else {
563                global $conf;
564                $nameSpaces = ft_pageLookup($conf['start']);
565            }
566        } catch (ExceptionNotFound $e) {
567            // no parent, root
568        }
569
570        // Parameters and search the best namespace
571        $bestNbWordFound = 0;
572        $bestNamespaceId = null;
573        foreach ($nameSpaces as $nameSpace) {
574
575            $nbWordFound = 0;
576            foreach ($pathNames as $pathName) {
577                if (strlen($pathName) > 2) {
578                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
579                }
580            }
581            if ($nbWordFound > $bestNbWordFound) {
582                // Take only the smallest namespace
583                if ($bestNbWordFound == null || strlen($nameSpace) < strlen($bestNamespaceId)) {
584                    $bestNbWordFound = $nbWordFound;
585                    $bestNamespaceId = $nameSpace;
586                }
587            }
588        }
589        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
590        $startPageFactor = $config->getValue('WeightFactorForStartPage');
591        $nameSpaceFactor = $config->getValue('WeightFactorForSameNamespace');
592        if ($bestNbWordFound > 0) {
593            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
594        } else {
595            $bestNamespaceScore = 0;
596        }
597
598
599        return array(
600            'namespace' => $bestNamespaceId,
601            'score' => $bestNamespaceScore
602        );
603
604    }
605
606
607}
608