<?php

namespace ComboStrap;

use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Web\Url;

/**
 * Resolves the redirection to apply for the currently requested page id.
 *
 * The lookup order implemented by {@link Router::getRedirection()} is:
 * existing page with a changed url id, permalink (page id), canonical, alias,
 * page rules, edit mode for writers and finally the configured reader algorithms.
 */
class Router
{

    public const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    public const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    public const GO_TO_EDIT_MODE = 'GoToEditMode';
    public const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    public const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    public const NOTHING = 'Nothing';
    public const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    private PageRules $pageRules;

    /**
     * Find the redirection for the requested page (global $ID).
     *
     * @return RouterRedirection the first redirection found, following the lookup order of the class
     * @throws ExceptionSqliteNotAvailable
     * @throws ExceptionNotFound - no redirection found
     */
    public function getRedirection(): RouterRedirection
    {
        global $ID;

        /**
         * Without SQLite, this module does not work further
         * It throws
         */
        Sqlite::createOrGetSqlite();

        /**
         * Initiate Page Rules
         */
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        $identifier = $ID;

        /**
         * Database based lookups, evaluated lazily and in order
         */
        $redirection = $this->findExistingPageRedirection($requestedMarkupPath, $originalId, $identifier)
            ?? $this->findPermalinkRedirection($requestedMarkupPath, $identifier)
            ?? $this->findCanonicalRedirection($identifier)
            ?? $this->findAliasRedirection($identifier);
        if ($redirection !== null) {
            return $redirection;
        }

        /**
         * Do we have a redirection defined in the page rules ?
         */
        try {
            return $this->getRedirectionFromPageRules();
        } catch (ExceptionNotFound $e) {
            // no page rules redirection
        }

        /**
         * No redirection found in the database by id
         * Edit mode
         */
        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        if (Identity::isWriter() && $config->getBooleanValue(self::GO_TO_EDIT_MODE, true)) {
            // Stop here
            return RouterRedirectionBuilder::createFromOrigin(self::GO_TO_EDIT_MODE)
                ->build();
        }

        /**
         * We are still a reader: the redirection does not exist and
         * the user is not allowed to edit the page
         */
        $actionReaderFirst = $config->getValue('ActionReaderFirst');
        if ($actionReaderFirst == self::NOTHING) {
            throw new ExceptionNotFound();
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = [
            $actionReaderFirst,
            $config->getValue('ActionReaderSecond'),
            $config->getValue('ActionReaderThird'),
        ];
        foreach ($readerAlgorithms as $algorithm) {
            // An unset algorithm ends the chain (loose comparison kept on purpose: null and '' both stop)
            if ($algorithm == null) {
                break;
            }
            $redirection = $this->findReaderRedirection($algorithm, $requestedMarkupPath, $identifier);
            if ($redirection !== null) {
                return $redirection;
            }
        }

        throw new ExceptionNotFound();

    }

    /**
     * The requested page exists but was requested with another id than its url id:
     * redirect permanently to the page stored in the database.
     *
     * @param MarkupPath $requestedMarkupPath the path built from the global $ID
     * @param string|null $originalId the id found in the request
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when this case does not apply
     */
    private function findExistingPageRedirection(MarkupPath $requestedMarkupPath, ?string $originalId, $identifier): ?RouterRedirection
    {
        if (!FileSystems::exists($requestedMarkupPath)) {
            return null;
        }

        /**
         * No redirect if the id has not changed,
         * if this is the root home page
         * or if this is a historical page (revision)
         */
        if (
            $originalId === $requestedMarkupPath->getUrlId()
            || $identifier == Site::getIndexPageName()
            || isset($_REQUEST["rev"])
        ) {
            return null;
        }

        /**
         * TODO: When saving for the first time, the page is not stored in the database
         *   but that's not the case actually
         */
        $databasePageRow = $requestedMarkupPath->getDatabasePage();
        if (!$databasePageRow->exists()) {
            return null;
        }

        /**
         * A move may leave the database in a bad state,
         * unfortunately (ie page is not in index, unable to update, ...)
         * We test therefore if the database page id exists
         */
        $targetPath = MarkupPath::createMarkupFromId($databasePageRow->getFromRow("id"));
        if (!FileSystems::exists($targetPath)) {
            return null;
        }

        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
            ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
            ->setTargetMarkupPath($targetPath)
            ->build();
    }

    /**
     * The identifier holds a page id (permalink) or a page id abbreviation.
     *
     * Note that if the ID is a permalink, the global $ID may already hold the real id
     * because DOKUWIKI_STARTED is not the first event:
     * {@link action_plugin_combo_lang::load_lang()} may have already
     * transformed a permalink into a real dokuwiki id.
     * This lookup is kept because there is no guarantee that it will stay this way.
     *
     * @param MarkupPath $requestedMarkupPath the path built from the global $ID
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when the identifier is not a known permalink
     * @throws ExceptionNotFound presumably only if a called helper throws it - not raised directly here
     */
    private function findPermalinkRedirection(MarkupPath $requestedMarkupPath, $identifier): ?RouterRedirection
    {
        $lastName = $requestedMarkupPath->getPathObject()->getLastNameWithoutExtension();
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($lastName);
        // Without short page id, the identifier may be a permalink with the full id
        $pageId = PageUrlPath::decodePageId($shortPageId != null ? $shortPageId : $identifier);
        if ($pageId === null) {
            return null;
        }

        /**
         * Full page id: only for a path without parent.
         * In {@link Router::scoreBestNamespace()}, getParent() signals a path without parent
         * with an ExceptionNotFound. The exception was not caught here and aborted the whole lookup.
         * The null check is kept in case null is returned.
         */
        try {
            $hasNoParent = $requestedMarkupPath->getParent() === null;
        } catch (ExceptionNotFound $e) {
            $hasNoParent = true;
        }
        if ($hasNoParent) {
            $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
            if ($page !== null && $page->exists()) {
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK)
                    ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                    ->setTargetMarkupPath($page)
                    ->build();
            }
        }

        /**
         * Page Id Abbr ?
         * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
         */
        $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
        if ($page === null) {
            // or the length of the abbr has changed
            $databasePageRow = new DatabasePageRow();
            try {
                $row = $databasePageRow->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                $databasePageRow->setRow($row);
                $page = $databasePageRow->getMarkupPath();
            } catch (ExceptionNotFound $e) {
                // nothing to do
            }
        }
        if ($page === null || !$page->exists()) {
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the identifier
            return null;
        }

        /**
         * If the url canonical id has changed, we show it
         * to the writer by performing a permanent redirect.
         * Google asks for a redirect to the preferred (canonical) url
         * https://developers.google.com/search/docs/advanced/crawling/301-redirects
         */
        $redirectType = $identifier != $page->getUrlId()
            ? RouterRedirection::REDIRECT_PERMANENT_METHOD
            : RouterRedirection::REDIRECT_TRANSPARENT_METHOD;

        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
            ->setType($redirectType)
            ->setTargetMarkupPath($page)
            ->build();
    }

    /**
     * The identifier is a canonical.
     *
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when no existing page has this canonical
     */
    private function findCanonicalRedirection($identifier): ?RouterRedirection
    {
        $canonicalPage = DatabasePageRow::createFromCanonical($identifier)->getMarkupPath();
        if ($canonicalPage === null || !$canonicalPage->exists()) {
            return null;
        }

        $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_CANONICAL)
            ->setTargetMarkupPath($canonicalPage);

        /**
         * Is the url canonical name based ?
         * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
         */
        if ($canonicalPage->getUrlId() === $identifier) {
            $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD);
        } else {
            $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD);
        }
        return $builder->build();
    }

    /**
     * The identifier is an alias.
     *
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when no existing page has this alias
     */
    private function findAliasRedirection($identifier): ?RouterRedirection
    {
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if ($aliasRequestedPage === null || !$aliasRequestedPage->exists()) {
            return null;
        }

        // The build alias is the file system metadata alias
        // it may be null if the replication in the database was not successful
        $buildAlias = $aliasRequestedPage->getBuildAlias();
        if ($buildAlias === null) {
            return null;
        }

        $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_ALIAS)
            ->setTargetMarkupPath($aliasRequestedPage);
        switch ($buildAlias->getType()) {
            case AliasType::REDIRECT:
                return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
            case AliasType::SYNONYM:
                return $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD)->build();
            default:
                LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
        }
    }

    /**
     * Apply one reader algorithm (one of the GO_TO_* constants).
     *
     * @param string $algorithm the algorithm name from the configuration
     * @param MarkupPath $requestedMarkupPath the path built from the global $ID
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when the algorithm did not find a target (or is unknown)
     * @throws ExceptionNotFound when the algorithm is {@link Router::NOTHING}
     */
    private function findReaderRedirection($algorithm, MarkupPath $requestedMarkupPath, $identifier): ?RouterRedirection
    {
        switch ($algorithm) {

            case self::NOTHING:
                throw new ExceptionNotFound();

            case self::GO_TO_BEST_END_PAGE_NAME:
                return $this->findBestEndPageRedirection($requestedMarkupPath);

            case self::GO_TO_NS_START_PAGE:
                return $this->findNamespaceStartPageRedirection($identifier);

            case self::GO_TO_BEST_PAGE_NAME:
                return $this->findBestPageNameRedirection($identifier);

            case self::GO_TO_BEST_NAMESPACE:
                return $this->findBestNamespaceRedirection($identifier);

            case self::GO_TO_SEARCH_ENGINE:
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_SEARCH_ENGINE)
                    ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                    ->build();

            default:
                return null;

        }
    }

    /**
     * Reader algorithm {@link Router::GO_TO_BEST_END_PAGE_NAME}.
     *
     * @param MarkupPath $requestedMarkupPath the path built from the global $ID
     * @return RouterRedirection|null null when no other page was found or the method is unexpected
     */
    private function findBestEndPageRedirection(MarkupPath $requestedMarkupPath): ?RouterRedirection
    {
        $algorithm = self::GO_TO_BEST_END_PAGE_NAME;

        /**
         * @var MarkupPath $bestEndPage
         */
        [$bestEndPage, $method] = RouterBestEndPage::process($requestedMarkupPath);
        if ($bestEndPage == null) {
            return null;
        }

        try {
            $notSamePage = $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId();
        } catch (ExceptionBadArgument $e) {
            LogUtility::error("The path should be wiki markup path", LogUtility::SUPPORT_CANONICAL, $e);
            $notSamePage = false;
        }
        if (!$notSamePage) {
            return null;
        }

        $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_END_PAGE_NAME)
            ->setTargetMarkupPath($bestEndPage);
        switch ($method) {
            case RouterRedirection::REDIRECT_PERMANENT_METHOD:
                return $redirectionBuilder
                    ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                    ->build();
            case RouterRedirection::REDIRECT_NOTFOUND_METHOD:
                return $redirectionBuilder
                    ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                    ->build();
            default:
                LogUtility::error("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                return null;
        }
    }

    /**
     * Reader algorithm {@link Router::GO_TO_NS_START_PAGE}.
     *
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when the namespace has no start page
     */
    private function findNamespaceStartPageRedirection($identifier): ?RouterRedirection
    {
        /**
         * The start page name is read from the global DokuWiki `$conf` array
         * (as in {@link Router::scoreBestNamespace()}).
         * It was read by error from the local configuration object.
         */
        global $conf;

        $namespace = getNS($identifier);
        $startPageCandidates = [
            // Start page with the conf['start'] parameter
            $namespace . ':' . $conf['start'],
            // Start page with the same name than the namespace
            $namespace . ':' . curNS($identifier),
        ];
        foreach ($startPageCandidates as $startPage) {
            $startPath = MarkupPath::createMarkupFromId($startPage);
            if (FileSystems::exists($startPath)) {
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_START_PAGE)
                    ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                    ->setTargetMarkupPath($startPath)
                    ->build();
            }
        }
        return null;
    }

    /**
     * Reader algorithm {@link Router::GO_TO_BEST_PAGE_NAME}:
     * the best page name competes with the best namespace.
     *
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when both scores are not positive
     */
    private function findBestPageNameRedirection($identifier): ?RouterRedirection
    {
        $bestPage = $this->getBestPage($identifier);
        $scorePageName = $bestPage['score'];

        // Get Score from a Namespace
        $bestNamespace = $this->scoreBestNamespace($identifier);
        $namespaceScore = $bestNamespace['score'];

        if (!($scorePageName > 0 || $namespaceScore > 0)) {
            return null;
        }

        // Compare the two scores, the namespace wins on a tie
        $targetId = $scorePageName > $namespaceScore ? $bestPage['id'] : $bestNamespace['namespace'];
        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_PAGE_NAME)
            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($targetId))
            ->build();
    }

    /**
     * Reader algorithm {@link Router::GO_TO_BEST_NAMESPACE}.
     *
     * @param string $identifier the global $ID
     * @return RouterRedirection|null null when the score is not positive
     */
    private function findBestNamespaceRedirection($identifier): ?RouterRedirection
    {
        $scoreNamespace = $this->scoreBestNamespace($identifier);
        if (!($scoreNamespace['score'] > 0)) {
            return null;
        }
        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_NAMESPACE)
            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($scoreNamespace['namespace']))
            ->build();
    }


    /**
     * @return string|null
     *
     * Return the original id from the request
     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * Null is returned when there is no `id` query parameter
     * or when it is not a string (ie `?id[]=value`)
     */
    public static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        if (!is_string($originalId)) {
            return null;
        }
        // We may get a `/` as first character
        // because we return an id, we need to delete it
        if (substr($originalId, 0, 1) === "/") {
            $originalId = substr($originalId, 1);
        }
        // transform / to :
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Return a redirection declared in the redirection table or throw if not found
     *
     * The first rule whose matcher matches the global $ID wins.
     * A rule with an invalid pattern is logged and skipped.
     *
     * @throws ExceptionNotFound - no rule matches or the target page does not exist
     */
    private function getRedirectionFromPageRules(): RouterRedirection
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        foreach ($this->pageRules->getRules() as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            // The warning is suppressed because an invalid pattern is handled just below
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // Invalid pattern, for instance:
                // PHP Warning:  preg_match(): Unknown modifier 'd'
                // A bad rule should not disable the following rules: log and try the next one
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                continue;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                // Highest group first, otherwise `$1` would be replaced inside `$10`
                foreach (array_reverse($matches, true) as $key => $match) {
                    if ($key === 0) {
                        // the whole match is not a placeholder
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        // loose comparison kept on purpose: no match (null) or an empty target
        if ($calculatedTarget == null) {
            throw new ExceptionNotFound();
        }

        // If this is an external redirect (other domain)
        try {
            $url = Url::createFromString($calculatedTarget);
            // Unfortunately, the page id `my:page` is a valid url after parsing with the scheme `my`
            try {
                $isHttp = strpos($url->getScheme(), "http") === 0;
            } catch (ExceptionNotFound $e) {
                $isHttp = false;
            }
            if ($isHttp) {
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                    ->setTargetUrl($url)
                    ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                    ->build();
            }
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // not an URL
        }

        // If the page exist
        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        $markupPath = MarkupPath::createMarkupFromId($calculatedTarget);
        if (FileSystems::exists($markupPath)) {
            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                ->setTargetMarkupPath($markupPath)
                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                ->build();
        }

        LogUtility::error("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist");
        throw new ExceptionNotFound();

    }


    /**
     * Search the best page with the same page name than the id.
     *
     * @param $id - the requested page id
     * @return array with the keys `id` (best page id or null) and `score` (null when no page was found)
     */
    private function getBestPage($id): array
    {

        // Get the pages with the same name
        $pagesWithSameName = ft_pageLookup(noNS($id));
        if (count($pagesWithSameName) === 0) {
            return [
                'id' => null,
                'score' => null,
            ];
        }

        // Search the names of the requested id in the ids of the pages found
        // Empty names (leading, trailing or double `:`) are skipped: substr_count refuses an empty needle
        $wordsInPageSourceId = array_filter(
            explode(':', $id),
            static function ($word) {
                return $word !== '';
            }
        );

        $bestPageId = null;
        $bestNbWordFound = 0;
        foreach ($pagesWithSameName as $targetPageId => $title) {

            // A numeric page id is an integer when used as array key
            $targetPageId = (string)$targetPageId;

            // Nb of word found in the target page id
            // that are in the source page id
            $nbWordFound = 0;
            foreach ($wordsInPageSourceId as $word) {
                $nbWordFound += substr_count($targetPageId, $word);
            }

            // The first page is the first candidate, then a shorter id with at least as many words wins
            $isBetter = $bestPageId === null
                || ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId));
            if ($isBetter) {
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;
            }

        }

        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $weightFactorForSamePageName = $config->getValue('WeightFactorForSamePageName');
        $weightFactorForSameNamespace = $config->getValue('WeightFactorForSameNamespace');
        $scorePageName = $weightFactorForSamePageName + ($bestNbWordFound - 1) * $weightFactorForSameNamespace;

        return [
            'id' => $bestPageId,
            'score' => $scorePageName,
        ];

    }

    /**
     * Search the best namespace for the id.
     *
     * @param $id - the requested page id
     * @return array with the keys `namespace` (best namespace id or null) and `score` (0 when nothing was found)
     */
    private function scoreBestNamespace($id): array
    {

        $nameSpaces = [];
        $pathNames = [];

        // Candidates: the parent namespace if it exists, otherwise all start pages
        $requestedPath = MarkupPath::createMarkupFromId($id);
        try {
            $pageNameSpace = $requestedPath->getParent();
            $pathNames = array_slice($pageNameSpace->getNames(), 0, -1);
            if (FileSystems::exists($pageNameSpace)) {
                $nameSpaces = [$pageNameSpace->toAbsoluteId()];
            } else {
                global $conf;
                // ft_pageLookup is keyed by page id (see getBestPage): the ids are the candidates, not the values
                $nameSpaces = array_map('strval', array_keys(ft_pageLookup($conf['start'])));
            }
        } catch (ExceptionNotFound $e) {
            // no parent, root
        }

        // Search the best namespace
        $bestNbWordFound = 0;
        $bestNamespaceId = null;
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // Names of one or two characters are ignored
                if (strlen($pathName) > 2) {
                    $nbWordFound += substr_count($nameSpace, $pathName);
                }
            }
            if ($nbWordFound > $bestNbWordFound) {
                // Take only the smallest namespace
                if ($bestNamespaceId === null || strlen($nameSpace) < strlen($bestNamespaceId)) {
                    $bestNbWordFound = $nbWordFound;
                    $bestNamespaceId = $nameSpace;
                }
            }
        }

        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $startPageFactor = $config->getValue('WeightFactorForStartPage');
        $nameSpaceFactor = $config->getValue('WeightFactorForSameNamespace');
        $bestNamespaceScore = $bestNbWordFound > 0
            ? $bestNbWordFound * $nameSpaceFactor + $startPageFactor
            : 0;

        return [
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore,
        ];

    }


}