<?php

namespace ComboStrap;

use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Web\Url;

/**
 * Finds the redirection to apply to the requested page id (the global `$ID`).
 *
 * The lookup order of {@link Router::getRedirection()} is:
 *   * the id changed since the request and the page is known in the database,
 *   * permalink (page id or page id abbreviation in the url),
 *   * canonical,
 *   * alias,
 *   * page rules,
 *   * edit mode for the writers,
 *   * the configured reader algorithms (`ActionReaderFirst`, `ActionReaderSecond`, `ActionReaderThird`).
 */
class Router
{

    /**
     * The possible values of the reader algorithm configurations
     * (`ActionReaderFirst`, `ActionReaderSecond`, `ActionReaderThird`)
     */
    public const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    public const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    /**
     * Used as configuration key and as redirection origin for the writers
     */
    public const GO_TO_EDIT_MODE = 'GoToEditMode';
    public const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    public const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    public const NOTHING = 'Nothing';
    public const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';

    /**
     * The page rules, created at each {@link Router::getRedirection()} call
     */
    private PageRules $pageRules;

    /**
     * Return the redirection for the requested id (global `$ID`)
     *
     * @return RouterRedirection
     * @throws ExceptionSqliteNotAvailable
     * @throws ExceptionNotFound - no redirection found
     */
    public function getRedirection(): RouterRedirection
    {

        /**
         * Without SQLite, this module does not work further
         * It throws
         */
        Sqlite::createOrGetSqlite();

        /**
         * Initiate Page Rules
         * (read by {@link Router::getRedirectionFromPageRules()})
         */
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the id of the request ({@link $_GET})
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id
         * in the database ?
         */
        global $ID;
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same (the id has changed)
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 *   but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = MarkupPath::createMarkupFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
                            ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                            ->setTargetMarkupPath($targetPath)
                            ->build();
                    }

                }
            }
        }

        $identifier = $ID;

        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            // NOTE(review): this expects `getParent()` to return null at the root
            // while scoreBestNamespace() expects an ExceptionNotFound - confirm the contract
            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK)
                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                        ->setTargetMarkupPath($page)
                        ->build();
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                try {
                    // strlen returns an integer, the SQL expression is therefore not build from raw user input
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                } catch (ExceptionNotFound $e) {
                    // nothing to do
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 *
                 * Google asks for a redirect
                 * https://developers.google.com/search/docs/advanced/crawling/301-redirects
                 * (pick one url as preferred/canonical destination and redirect the others to it)
                 *
                 * Otherwise, the redirection is transparent
                 */
                $permalinkType = RouterRedirection::REDIRECT_TRANSPARENT_METHOD;
                if ($identifier != $page->getUrlId()) {
                    $permalinkType = RouterRedirection::REDIRECT_PERMANENT_METHOD;
                }
                return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PERMALINK_EXTENDED)
                    ->setType($permalinkType)
                    ->setTargetMarkupPath($page)
                    ->build();

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_CANONICAL)
                ->setTargetMarkupPath($canonicalPage);
            /**
             * Is the canonical url canonical name based
             * ie {@link  PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD);
            } else {
                $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD);
            }
            return $builder->build();

        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            $builder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_ALIAS)
                ->setTargetMarkupPath($aliasRequestedPage);
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
                case AliasType::SYNONYM:
                    return $builder->setType(RouterRedirection::REDIRECT_TRANSPARENT_METHOD)->build();
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    return $builder->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)->build();
            }
        }

        /**
         * Do we have a page rules
         * If there is a redirection defined in the page rules
         */
        try {
            return $this->getRedirectionFromPageRules();
        } catch (ExceptionNotFound $e) {
            // no pages rules redirection
        }

        /**
         * No redirection found in the database by id
         */

        /**
         * Edit mode
         */
        $conf = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        if (Identity::isWriter() && $conf->getBooleanValue(self::GO_TO_EDIT_MODE, true)) {

            // Stop here
            return RouterRedirectionBuilder::createFromOrigin(self::GO_TO_EDIT_MODE)
                ->build();

        }

        /**
         *  We are still a reader, the redirection does not exist
         *  and the user is not allowed to edit the page (public or other)
         */
        $actionReaderFirst = $conf->getValue('ActionReaderFirst');
        if ($actionReaderFirst == self::NOTHING) {
            throw new ExceptionNotFound();
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = [
            $actionReaderFirst,
            $conf->getValue('ActionReaderSecond'),
            $conf->getValue('ActionReaderThird')
        ];

        // The loop stops at the first empty algorithm
        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    throw new ExceptionNotFound();

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null) {
                        try {
                            $notSamePage = $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId();
                        } catch (ExceptionBadArgument $e) {
                            LogUtility::error("The path should be wiki markup path", LogUtility::SUPPORT_CANONICAL, $e);
                            $notSamePage = false;
                        }
                        if ($notSamePage) {
                            $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_END_PAGE_NAME)
                                ->setTargetMarkupPath($bestEndPage);
                            switch ($method) {
                                case RouterRedirection::REDIRECT_PERMANENT_METHOD:
                                    return $redirectionBuilder
                                        ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                                        ->build();
                                case RouterRedirection::REDIRECT_NOTFOUND_METHOD:
                                    return $redirectionBuilder
                                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                                        ->build();
                                default:
                                    // no redirection, the next algorithm is tried
                                    LogUtility::error("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                            }
                        }

                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    $redirectBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_START_PAGE)
                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);

                    // Start page with the name of the start page.
                    // `$conf` is here the configuration object and not the DokuWiki global array,
                    // it cannot be read with `$conf['start']`
                    $startPage = getNS($identifier) . ':' . Site::getIndexPageName();
                    $startPath = MarkupPath::createMarkupFromId($startPage);
                    if (FileSystems::exists($startPath)) {
                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    $startPath = MarkupPath::createMarkupFromId($startPage);
                    if (FileSystems::exists($startPath)) {
                        return $redirectBuilder->setTargetMarkupPath($startPath)->build();
                    }

                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        $redirectionBuilder = RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_PAGE_NAME)
                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD);
                        if ($scorePageName > $namespaceScore) {
                            return $redirectionBuilder
                                ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestPageId))
                                ->build();
                        }
                        return $redirectionBuilder
                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
                            ->build();
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_BEST_NAMESPACE)
                            ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                            ->setTargetMarkupPath(MarkupPath::createMarkupFromId($bestNamespaceId))
                            ->build();
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_SEARCH_ENGINE)
                        ->setType(RouterRedirection::REDIRECT_NOTFOUND_METHOD)
                        ->build();

            }

        }

        throw new ExceptionNotFound();

    }


    /**
     * @return string|null - the id or null if the request has no `id` query property with a string value
     *
     * Return the original id from the request
     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    public static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        // The value is user input: `?id[]=x` gives an array that is not an id
        if (!is_string($originalId)) {
            return null;
        }
        // We may get a `/` as first character
        // because we return an id, we need to delete it
        if (substr($originalId, 0, 1) === "/") {
            $originalId = substr($originalId, 1);
        }
        // transform / to :
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Return a redirection declared in the page rules or throw if not found
     *
     * The first rule whose matcher matches the global `$ID` is applied.
     * The `*` of the matcher are captured and replace the `$1`, `$2`, ... of the target.
     * The target is first tried as URL, then as page id.
     *
     * @return RouterRedirection
     * @throws ExceptionNotFound - if no rule matches, if a matcher is not a valid pattern
     * or if the target page does not exist
     */
    private function getRedirectionFromPageRules(): RouterRedirection
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                throw new ExceptionNotFound();
            }
            if ($pregMatchResult) {
                // The placeholders are replaced in one pass:
                //   * `$1` does not corrupt `$10` (the longest placeholder wins)
                //   * a captured text that contains a placeholder is not replaced again
                $replacements = [];
                foreach ($matches as $key => $match) {
                    if ($key === 0) {
                        // the whole match
                        continue;
                    }
                    $replacements['$' . $key] = $match;
                }
                $calculatedTarget = $ruleTarget;
                if ($replacements !== []) {
                    $calculatedTarget = strtr($ruleTarget, $replacements);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            throw new ExceptionNotFound();
        }

        // If this is an external redirect (other domain)
        try {
            $url = Url::createFromString($calculatedTarget);
            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                ->setTargetUrl($url)
                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                ->build();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // not an URL
        }


        // If the page exist
        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        $markupPath = MarkupPath::createMarkupFromId($calculatedTarget);
        if (FileSystems::exists($markupPath)) {

            return RouterRedirectionBuilder::createFromOrigin(RouterRedirection::TARGET_ORIGIN_PAGE_RULES)
                ->setTargetMarkupPath($markupPath)
                ->setType(RouterRedirection::REDIRECT_PERMANENT_METHOD)
                ->build();

        }

        LogUtility::error("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist");
        throw new ExceptionNotFound();

    }


    /**
     * Search the best page among the pages that have the same name than the id
     *
     * The score is calculated from the configurations
     * `WeightFactorForSamePageName` and `WeightFactorForSameNamespace`
     *
     * @param $id - the requested id
     * @return array - with the keys `id` (the best page id) and `score`,
     * both values are null if no page was found
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) === 0) {
            return [
                'id' => $bestPageId,
                'score' => $scorePageName
            ];
        }

        // Search same namespace in the page found than in the Id page asked.
        $bestNbWordFound = 0;

        $wordsInPageSourceId = explode(':', $id);
        foreach ($pagesWithSameName as $targetPageId => $title) {

            // Nb of word found in the target page id
            // that are in the source page id
            $nbWordFound = 0;
            foreach ($wordsInPageSourceId as $word) {
                if ($word === '') {
                    // an empty needle is an error for substr_count (ValueError with PHP 8)
                    continue;
                }
                $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
            }

            if ($bestPageId == null) {

                // first candidate
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;
                continue;

            }

            // a candidate replaces the best one only if its id is shorter
            if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;

            }

        }
        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $weightFactorForSamePageName = $config->getValue('WeightFactorForSamePageName');
        $weightFactorForSameNamespace = $config->getValue('WeightFactorForSameNamespace');
        $scorePageName = $weightFactorForSamePageName + ($bestNbWordFound - 1) * $weightFactorForSameNamespace;
        return [
            'id' => $bestPageId,
            'score' => $scorePageName
        ];

    }

    /**
     * Search the best namespace for the id
     *
     * The candidates are the parent of the id if it exists,
     * otherwise the pages returned by the lookup of the start page name.
     * The score is calculated from the configurations
     * `WeightFactorForStartPage` and `WeightFactorForSameNamespace`
     *
     * @param $id - the requested id
     * @return array - with the keys `namespace` (the best id or null) and `score` (0 if nothing was found)
     */
    private
    function scoreBestNamespace($id): array
    {

        $nameSpaces = [];
        $pathNames = [];

        // Parameters
        $requestedPath = MarkupPath::createMarkupFromId($id);
        try {
            $pageNameSpace = $requestedPath->getParent();
            // NOTE(review): getRedirection() compares the parent with null,
            // the null value is therefore also handled as `no parent` - confirm the contract
            if ($pageNameSpace !== null) {
                $pathNames = array_slice($pageNameSpace->getNames(), 0, -1);
                if (FileSystems::exists($pageNameSpace)) {
                    $nameSpaces = [$pageNameSpace->toAbsoluteId()];
                } else {
                    global $conf;
                    // The page ids are the keys of the lookup (the values are the titles,
                    // see the loop of getBestPage)
                    $nameSpaces = array_map('strval', array_keys(ft_pageLookup($conf['start'])));
                }
            }
        } catch (ExceptionNotFound $e) {
            // no parent, root
        }

        // Parameters and search the best namespace
        $bestNbWordFound = 0;
        $bestNamespaceId = null;
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // the names of 2 characters or less are not counted
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            if ($nbWordFound > $bestNbWordFound) {
                // Take only the smallest namespace
                if ($bestNbWordFound == null || strlen($nameSpace) < strlen($bestNamespaceId)) {
                    $bestNbWordFound = $nbWordFound;
                    $bestNamespaceId = $nameSpace;
                }
            }
        }
        $config = ExecutionContext::getActualOrCreateFromEnv()->getConfig();
        $startPageFactor = $config->getValue('WeightFactorForStartPage');
        $nameSpaceFactor = $config->getValue('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return [
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        ];

    }


}