<?php

require_once(__DIR__ . '/../ComboStrap/PluginUtility.php');


use ComboStrap\Alias;
use ComboStrap\AliasType;
use ComboStrap\DatabasePageRow;
use ComboStrap\DokuPath;
use ComboStrap\ExceptionCombo;
use ComboStrap\HttpResponse;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\Mime;
use ComboStrap\Page;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PluginUtility;
use ComboStrap\Site;
use ComboStrap\Sqlite;
use ComboStrap\Url;
use ComboStrap\UrlManagerBestEndPage;
use ComboStrap\PageUrlType;


/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * It registers two hooks (see {@link action_plugin_combo_router::register()}):
 *   * `INIT_LANG_LOAD` (before) => {@link action_plugin_combo_router::ban()}
 *   * `DOKUWIKI_STARTED` (after) => {@link action_plugin_combo_router::router()}
 *
 * When the requested page does not exist, the router tries, in order:
 * the page id permalinks, the canonical, the aliases, the page rules,
 * the edit mode for a writer and finally the configured reader algorithms.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_NAME = "Location";
    const LOCATION_HEADER_PREFIX = self::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * once Sqlite is known to be available
     *
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the url from a full `Refresh` header line
     * by cutting off the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX}
     *
     * @param $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromRefresh($refreshHeader)
    {
        return substr($refreshHeader, strlen(self::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a full `Location` header line
     * by cutting off the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX}
     *
     * @param $refreshHeader - the full header line (the name is historical, this is a location header)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(self::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return string - the id requested in the query string where
     * the slash is replaced by the DokuWiki path separator
     * (the empty string if there is no usable `id` parameter)
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    private static function getOriginalIdFromRequest(): string
    {
        // The id parameter is optional (home page request) and under the control
        // of the client (`id[]=x` gives an array): we accept only a string
        $originalId = $_GET["id"] ?? '';
        if (!is_string($originalId)) {
            return '';
        }
        return str_replace("/", DokuPath::PATH_SEPARATOR, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * @param string $id
     * @return bool - true if the id starts with `well-known`
     *
     * Example:
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the hooks when the router is enabled
     * (configuration {@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (PluginUtility::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             *
             * DOKUWIKI_STARTED is not the first event, that's why
             * the ban is registered on another event below
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'AFTER',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a page that does not exist:
     *   * a well-known file request gets a not found response
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        $page = Page::createPageFromId($id);
        if ($page->exists()) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                ->send();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getHomePageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The router: called after DOKUWIKI_STARTED for the `show` action
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        global $ACT;
        if ($ACT !== 'show') return;


        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        $sqlite = Sqlite::createOrGetSqlite();
        if ($sqlite == null) {
            return;
        }
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_metalang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $targetPage = Page::createPageFromId($ID);
        if ($targetPage->exists()) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $targetPage->getUrlId() // The id may have been changed
                && $ID != Site::getHomePageName()
                && !isset($_REQUEST["rev"])
            ) {
                $this->executePermanentRedirect(
                    $targetPage->getCanonicalUrl([], true),
                    self::TARGET_ORIGIN_PERMALINK_EXTENDED
                );
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($targetPage->getPath()->getLastName());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($targetPage->getParentPage() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getPage();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect($page->getCanonicalUrl(), self::TARGET_ORIGIN_PERMALINK);
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if ($pageId !== null) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getPage();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $databasePage = new DatabasePageRow();
                    $row = $databasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row != null) {
                        $databasePage->setRow($row);
                        $page = $databasePage->getPage();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl([], true),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }
                    $this->executeTransparentRedirect($page->getDokuwikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $databasePage = DatabasePageRow::createFromCanonical($identifier);
        $targetPage = $databasePage->getPage();
        if ($targetPage !== null && $targetPage->exists()) {
            $res = $this->executePermanentRedirect($targetPage->getDokuwikiId(), self::TARGET_ORIGIN_CANONICAL);
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $targetPage = DatabasePageRow::createFromAlias($identifier)->getPage();
        if (
            $targetPage !== null
            && $targetPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $targetPage->getBuildAlias() !== null
        ) {
            $buildAlias = $targetPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($targetPage->getDokuwikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /*
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    list($targetPage, $method) = UrlManagerBestEndPage::process($identifier);
                    if ($targetPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // The namespace has won: the origin is the best namespace
                            // and not the best page name
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return the best namespace and its score
     *
     * The candidates are the start page of the namespace of the id when it exists,
     * otherwise the pages found by a lookup on the configured start page name.
     * A candidate gets one point each time that a path name of the id namespace
     * (longer than 2 characters) is found in it.
     *
     * @param $id
     * @return array - an array with the keys `namespace` and `score`
     */
    private
    function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // NOTE(review): the previous code had a nested test
            // `strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound`
            // ("take only the smallest namespace") that was always true in this branch.
            // The behavior is kept: with the same number of words, the first candidate wins.
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private
    function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private
    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        // We set the ID globally with the page id as string
        // (and not with a page object)
        global $ID;
        global $INFO;
        // This function is also called by the ban function on the INIT_LANG_LOAD event
        // where the global ID may not be set yet: we fall back to the requested id
        // because the log function asks for a string
        $sourceId = $ID ?? self::getOriginalIdFromRequest();
        $ID = $targetPageId;
        // Change the info id for the sidebar
        $INFO['id'] = $targetPageId;
        /**
         * otherwise there is:
         *   * a meta robot = noindex,follow
         * See {@link tpl_metaheaders()}
         */
        $INFO['exists'] = true;

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * A shortcut for an HTTP redirect with the permanent method
     *
     * @param string $target - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool - see {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function executePermanentRedirect(string $target, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($target, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $target - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection
     */
    private
    function executeHttpRedirect(string $target, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $target, $targetOrigin, $method);


        // An external url ?
        if (Url::isValidURL($target)) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($target);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $target, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor part exists only if the target has a `#`
            if (isset($link[1]) && $link[1] !== '') {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                HttpResponse::create(HttpResponse::STATUS_PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->send();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->send(self::PAGE_404, Mime::HTML);
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page for a page name
     *
     * The candidates are the pages found by a lookup on the page name of the id.
     * Each candidate gets one point each time that a path name of the id
     * is found in its id.
     *
     * @param $id
     * @return array - an array with the keys `id` and `score` (both null if no page was found)
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // A candidate wins only if it has at least as many words and a shorter id
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        }

        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private
    function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the Sqlite backend database
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target
     * @param $method - http or rewrite
     */
    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referer header is optional
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        // NOTE(review): the router function treats the Sqlite factory as nullable,
        // we presume that the backend factory may also return null - to confirm.
        // This function is also called by the ban function that does not check Sqlite
        $sqlite = Sqlite::createOrGetBackendSqlite();
        if ($sqlite === null) {
            return;
        }
        $request = $sqlite
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     *
     * The first rule whose matcher matches the global ID wins.
     * In the matcher, a star is a capturing group that can be
     * used in the target with `$1`, `$2`, ...
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Rexgexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            if (preg_match($regexpPattern, $ID, $matches)) {
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // The key 0 is the whole match, not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        if (Url::isValidURL($calculatedTarget)) {

            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;

        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {

            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;

        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeTransparentRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES);

    }

    /**
     * A shortcut for an HTTP redirect with the not found method
     *
     * @param string $targetId - a dokuwiki id or an url
     * @param string $origin - the origin of the target
     * @return bool - see {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}
