<?php

require_once(__DIR__ . '/../ComboStrap/PluginUtility.php');


use ComboStrap\AliasType;
use ComboStrap\DatabasePageRow;
use ComboStrap\DokuPath;
use ComboStrap\ExceptionCombo;
use ComboStrap\HttpResponse;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\Mime;
use ComboStrap\Page;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\PluginUtility;
use ComboStrap\Site;
use ComboStrap\Sqlite;
use ComboStrap\Url;
use ComboStrap\UrlManagerBestEndPage;


/**
 * Class action_plugin_combo_router
 *
 * The router (formerly known as the URL manager).
 *
 * When the requested id is not an existing page, it tries in order:
 *   * the permalinks (page id and abbreviated page id),
 *   * the canonical,
 *   * the aliases,
 *   * the page rules,
 *   * the edit mode for a writer,
 *   * and finally the reader algorithms defined in the configuration
 *     (best end page name, namespace start page, best page name,
 *     best namespace, search engine).
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_NAME = "Location";
    const LOCATION_HEADER_PREFIX = self::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * once Sqlite is known to be available
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Strip the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX}
     * from a complete `Refresh` header line
     *
     * @param $refreshHeader - the full header line (name and value)
     * @return false|string - what follows the prefix (the result of `substr`)
     */
    public static function getUrlFromRefresh($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Strip the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX}
     * from a complete `Location` header line
     *
     * @param $refreshHeader - the full header line (name and value).
     *                         The parameter name is historical, this is a `Location` header.
     * @return false|string - what follows the prefix (the result of `substr`)
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return array|mixed|string|string[]
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * The slash of the requested id are replaced by the DokuWiki path separator.
     * When the request has no `id` query property, the empty string is used
     * (this avoids an `undefined index` notice).
     */
    private static function getOriginalIdFromRequest()
    {
        $originalId = $_GET["id"] ?? "";
        return str_replace("/", DokuPath::PATH_SEPARATOR, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Is the id a `well-known` resource (ie the id starts with `well-known`)
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the two hooks of the router when the router is enabled
     * ({@link action_plugin_combo_router::ROUTER_ENABLE_CONF})
     *
     * @param Doku_Event_Handler $controller
     */
    function register(Doku_Event_Handler $controller)
    {

        if (PluginUtility::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * The event is DOKUWIKI_STARTED, this is not the first event of a request
             *
             * https://www.dokuwiki.org/devel:event:dokuwiki_started
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'AFTER',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a page that does not exist:
     *   * a `well-known` id gets a `not found` response
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        $page = Page::createPageFromId($id);
        if (!$page->exists()) {
            // Well known
            if (self::isWellKnownFile($id)) {
                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->send();
                return;
            }

            // Shadow banned
            if (self::isShadowBanned($id)) {
                $webSiteHomePage = Site::getHomePageName();
                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
            }
        }
    }

    /**
     * The main entry point of the router (hook of DOKUWIKI_STARTED)
     *
     * It does nothing when the action is not `show` or when Sqlite is not available.
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    function router(&$event, $param)
    {

        global $ACT;
        if ($ACT !== 'show') return;


        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        $sqlite = Sqlite::createOrGetSqlite();
        if ($sqlite == null) {
            return;
        } else {
            $this->pageRules = new PageRules();
        }

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_metalang::load_lang()}
         * function, that's why we check against the {@link $_REQUEST}
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $targetPage = Page::createPageFromId($ID);
        if ($targetPage->exists()) {

            /**
             * If this is not the root home page
             * and if the canonical id is the not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $targetPage->getUrlId() // The id may have been changed
                && $ID != Site::getHomePageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 *   but that's not the case actually
                 */
                if ($targetPage->getDatabasePage()->exists()) {
                    $this->executePermanentRedirect(
                        $targetPage->getCanonicalUrl([], true),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($targetPage->getPath()->getLastName());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($targetPage->getParentPage() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getPage();
                if ($page !== null && $page->exists()) {
                    // NOTE(review): there is no `return` after this redirect,
                    // presumably because the response ends the request - to confirm
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl([], true),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if (
                $pageId !== null
            ) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getPage();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $databasePage = new DatabasePageRow();
                    $row = $databasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $databasePage->setRow($row);
                        $page = $databasePage->getPage();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl([], true),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getDokuwikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $databasePage = DatabasePageRow::createFromCanonical($identifier);
        $targetPage = $databasePage->getPage();
        if ($targetPage !== null && $targetPage->exists()) {
            /**
             * Does the canonical url is canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($targetPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $targetPage->getDokuwikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $targetPage->getDokuwikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $targetPage = DatabasePageRow::createFromAlias($identifier)->getPage();
        if (
            $targetPage !== null
            && $targetPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $targetPage->getBuildAlias() !== null
        ) {
            $buildAlias = $targetPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($targetPage->getDokuwikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /*
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and their is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    list($targetPage, $method) = UrlManagerBestEndPage::process($identifier);
                    if ($targetPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return an array with the keys `namespace` (the best start page id found)
     * and `score`
     *
     * The candidates are:
     *   * the start page of the namespace of the id if it exists
     *   * otherwise the pages found by a lookup on the start page name
     *
     * A candidate gets one point each time a name of the namespace path
     * (longer than 2 characters) is found in its id.
     *
     * @param $id
     * @return array
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // ft_pageLookup returns an array `page id => title`,
            // the candidates are the ids (ie the keys), not the titles
            $nameSpaces = array_keys(ft_pageLookup($conf['start']));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // `>=` and not `>`, otherwise the tie-break below is never reached
            if ($nbWordFound >= $bestNbWordFound) {
                // Take only the smallest namespace when the number of words found is the same
                if (strlen($nameSpace) < strlen($bestNamespaceId) or $nbWordFound > $bestNbWordFound) {
                    $bestNbWordFound = $nbWordFound;
                    $bestNamespaceId = $nameSpace;
                }
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * The parameters are typed as `string`: an object can therefore never
     * reach the body and the global ID is always set with a string
     * (a {@link Page} object as global ID gives an error
     * in the {@link \ComboStrap\AnalyticsMenuItem}).
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $target - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function executePermanentRedirect(string $target, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($target, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $target - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an HTTP redirection method
     */
    private function executeHttpRedirect(string $target, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $target, $targetOrigin, $method);


        // An external url ?
        $isValid = Url::isValid($target);
        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isValid && strpos($target, DOKU_URL) === 0) {
            $isValid = true;
        }
        if ($isValid) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($target);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $target, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor is optional: without `#` in the target, there is no second element
            if (!empty($link[1])) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                HttpResponse::create(HttpResponse::STATUS_PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->send();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->send(self::PAGE_404, Mime::HTML);
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page among the pages that have the same name than the id
     *
     * For each page found, the words of the id (ie the names between the `:`)
     * are counted in the page id. The first page is the initial best page,
     * another page replaces it if it has at least as much words and a shorter id.
     *
     * @param $id
     * @return array - an array with the keys `id` and `score` (both null if no page was found)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    if ($word === '') {
                        // an id such as `:foo` or `a::b` gives empty words
                        // and substr_count does not accept an empty needle
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId === null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     * (a `not found` redirect where the requested id is used to build the search query)
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the Sqlite backend database.
     * An insert error is reported via {@link LogUtility::msg()} and is not thrown.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored as `TYPE`)
     * @param $method - http or rewrite
     */
    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referer header is optional
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );

        // NOTE(review): assumes that the backend, as `Sqlite::createOrGetSqlite()`,
        // may be null when Sqlite is not available - to confirm.
        // The ban function logs before any Sqlite availability check.
        $sqlite = Sqlite::createOrGetBackendSqlite();
        if ($sqlite === null) {
            return;
        }

        $request = $sqlite
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     *
     * The first rule whose matcher matches the global ID gives the target.
     * The `$n` placeholders of the target are replaced by the value of the nth `*`.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Rexgexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                // The highest group first, otherwise the replacement of `$1`
                // would also change the beginning of `$10`
                foreach (array_reverse($matches, true) as $key => $match) {
                    if ($key == 0) {
                        // the whole match is not a placeholder
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        if (Url::isValid($calculatedTarget)) {

            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;

        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            $res = $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            if ($res) {
                return true;
            } else {
                return false;
            }

        } else {

            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;

        }

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     * (a `not found` status with a `Refresh` header)
     *
     * @param string $targetId - a dokuwiki id or an url
     * @param string $origin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}