<?php

require_once(__DIR__ . '/../ComboStrap/PluginUtility.php');


use ComboStrap\AliasType;
use ComboStrap\DatabasePageRow;
use ComboStrap\DokuPath;
use ComboStrap\ExceptionCombo;
use ComboStrap\HttpResponse;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\Mime;
use ComboStrap\Page;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\PluginUtility;
use ComboStrap\Site;
use ComboStrap\Sqlite;
use ComboStrap\Url;
use ComboStrap\UrlManagerBestEndPage;


/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * When a requested page id does not exist, the router tries in order:
 *   * permalinks (page id / abbreviated page id),
 *   * canonical,
 *   * alias,
 *   * page rules,
 *   * edit mode for a writer,
 *   * the configured reader algorithms (best end page name, start page,
 *     best page name, best namespace, search engine).
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_NAME = "Location";
    const LOCATION_HEADER_PREFIX = self::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * Set in {@link action_plugin_combo_router::router()} once Sqlite is known to be available.
     *
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the url from a full `Refresh` header line
     * (ie strip the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     *
     * @param $refreshHeader
     * @return false|string
     */
    public static function getUrlFromRefresh($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a full `Location` header line
     * (ie strip the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     *
     * @param $refreshHeader - the location header line (the name is historical)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return string - the id as requested in the query string where
     * the `/` separator is replaced by the DokuWiki path separator
     * (an empty string if there is no usable `id` parameter)
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    private static function getOriginalIdFromRequest(): string
    {
        // The `id` parameter is optional (home page request) and comes from the client:
        // it may be missing or may not be a string (ie `?id[]=x`)
        $originalId = $_GET["id"] ?? '';
        if (!is_string($originalId)) {
            $originalId = '';
        }
        return str_replace("/", DokuPath::PATH_SEPARATOR, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Determine if the id targets a `well-known` resource (the id starts with `well-known`)
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    function register(Doku_Event_Handler $controller)
    {

        if (PluginUtility::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * DOKUWIKI_STARTED is not the first event, the routing is done there
             * and the early ban is done on INIT_LANG_LOAD (see below)
             *
             * https://www.dokuwiki.org/devel:event:dokuwiki_started
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'AFTER',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a `well-known` id gets a 404 response
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        $page = Page::createPageFromId($id);
        if (!$page->exists()) {
            // Well known
            if (self::isWellKnownFile($id)) {
                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->send();
                return;
            }

            // Shadow banned
            if (self::isShadowBanned($id)) {
                $webSiteHomePage = Site::getHomePageName();
                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
            }
        }
    }

    /**
     * The routing function (only for the `show` action)
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    function router(&$event, $param)
    {

        global $ACT;
        if ($ACT !== 'show') return;


        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        $sqlite = Sqlite::createOrGetSqlite();
        if ($sqlite == null) {
            return;
        }
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_metalang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $targetPage = Page::createPageFromId($ID);
        if ($targetPage->exists()) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $targetPage->getUrlId() // The id may have been changed
                && $ID != Site::getHomePageName()
                && !isset($_REQUEST["rev"])
            ) {
                $this->executePermanentRedirect(
                    $targetPage->getCanonicalUrl([], true),
                    self::TARGET_ORIGIN_PERMALINK_EXTENDED
                );
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($targetPage->getPath()->getLastName());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($targetPage->getParentPage() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getPage();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl([], true),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    // The redirect has been sent: stop here, otherwise the
                    // abbreviated page id lookup below may send a second one
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if (
                $pageId !== null
            ) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getPage();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $databasePage = new DatabasePageRow();
                    $row = $databasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row != null) {
                        $databasePage->setRow($row);
                        $page = $databasePage->getPage();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl([], true),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }
                    $this->executeTransparentRedirect($page->getDokuwikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $databasePage = DatabasePageRow::createFromCanonical($identifier);
        $targetPage = $databasePage->getPage();
        if ($targetPage !== null && $targetPage->exists()) {
            /**
             * Is the canonical url canonical name based ?
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($targetPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $targetPage->getDokuwikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $targetPage->getDokuwikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $targetPage = DatabasePageRow::createFromAlias($identifier)->getPage();
        if (
            $targetPage !== null
            && $targetPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $targetPage->getBuildAlias() !== null
        ) {
            $buildAlias = $targetPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($targetPage->getDokuwikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /*
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    list($targetPage, $method) = UrlManagerBestEndPage::process($identifier);
                    if ($targetPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();
                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return an array with the keys `namespace` (best namespace id) and `score`
     * @param $id
     * @return array
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // NOTE(review): the values of the lookup result are scored below; confirm that
            // ft_pageLookup returns page ids as values here and not an `id => title` map
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            // Only the path names of more than 2 characters are counted
            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // The first namespace with the strictly highest count wins.
            // NOTE(review): the original code had a nested "take only the smallest namespace"
            // condition that was always true under this test; no tie-break on length
            // was ever applied, this behavior is kept.
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the current action to `edit`
     *
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * Because the ID is set globally, the target must be a page id (a string)
     * and not a {@link Page} object otherwise we got an error in the
     * {@link \ComboStrap\AnalyticsMenuItem} because the constructor
     * takes it {@link \dokuwiki\Menu\Item\AbstractItem}.
     * The `string` type declaration of the parameters enforces it.
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        // Change the info id for the sidebar
        $INFO['id'] = $targetPageId;
        /**
         * otherwise there is:
         *   * a meta robot = noindex,follow
         * See {@link tpl_metaheaders()}
         */
        $INFO['exists'] = true;

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * Http redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $target - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool
     */
    private function executePermanentRedirect(string $target, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($target, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $target - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection method
     */
    private function executeHttpRedirect(string $target, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $target, $targetOrigin, $method);


        // An external url ?
        $isValid = Url::isValid($target);
        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isValid && strpos($target, DOKU_URL) === 0) {
            $isValid = true;
        }
        if ($isValid) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($target);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $target, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor part is only present when the target contains a `#`
            if (!empty($link[1])) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                HttpResponse::create(HttpResponse::STATUS_PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->send();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->send(self::PAGE_404, Mime::HTML);
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page that has the same name than the asked id
     *
     * @param $id
     * @return array - an array with the keys `id` (the best page id or null)
     * and `score` (null if no page with the same name was found)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    // An id such as `:foo` or `a::b` yields empty words
                    // and substr_count does not accept an empty needle
                    if ($word === '') {
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // On an equal or better count, the shortest page id wins
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the backend Sqlite database
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored in the TYPE column)
     * @param $method - http or rewrite
     */
    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referer header is optional
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );

        // NOTE(review): router() treats a null Sqlite handle as "not available";
        // presumably the backend handle may be null as well (ban() logs before that check).
        // Logging is best-effort: without database, there is nothing to do.
        $sqlite = Sqlite::createOrGetBackendSqlite();
        if ($sqlite === null) {
            return;
        }

        $request = $sqlite
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Rexgexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                // Substitute the captured groups ($1, $2, ...) in the target
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key === 0) {
                        // The key 0 is the whole match, not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        if (Url::isValid($calculatedTarget)) {

            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;

        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            return $this->executeTransparentRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES);

        }

        LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
        return false;

    }

    /**
     * Http redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     *
     * @param string $targetId - a dokuwiki id or an url
     * @param string $origin - the origin of the target
     * @return bool
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}