<?php

require_once(__DIR__ . '/../ComboStrap/PluginUtility.php');


use ComboStrap\AliasType;
use ComboStrap\DatabasePageRow;
use ComboStrap\DokuPath;
use ComboStrap\ExceptionCombo;
use ComboStrap\HttpResponse;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\Mime;
use ComboStrap\Page;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\PluginUtility;
use ComboStrap\Site;
use ComboStrap\Sqlite;
use ComboStrap\Url;
use ComboStrap\UrlManagerBestEndPage;


/**
 * Class action_plugin_combo_router
 *
 * The router (formerly known as the URL manager).
 *
 * When the requested id is not an existing page, it tries, in this order:
 *   * a permalink (page id) or an extended permalink (abbreviated page id),
 *   * a canonical,
 *   * an alias,
 *   * the page rules,
 *   * the edit mode (for a writer),
 *   * the configured reader algorithms (best end page name, start page,
 *     best page name, best namespace, search engine).
 *
 * It also answers early (see {@link action_plugin_combo_router::ban()})
 * to well-known files and to requests that look like vulnerability scans.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id): the global id is rewritten, no HTTP redirect
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 with a `Refresh` header (when the best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters (values of the reader algorithm configurations)
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_NAME = "Location";
    const LOCATION_HEADER_PREFIX = self::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * once Sqlite is known to be available
     * @var PageRules
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the url from a complete `Refresh` header line
     * (ie what follows {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     *
     * @param $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromRefresh($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a complete `Location` header line
     * (ie what follows {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     *
     * @param $refreshHeader - the full header line (the name is historical, this is the location header)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string - the `id` query parameter where the slashes have been
     * replaced by the path separator, or the empty string when the parameter
     * is missing or is not a string (ie `id[]=foo`)
     */
    private static function getOriginalIdFromRequest(): string
    {
        $originalId = $_GET["id"] ?? "";
        if (!is_string($originalId)) {
            // An array is not a page id, the callers expect a string
            return "";
        }
        return str_replace("/", DokuPath::PATH_SEPARATOR, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * WordPress probes, ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * SQL injection probes, ie
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * Git repository probes (only when the id starts with `git:`), ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Is the id a well-known file (ie the id starts with `well-known`) ?
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the two hooks of the router (only if the router is enabled)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (PluginUtility::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * once DokuWiki is started.
             *
             * DOKUWIKI_STARTED is not the first event
             * (that's why the ban is registered on another event below)
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'AFTER',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a well-known file gets a 404 response,
     *   * a shadow banned id is transparently redirected to the home page.
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === "") {
            // No id requested: it can't be a well-known file nor a banned id
            return;
        }

        $page = Page::createPageFromId($id);
        if ($page->exists()) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                ->send();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getHomePageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The main routing function, called on `DOKUWIKI_STARTED`
     * and only for the `show` action.
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        global $ACT;
        if ($ACT !== 'show') return;


        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        $sqlite = Sqlite::createOrGetSqlite();
        if ($sqlite == null) {
            return;
        }
        $this->pageRules = new PageRules();

        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_metalang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $targetPage = Page::createPageFromId($ID);
        if ($targetPage->exists()) {

            /**
             * If this is not the root home page
             * and if the canonical id is the not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $targetPage->getUrlId() // The id may have been changed
                && $ID != Site::getHomePageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 *   but that's not the case actually
                 */
                if ($targetPage->getDatabasePage()->exists()) {
                    $this->executePermanentRedirect(
                        $targetPage->getCanonicalUrl([], true),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($targetPage->getPath()->getLastName());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($targetPage->getParentPage() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getPage();
                if ($page !== null && $page->exists()) {
                    $res = $this->executePermanentRedirect(
                        $page->getCanonicalUrl([], true),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    if ($res) {
                        // The redirection was sent, don't try to send another one
                        return;
                    }
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if (
                $pageId !== null
            ) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getPage();
                if ($page === null) {
                    // or the length of the abbr has changed
                    // (strlen returns an integer, the expression is then safe to build by concatenation)
                    $databasePage = new DatabasePageRow();
                    $row = $databasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $databasePage->setRow($row);
                        $page = $databasePage->getPage();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl([], true),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getDokuwikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $databasePage = DatabasePageRow::createFromCanonical($identifier);
        $targetPage = $databasePage->getPage();
        if ($targetPage !== null && $targetPage->exists()) {
            /**
             * Does the canonical url is canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($targetPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $targetPage->getDokuwikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $targetPage->getDokuwikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $targetPage = DatabasePageRow::createFromAlias($identifier)->getPage();
        if (
            $targetPage !== null
            && $targetPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $targetPage->getBuildAlias() !== null
        ) {
            $buildAlias = $targetPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($targetPage->getDokuwikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($targetPage->getCanonicalUrl([], true), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /*
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and their is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        // The loose comparison is wanted: the loop stops at the first empty configuration
        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    list($targetPage, $method) = UrlManagerBestEndPage::process($identifier);
                    if ($targetPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($targetPage, self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // NOTE(review): the namespace target is reported with the `bestPageName` origin,
                            // kept as is because the origin is passed to the router message - confirm the intent
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();
                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     *
     * Search the start page that has the most words in common
     * with the namespace of the id
     *
     * @param $id
     * @return array with the keys:
     *   * `namespace`: the id of the best start page found (empty string if none)
     *   * `score`: its score (0 if none)
     */
    private
    function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // The lookup returns the page ids as keys (the values are the titles),
            // as used in getBestPage. We score and return ids, not titles.
            $nameSpaces = array_keys(ft_pageLookup($conf['start']));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // Short names are not significant
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // NOTE(review): on a tie, the first candidate is kept.
            // A former comment suggested that the smallest id should win - confirm the intent
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the current action to the edit mode
     *
     * @param $event
     */
    private
    function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private
    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        // Because we set the ID globally, the target must be a page id (a string)
        // and not a Page object: the type declaration of the parameter enforces it
        global $ID;
        global $INFO;
        // The global id may not be set yet when we are called from the ban function
        // (the environment is not created at this point), we log then the requested id
        $sourceId = $ID ?? self::getOriginalIdFromRequest();
        $ID = $targetPageId;

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * Send a permanent HTTP redirect
     *
     * @param string $target - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target (the algorithm used)
     * @return bool - true if the redirect was sent
     */
    private function executePermanentRedirect(string $target, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($target, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $target - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection
     */
    private
    function executeHttpRedirect(string $target, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $target, $targetOrigin, $method);


        // An external url ?
        $isValid = Url::isValid($target);
        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isValid && strpos($target, DOKU_URL) === 0) {
            $isValid = true;
        }
        if ($isValid) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($target);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $target, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor part exists only if the target has a `#`
            if (!empty($link[1])) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                HttpResponse::create(HttpResponse::STATUS_PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->send();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                HttpResponse::create(HttpResponse::STATUS_NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->send(self::PAGE_404, Mime::HTML);
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the page that has the same name than the id
     * and the most words in common with it
     *
     * @param $id
     * @return array with the keys:
     *   * `id`: the id of the best page (null if no page has the same name)
     *   * `score`: its score (null if no page has the same name)
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    if ($word === '') {
                        // ie an id that starts with `:` or that contains `::`
                        // An empty needle is an error for substr_count
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // With at least the same number of words, the shortest id wins
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            // The first word found is the page name, the others are namespace names
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        }

        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private
    function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the Sqlite backend.
     * Nothing is logged if the Sqlite backend is not available.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target
     * @param $method - http or rewrite
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referer header is optional
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );

        // This function is also called from the ban function, ie before any Sqlite check
        $sqlite = Sqlite::createOrGetBackendSqlite();
        if ($sqlite === null) {
            return;
        }

        $request = $sqlite
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Rexgexp
            // NOTE(review): the matcher is not quoted, the regular expression characters
            // that it contains are then interpreted - kept as is for the existing rules
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            // The warning is suppressed because an invalid pattern is handled just below
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                // An invalid rule should not disable the rules that follow
                continue;
            }
            if ($pregMatchResult) {
                // Replace the group references ($1, $2, ...) of the target by the matched values
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // The whole match is not a group
                        continue;
                    } else {
                        $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                    }
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        if (Url::isValid($calculatedTarget)) {

            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;

        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            return $this->executeTransparentRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES);

        }

        LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
        return false;

    }

    /**
     * Send a `404` response that refreshes to the target
     *
     * @param string $targetId - a dokuwiki id or an url
     * @param string $origin - the origin of the target (the algorithm used)
     * @return bool - true if the response was sent
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}