<?php

use ComboStrap\DatabasePageRow;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * When the requested id does not resolve to the expected page, the router
 * tries the following resolutions in order:
 *   * permalink (full page id) and extended permalink (abbreviated page id),
 *   * canonical,
 *   * alias,
 *   * page rules,
 *   * edit mode for writers,
 *   * the configured reader algorithms (best end page name, namespace start page,
 *     best page name, best namespace, search engine).
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * @var PageRules - the page rules, instantiated in {@link action_plugin_combo_router::router()}
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Strip the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX} from a refresh header line
     *
     * @param string $refreshHeader - the full header line
     * @return false|string - the remaining part of the header (the url)
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Strip the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX} from a location header line
     *
     * @param string $refreshHeader - the full header line (the name is historical, this is a location header)
     * @return false|string - the remaining part of the header (the url)
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * Return the id as requested in the query string (slash replaced by the namespace separator)
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string - the requested id or the empty string if the `id` query parameter
     * is missing or is not a string (ie `?id[]=foo`)
     */
    private static function getOriginalIdFromRequest(): string
    {
        // The id parameter is optional (home page request) and is untrusted input:
        // it may be absent or an array. Both cases are normalized to the empty string
        // to avoid a warning here and a TypeError in the string typed functions downstream
        $originalId = $_GET["id"] ?? "";
        if (!is_string($originalId)) {
            return "";
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Determine if the id starts with `well-known`
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the hooks when the router is enabled
     * ({@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * Note that DOKUWIKI_STARTED is not the first event,
             * the ban is therefore registered on INIT_LANG_LOAD below
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a requested id that does not exist:
     *   * a well-known id gets a `not found` response
     *   * a shadow banned id is transparently redirected to the index page
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        $page = MarkupPath::createMarkupFromId($id);
        if (!FileSystems::exists($page)) {
            // Well known
            if (self::isWellKnownFile($id)) {
                $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->end();
                return;
            }

            // Shadow banned
            if (self::isShadowBanned($id)) {
                $webSiteHomePage = Site::getIndexPageName();
                $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
            }
        }
    }

    /**
     * The router: the handler of the DOKUWIKI_STARTED event
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }


        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is the not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    // The redirect has been sent: stop here, otherwise (if the response `end`
                    // hands the control back) the abbreviated permalink branch below
                    // would send a second redirect for the same request
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if ($pageId !== null) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $canonicalDatabasePage = new DatabasePageRow();
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $canonicalDatabasePage->setRow($row);
                        $page = $canonicalDatabasePage->getMarkupPath();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Does the canonical url is canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return the best namespace candidate for an id and its score
     *
     * The candidates are the start page of the namespace of the id if it exists,
     * otherwise the result of a page lookup on the start page name.
     * A candidate gets one point for each occurrence of each namespace name
     * (of more than 2 characters) of the id.
     *
     * @param $id
     * @return array - an array with the keys `namespace` and `score`
     */
    private
    function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // Strictly greater: among candidates with the same count, the first one is kept
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private
    function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * The ids are typed as string: because we set the ID globally for the ID redirect,
     * it should never be a {@link MarkupPath} object otherwise we got an error
     * in the {@link \ComboStrap\AnalyticsMenuItem}
     * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private
    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target
     * @return bool
     */
    private function executePermanentRedirect(string $targetIdOrUrl, string $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if the response was sent, false if the method is not an http redirection method
     */
    private
    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isValid = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isValid = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isValid && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isValid = true;
        }
        if ($isValid) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor part exists only if the target contains a `#`
            // (`empty` avoids the undefined offset warning when there is none)
            if (!empty($link[1])) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Return the best page candidate for an id and its score
     *
     * The candidates are the pages returned by a page lookup on the page name of the id.
     * A candidate is scored with the number of occurrences of the id parts in its own id.
     *
     * @param $id
     * @return array - an array with the keys `id` and `score` (both null if no candidate was found)
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    // An empty part (ie a leading or a double separator in the id)
                    // matches nothing and an empty needle is an error for substr_count
                    if ($word === '') {
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // On an equal or better count, the shortest id wins
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private
    function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row into the `redirections_log` table
     *
     * An insertion error is reported via {@link LogUtility::msg()} and is not propagated.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored as `TYPE`)
     * @param $method - http or rewrite
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referrer header is optional (direct access, privacy settings, ...)
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     *
     * The first rule whose matcher matches the global id is applied:
     * the `$1`, `$2`, ... placeholders of its target are replaced by the captured groups.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    // The key 0 is the whole match, not a captured group
                    if ($key == 0) {
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        // (same exceptions as in executeHttpRedirect for the same call)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

        }

        LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
        return false;

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}