<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * When the requested page does not exist, it tries in order:
 *   * a permalink (page id / abbreviated page id),
 *   * a canonical,
 *   * an alias,
 *   * the page rules,
 *   * the edit mode for a writer,
 *   * and finally the configured reader algorithms
 *     (best end page name, namespace start page, best page name, best namespace, search engine).
 *
 * A redirection is either:
 *   * transparent: the global id is rewritten, no HTTP redirect
 *   * or an HTTP redirection (permanent or "not found" with a refresh header)
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters (values of the reader algorithm configurations)
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     *
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the target url of a `Refresh` header line built with
     * {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX}
     *
     * @param string $refreshHeader - the full header line
     * @return false|string - the header without the prefix
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(self::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the target url of a `Location` header line built with
     * {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX}
     *
     * @param string $refreshHeader - the full location header line
     *                                (the parameter name is historical)
     * @return false|string - the header without the prefix
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(self::LOCATION_HEADER_PREFIX));
    }

    /**
     * Return the id requested in the query string where
     * the slash separator is replaced by the namespace separator.
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string - the requested id or the empty string
     * if the `id` query parameter is missing or is not a string (ie `id[]=x`)
     */
    private static function getOriginalIdFromRequest(): string
    {
        // The `id` parameter is absent on the home page: no warning should be emitted
        $originalId = $_GET["id"] ?? '';
        if (!is_string($originalId)) {
            // An array value is not a valid id and would break the string typed callers
            return '';
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * WordPress probes, ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * wp-content:start
         * wp-admin:css:start
         * 2020:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * Sql injection probes, ie
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_...
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }

        /**
         * Git repository probes, ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Is the id a `well-known` file (ie the id starts with `well-known`) ?
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the hooks when the router is enabled in the configuration
     *
     * @param Doku_Event_Handler $controller
     */
    function register(Doku_Event_Handler $controller)
    {

        if (!SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            return;
        }

        /**
         * This will call the function {@link action_plugin_combo_router::router()}
         * Note that DOKUWIKI_STARTED is not the first event,
         * that's why the ban is registered on an earlier one below
         */
        $controller->register_hook('DOKUWIKI_STARTED',
            'BEFORE',
            $this,
            'router',
            array());

        /**
         * This is the real first call of Dokuwiki
         * Unfortunately, it does not create the environment
         * We just ban to spare server resources
         *
         * https://www.dokuwiki.org/devel:event:init_lang_load
         */
        $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a well-known file gets a `not found` response
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The routing function called on the DOKUWIKI_STARTED event
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The global id may have been changed by an earlier event handler,
         * that's why we check against the id of the query string ({@link $_GET})
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is the not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if ($pageId !== null) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $canonicalDatabasePage = new DatabasePageRow();
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $canonicalDatabasePage->setRow($row);
                        $page = $canonicalDatabasePage->getMarkupPath();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Does the canonical url is canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    break;
            }
            if ($res) {
                return;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and their is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     *
     * Search the namespace start page whose id contains the most
     * names of the namespace of the given id.
     *
     * @param $id - the requested (non-existing) page id
     * @return array - an array with the keys `namespace` (the best start page id
     * or the empty string) and `score` (0 if no word matches)
     */
    private
    function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // The lookup result is keyed by page id (the values are the titles,
            // see the loop in getBestPage), the candidates are then the keys
            $nameSpaces = array_keys(ft_pageLookup($conf['start']));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // Too short names are not significant (and an empty needle is not allowed)
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // NOTE(review): only a strictly better count replaces the best candidate,
            // a tie does not prefer the smallest namespace - confirm that this is wanted
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private
    function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private
    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        /**
         * The id is set globally below. The `string` type of the parameters
         * guarantees that it is never an object such as {@link MarkupPath}
         * (an object as global id gives an error in the menu items)
         */

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * An HTTP permanent redirect
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool - true if the redirect was sent
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if the response was sent, false if the method is not an http redirection method
     */
    private
    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isValid = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isValid = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isValid && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isValid = true;
        }
        if ($isValid) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            // Query String to pass the message
            $urlParams = [];
            if ($targetOrigin != self::TARGET_ORIGIN_PERMALINK) {
                $urlParams = array(
                    action_plugin_combo_routermessage::ORIGIN_PAGE => $ID,
                    action_plugin_combo_routermessage::ORIGIN_TYPE => $targetOrigin
                );
            }

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            // The anchor part exists only if the target contains a `#`
            if (!empty($link[1])) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {
            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;
            case self::REDIRECT_NOTFOUND_METHOD:

                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page that has the same name than the given id
     *
     * @param $id - the requested (non-existing) page id
     * @return array - an array with the keys `id` and `score`
     * (both are null if no page with the same name was found)
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;

            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    if ($word === '') {
                        // An empty needle is an error for substr_count
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } elseif ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                    // At least as many words with a shorter id: a better candidate
                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        }

        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private
    function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row into the `redirections_log` table of the backend Sqlite database
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored in the `TYPE` column)
     * @param $method - http or rewrite
     */
    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            // The referer header is optional
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table (the page rules) for the global id
     * and performs it.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Rexgexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                // NOTE(review): an invalid pattern stops the processing of the following rules
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                // Replace the group placeholders ($1, $2, ...) in the target
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // The full match is not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // Same exceptions as in executeHttpRedirect: the target is then handled as an id
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {
            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;
        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

    }

    /**
     * An HTTP `not found` redirect (404 status with a refresh header)
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool - true if the redirect was sent
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}