<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\DokuwikiId;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * When the requested page does not exist, it tries in order:
 *   * a permalink (page id / abbreviated page id),
 *   * a canonical,
 *   * an alias,
 *   * the page rules,
 *   * the edit mode (for a writer),
 *   * the configured reader algorithms (best end page name, start page, best page name, ...)
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     * @var PageRules
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the URL of a `Refresh` header line by stripping
     * the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX} length from the start
     *
     * @param string $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the URL of a `Location` header line by stripping
     * the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX} length from the start
     *
     * @param $refreshHeader - the full location header line (the name is historical)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return string|null
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    private static function getOriginalIdFromRequest(): ?string
    {
        // and not $_GET["id"] otherwise we may get a `/`
        $originalId = getID();
        if ($originalId === null) {
            return null;
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * WordPress probes, ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * SQL injection probes, ie
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * Git directory probes, ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * @param string $id
     * @return bool - true if the id starts with `well-known`
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the two hooks of the router when it is enabled
     * ({@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * on the DOKUWIKI_STARTED event.
             * Note that this event is not the first one (see the ban hook below)
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a page that does not exist:
     *   * a well-known id gets a 404 response,
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            // an existing page is never banned
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The router: called on DOKUWIKI_STARTED, it searches a target
     * for the requested id and performs the redirection
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the {@link $_REQUEST}
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                if ($row !== null) {
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 */
                if ($identifier != $page->getUrlId()) {
                    // Google asks for a redirect
                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                    // People access your site through several different URLs.
                    // If, for example, your home page can be reached in multiple ways
                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                    // and use redirects to send traffic from the other URLs to your preferred URL.
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                    return;
                }

                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                return;

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based ?
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    [$bestEndPage, $method] = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // NOTE(review): the target is a namespace but the origin is the best page name one
                            // (and not TARGET_ORIGIN_BEST_NAMESPACE) - confirm that this is intended
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return an array with the keys `namespace` (best namespace id) and `score`
     * @param $id
     * @return array
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, otherwise search other namespaces
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace)
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // NOTE(review): the result is iterated by value here while getBestPage
            // reads the page id from the key of the same function - confirm which one holds the id
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            // Count the occurrences of the (long enough) names of the path in the candidate
            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound += substr_count($nameSpace, $pathName);
                }
            }
            // The first candidate with the strictly highest count wins
            // (the former length comparison was always bypassed by a redundant `or` clause)
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {
        /**
         * Because we set the ID globally for the ID redirect, the target must be a string
         * and not a {@link MarkupPath} object, otherwise we got an error
         * in the {@link \ComboStrap\AnalyticsMenuItem}.
         * The `string` type declarations of the parameters enforce it.
         */

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - false if the method is not an HTTP redirection method
     */
    private function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            $url = UrlEndpoint::createDokuUrl();

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
                $url->setQueryParameter("q", $query);
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the SEO warning / error
             * `Alternative page with proper canonical tag`
             *
             * Use HTTP X header for debug
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
            }

            $id = $link[0];
            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
            if (array_key_exists(1, $link)) {
                $url->setFragment($link[1]);
            }
            $targetUrl = $url->toAbsoluteUrlString();

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page among the pages returned by a lookup on the page name
     *
     * @param $id - the requested id
     * @return array - an array with the keys `id` and `score` (both null if the lookup returns no page)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) === 0) {
            return array(
                'id' => null,
                'score' => null
            );
        }

        // Search same namespace in the page found than in the Id page asked.
        $bestNbWordFound = 0;

        $wordsInPageSourceId = explode(':', $id);
        foreach ($pagesWithSameName as $targetPageId => $title) {

            // Nb of word found in the target page id
            // that are in the source page id
            $nbWordFound = 0;
            foreach ($wordsInPageSourceId as $word) {
                if ($word === '') {
                    // an empty needle is an error for substr_count (ValueError in PHP 8)
                    continue;
                }
                $nbWordFound += substr_count($targetPageId, $word);
            }

            if ($bestPageId == null) {

                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;

            } elseif ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                // At least as many words found with a shorter id
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;

            }

        }
        $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row into the `redirections_log` table of the backend Sqlite database.
     * An insert error is reported via {@link LogUtility::msg()} and not thrown.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target, stored in the TYPE column
     * @param $method - http or rewrite
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            // always release the request
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            // NOTE(review): the matcher is not escaped, a matcher with a `/` or
            // another regexp meta-character ends up in the error branch below
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                // Replace the $1, $2, ... of the target with the captured groups
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // the full match is not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                // the first matching rule wins
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        // Same exceptions as for the same call in executeHttpRedirect
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {

            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;

        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     * (404 status with a `Refresh` header)
     *
     * @param string $targetId
     * @param string $origin - the origin of the target
     * @return bool
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}