<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\DokuwikiId;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The router (formerly named "URL manager").
 *
 * When the requested page does not exist, it tries in order:
 *   * a permalink (page id / abbreviated page id in the URL),
 *   * a canonical,
 *   * an alias,
 *   * a page rule,
 *   * the edit mode for a writer,
 *   * the configured reader algorithms (best end page name, namespace start page,
 *     best page name, best namespace, search engine).
 *
 * Every redirection is logged via {@link action_plugin_combo_router::logRedirection()}.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters (values of the reader/writer action configurations)
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     *
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Strip the refresh header prefix ({@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     * from a full `Refresh` header line
     *
     * @param string $refreshHeader - the full header line
     * @return false|string - what follows the prefix
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Strip the location header prefix ({@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     * from a full `Location` header line
     *
     * @param string $refreshHeader - the full `Location` header line (the name is historical)
     * @return false|string - what follows the prefix
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * Return the id as requested in the query string, with every `/` replaced
     * by the namespace separator.
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string|null - null when there is no `id` query parameter or when it is not
     * a string (ie `?id[]=x` yields an array that the callers cannot handle)
     */
    private static function getOriginalIdFromRequest()
    {
        $originalId = $_GET["id"] ?? null;
        if (!is_string($originalId)) {
            return null;
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool - true when the id matches one of the known probing patterns below
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * WordPress probing, ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * SQL injection probing, ie
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * Git directory probing (the id starts with `git:`), ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * @param string $id
     * @return bool - true when the id starts with `well-known`
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the two hooks of the router when it is enabled
     * ({@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * on DOKUWIKI_STARTED.
             * This event is not the first one, that's why the ban
             * is registered separately below.
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a requested id that does not exist as a page:
     *   * a well-known file gets a `not found` response,
     *   * a shadow banned id is transparently redirected to the index page of the site.
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            // an existing page is never banned
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            // NOTE(review): logRedirection needs the backend Sqlite; unlike router(), nothing
            // guards here against Sqlite not being available - confirm that it cannot throw
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The routing function, called on DOKUWIKI_STARTED
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        // Global variables needed in the process
        global $ID;
        global $conf;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                if ($row !== null) {
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 */
                if ($identifier != $page->getUrlId()) {
                    // Google asks for a redirect
                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                    // People access your site through several different URLs.
                    // If, for example, your home page can be reached in multiple ways
                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                    // and use redirects to send traffic from the other URLs to your preferred URL.
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                    return;
                }

                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                return;

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist
         * and the user is not allowed to edit the page (public or other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two scores
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // The target comes from the namespace scoring: the origin is the best namespace
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * Score the best namespace start page for a non-existing page id
     *
     * The candidates are:
     *   * the start page of the namespace of the id if it exists,
     *   * otherwise the pages found by a lookup on the start page name (`$conf['start']`).
     *
     * A candidate gets one point for each occurrence, in its id, of a name of the
     * namespace path of the requested id (names of 2 characters or less are ignored).
     * The highest count wins; on a tie, the shortest id wins.
     *
     * @param $id - the requested page id
     * @return array - `namespace`: the id of the best candidate (empty string if none),
     * `score`: its score (0 if none)
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, otherwise search the other namespaces
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace name)
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // ft_pageLookup returns a map of `page id => title`: the ids are the keys
            // (the cast is needed because PHP transforms numeric string keys into integers)
            $nameSpaces = array_map('strval', array_keys((array)ft_pageLookup($conf['start'])));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }

            $isBetter = $nbWordFound > $bestNbWordFound;
            // On a tie, take only the smallest namespace
            $isSmallerOnTie = $nbWordFound > 0
                && $nbWordFound === $bestNbWordFound
                && strlen($nameSpace) < strlen($bestNamespaceId);
            if ($isBetter || $isSmallerOnTie) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the requested action to the edit mode
     * by setting the global `$ACT`
     *
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * The global `$ID`, the `id` of the request (when present) and the global `$INFO`
     * are overwritten with the target.
     *
     * The parameters are declared as `string` because the id is set globally
     * and a {@link MarkupPath} object would create errors later
     * (ie in {@link \ComboStrap\AnalyticsMenuItem}
     * because the constructor of {@link \dokuwiki\Menu\Item\AbstractItem} reads it).
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function executePermanentRedirect(string $targetIdOrUrl, string $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * ({@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     * or {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD})
     * @return bool - true when the response was sent, false when the method is not an http redirection
     */
    private function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            $url = UrlEndpoint::createDokuUrl();

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
                $url->setQueryParameter("q", $query);
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the SEO warning / error
             * `Alternative page with proper canonical tag`
             *
             * Use HTTP X header for debug
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
            }

            $id = $link[0];
            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
            if (array_key_exists(1, $link)) {
                $url->setFragment($link[1]);
            }
            $targetUrl = $url->toAbsoluteUrlString();

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best existing page for a non-existing page id,
     * among the pages found by a lookup on the page name (without namespace)
     *
     * @param $id - the requested page id
     * @return array - `id`: the id of the best page, `score`: its score
     * (both are null when the lookup returns no page)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // PHP transforms numeric string keys into integers
                $targetPageId = (string)$targetPageId;

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    if ($word === '') {
                        // an empty needle is an error for substr_count
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId === null) {

                    // the first page found is the first best candidate
                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // a shorter id replaces the candidate when it has at least as many words
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     * (a not found redirect on the requested id with the search engine origin)
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row into the `redirections_log` table of the backend Sqlite database
     *
     * An insert error is reported with {@link LogUtility::msg()} and is not propagated.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored in the `TYPE` column)
     * @param $method - the redirection method (stored in the `METHOD` column)
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the page rules for the requested id and executes it.
     *
     * The matcher of a rule is a glob (`*` becomes the capturing group `(.*)`),
     * matched case-insensitively against the global `$ID`. The first matching rule wins
     * and the `$n` references of its target are replaced by the captured groups.
     *
     * @return bool - true if a redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                // From the highest group to the lowest, otherwise `$1` would be replaced inside `$10`
                foreach (array_reverse($matches, true) as $key => $match) {
                    if ($key == 0) {
                        // the whole match is not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // same handling as in executeHttpRedirect: not a valid url, this is then an id
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {
            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;
        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

    }

    /**
     * An HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}