<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\DokuwikiId;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager: for a requested id, it decides if the request
 * must be rewritten (transparent redirect), redirected (HTTP permanent redirect
 * or `not found` + refresh) or left untouched.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * @var PageRules - created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the url from a full `Refresh` header line
     * (ie the part after {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     *
     * @param string $refreshHeader
     * @return false|string
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(self::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a full `Location` header line
     * (ie the part after {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     *
     * @param string $refreshHeader - the location header line (the name is historical)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(self::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return string|null
     *
     * Return the original id from the request
     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    private static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        // A query such as `id[]=x` yields an array, this is not a page id
        if (!is_string($originalId)) {
            return null;
        }
        // With url rewriting, we may get a `/` as first character
        // because we return an id, we need to delete it.
        // We delete it only when it is present, otherwise the first
        // character of a plain id (ie `?id=wp-admin`) would be lost
        if (strpos($originalId, "/") === 0) {
            $originalId = substr($originalId, 1);
        }
        // transform / to :
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * Determine if the id starts with `well-known`
     *
     * @param string $id
     * @return bool
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the hooks when the router is enabled
     * (configuration {@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (!SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {
            return;
        }

        /**
         * This will call the function {@link action_plugin_combo_router::router()}
         * before the DOKUWIKI_STARTED event.
         *
         * This is not the first event, the id may have been already
         * changed (see {@link action_plugin_combo_router::getOriginalIdFromRequest()})
         *
         * https://www.dokuwiki.org/devel:event:dokuwiki_started
         */
        $controller->register_hook('DOKUWIKI_STARTED', 'BEFORE', $this, 'router', []);

        /**
         * Bot Ban functionality
         *
         * Because we make a redirection to the home page, we need to check
         * if the home is readable, for that, the AUTH plugin needs to be initialized
         * That's why we wait
         * https://www.dokuwiki.org/devel:event:dokuwiki_init_done
         *
         * and we can't use
         * https://www.dokuwiki.org/devel:event:init_lang_load
         * because there is no auth setup in {@link auth_aclcheck_cb()}
         * and the line `if (!$auth instanceof AuthPlugin) return AUTH_NONE;` returns none;
         */
        $controller->register_hook('DOKUWIKI_INIT_DONE', 'BEFORE', $this, 'ban', []);

    }

    /**
     *
     * We have created a special ban function that is
     * registered on an earlier event than {@link action_plugin_combo_router::router()}
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a `well-known` id gets an empty `not found` response
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The router: for the `show` action, try in order
     *   * the permalink (page id in the url),
     *   * the canonical,
     *   * the alias,
     *   * the page rules,
     *   * the edit mode for a writer,
     *   * and the configured reader algorithms
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same (the id has changed)
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                if ($row !== null) {
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 */
                if ($identifier != $page->getUrlId()) {
                    // Google asks for a redirect
                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                    // People access your site through several different URLs.
                    // If, for example, your home page can be reached in multiple ways
                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                    // and use redirects to send traffic from the other URLs to your preferred URL.
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                    return;
                }

                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                return;

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    break;
            }
            if ($res) {
                return;
            }
        }


        // If there is a redirection defined in the page rules
        if ($this->processingPageRules()) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist
         * and the user is not allowed to edit the page (public or other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms in order
        $readerAlgorithms = [
            $this->getConf('ActionReaderFirst'),
            $this->getConf('ActionReaderSecond'),
            $this->getConf('ActionReaderThird')
        ];

        // The loop stops on the first empty algorithm
        while (($algorithm = array_shift($readerAlgorithms)) != null) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // NOTE(review): the target is a namespace but the origin stays
                            // `bestPageName` (historical behavior, kept as is) - confirm if
                            // TARGET_ORIGIN_BEST_NAMESPACE was intended
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     *
     * Search the start page whose id contains the most names
     * of the namespace path of the given id
     *
     * @param $id
     * @return array - an array with the keys `namespace` (the best id found or the empty string) and `score`
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, otherwise search other namespaces
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace)
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = [$startPageNameSpace];
        } else {
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // short names (and empty names) are not significant
                if (strlen($pathName) > 2) {
                    $nbWordFound += substr_count($nameSpace, $pathName);
                }
            }
            // Only a strictly better count replaces the actual best:
            // on equality, the first one found is kept
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return [
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        ];

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event - not used
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!Identity::isReader($targetPageId)) {
            return false;
        }

        /**
         * Change the id
         *
         * The global ID should be a string and not a {@link MarkupPath}
         * object (the `string` type of the parameter enforces it) otherwise
         * we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
         * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
         */
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD} method
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection method
     */
    private function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            $url = UrlEndpoint::createDokuUrl();

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = [':', '_', '-'];
                $query = str_replace($replacementPart, ' ', $ID);
                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
                $url->setQueryParameter("q", $query);
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the SEO warning / error
             * `Alternative page with proper canonical tag`
             *
             * Use HTTP X header for debug
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
            }

            $id = $link[0];
            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
            if (array_key_exists(1, $link)) {
                $url->setFragment($link[1]);
            }
            $targetUrl = $url->toAbsoluteUrlString();

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search, in the pages that have the same name than the given id,
     * the page whose id shares the most words with the given id
     *
     * @param $id
     * @return array - an array with the keys `id` and `score` (both null if no page was found)
     */
    private function getBestPage($id): array
    {

        // Get Score from a page
        $pagesWithSameName = ft_pageLookup(noNS($id));
        if (count($pagesWithSameName) === 0) {
            return [
                'id' => null,
                'score' => null
            ];
        }

        // An id such as `:foo` or `a::b` gives empty words and
        // `substr_count` does not accept an empty needle (ValueError in PHP 8)
        $wordsInPageSourceId = array_filter(
            explode(':', $id),
            static function ($word) {
                return $word !== '';
            }
        );

        // Search same namespace in the page found than in the Id page asked.
        $bestPageId = null;
        $bestNbWordFound = 0;
        foreach ($pagesWithSameName as $targetPageId => $title) {

            // Nb of word found in the target page id
            // that are in the source page id
            $nbWordFound = 0;
            foreach ($wordsInPageSourceId as $word) {
                $nbWordFound += substr_count($targetPageId, $word);
            }

            // The first page is always taken, the next ones only if they have
            // at least the same number of words with a shorter id
            // (strict null check: a numeric page id is an integer array key)
            if (
                $bestPageId === null
                || ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId))
            ) {
                $bestNbWordFound = $nbWordFound;
                $bestPageId = $targetPageId;
            }

        }

        $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        return [
            'id' => $bestPageId,
            'score' => $scorePageName
        ];

    }


    /**
     * Redirect to the search engine
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the backend Sqlite database
     *
     * The log is best effort: a failure does not stop the redirection
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored as TYPE)
     * @param $method - http or rewrite
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = [
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        ];
        try {
            $request = Sqlite::createOrGetBackendSqlite()
                ->createRequest()
                ->setTableRow('redirections_log', $row);
        } catch (ExceptionSqliteNotAvailable $e) {
            // NOTE(review): assumes that the backend factory throws as
            // {@link Sqlite::createOrGetSqlite()} does when Sqlite is not available.
            // The ban function logs without any Sqlite check: no Sqlite, no log
            return;
        }
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     *
     * The first rule whose matcher matches the global ID gives the target
     * (`$1`, `$2`, ... in the target are replaced by the captured groups)
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                // A bad rule should not disable the next ones
                continue;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // the whole match is not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            // same exceptions than in the executeHttpRedirect function
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {
            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;
        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD} method
     *
     * @param string $targetId - the target id
     * @param string $origin - the origin of the target
     * @return bool
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}