<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\DokuwikiId;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The router (previously called the "URL manager").
 *
 * It hooks into the DokuWiki start-up events and, for a requested id, decides
 * whether to:
 *   * answer with a 404 / ban the request ({@link action_plugin_combo_router::ban()}),
 *   * rewrite the global id without any HTTP round trip (transparent redirect),
 *   * send an HTTP redirect (permanent or "not found" + refresh).
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters (values of the reader/writer action configurations)
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    /** Body sent along with the "not found" refresh response */
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     *
     * @var PageRules
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the target url of a `Refresh` header line
     * by cutting away the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX} length.
     *
     * @param string $refreshHeader - the full header line (name and value)
     * @return false|string - whatever {@link substr()} returns for the remaining part
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the target url of a `Location` header line
     * by cutting away the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX} length.
     *
     * @param string $refreshHeader - the full header line (name and value)
     * @return false|string - whatever {@link substr()} returns for the remaining part
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * Return the id as it was requested (`$_GET["id"]`), with every `/`
     * replaced by the namespace separator.
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event.
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id.
     *
     * @return string|null - null when no id was requested or when the `id`
     * parameter is not a string (ie `?id[]=x`). A non-string value is client
     * controlled input that cannot be an id and would otherwise end up in
     * functions that only accept a string.
     */
    private static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        if (!is_string($originalId)) {
            return null;
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool - true if the id matches one of the known probing patterns
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * WordPress probing, ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * wp-content:start
         * wp-admin:css:start
         * 2020:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * (the same with sito, test, media, wp1, wp2, 2018, 2019 as first name)
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * SQL injection probing, ie
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_...
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * Git repository probing (the id starts with `git:`), ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * @param string $id
     * @return bool - true if the id starts with `well-known`
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the hooks, only if the router is enabled in the configuration
     * ({@link action_plugin_combo_router::ROUTER_ENABLE_CONF}, enabled by default)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * on the DOKUWIKI_STARTED event.
             *
             * DOKUWIKI_STARTED is not the first event, that's why
             * the ban is registered on another event below.
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array());

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a requested id that is not an existing page:
     *   * a well-known id gets a `not found` response,
     *   * a shadow banned id is transparently redirected to the index page.
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            // An existing page is never banned
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The main routing function, called on DOKUWIKI_STARTED.
     *
     * The lookups are tried in this order and the first one that
     * redirects ends the process:
     *   * existing page (redirect to the canonical url if the requested id differs)
     *   * permalink / extended permalink (page id at the end of the id)
     *   * canonical
     *   * alias
     *   * page rules
     *   * edit mode for a writer
     *   * the configured reader algorithms
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    // NOTE(review): there is no return after this redirect, the
                    // abbreviated page id lookup below is still executed - confirm this is intended
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if (
                $pageId !== null
            ) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $canonicalDatabasePage = new DatabasePageRow();
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $canonicalDatabasePage->setRow($row);
                        $page = $canonicalDatabasePage->getMarkupPath();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based ?
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist
         * and the user is not allowed to edit the page (public or other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms
        // in their configured order
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    // Get Score from a page name
                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // NOTE(review): the namespace target is logged with the
                            // best page name origin - confirm this is intended
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * Search the best namespace start page for an id and score it.
     *
     * The candidates are:
     *   * the resolved start page of the id namespace if it exists,
     *   * otherwise the pages found by a lookup on the start page name.
     *
     * A candidate gets one point for each occurrence of a name of the id
     * namespace (names of 2 characters or less are ignored).
     *
     * @param $id
     * @return array - with the keys `namespace` (the best candidate id, an empty string if none)
     * and `score` (0 if no name was found)
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, otherwise search other namespaces
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace name)
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // The lookup result is keyed by page id, as in getBestPage():
            // the candidates are the keys and not the values
            $nameSpaces = array_keys(ft_pageLookup($conf['start']));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            // numeric ids come back as integer array keys
            $nameSpace = (string)$nameSpace;

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // Only a strictly better candidate replaces the current one:
            // on a tie, the first candidate found is kept
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite (the global `$ID`, the `id` request parameters and `$INFO`)
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        // (the parameter is typed as string: the global id must stay a string and
        // never become a {@link MarkupPath} object, otherwise we got an error in
        // the {@link \ComboStrap\AnalyticsMenuItem})
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD} method
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection method
     */
    private function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            $url = UrlEndpoint::createDokuUrl();

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
                $url->setQueryParameter("q", $query);
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the SEO warning / error
             * `Alternative page with proper canonical tag`
             *
             * Use HTTP X header for debug
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
            }

            $id = $link[0];
            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
            if (array_key_exists(1, $link)) {
                $url->setFragment($link[1]);
            }
            $targetUrl = $url->toAbsoluteUrlString();

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page that has the same name than the id and score it.
     *
     * Among the pages found by a lookup on the page name, a page is better when
     * it contains at least as many names of the id and has a shorter id.
     *
     * @param $id
     * @return array - with the keys `id` and `score`, both null if no page was found
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // numeric ids come back as integer array keys
                $targetPageId = (string)$targetPageId;

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    if ($word === '') {
                        // An empty name (ie `a::b`) counts for nothing and
                        // substr_count does not accept an empty needle
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
        }

        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row in the `redirections_log` table of the backend SQLite database.
     *
     * An insert error is logged and does not stop the process.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - the origin of the target (stored in the `TYPE` column)
     * @param $method - the redirection method (stored in the `METHOD` column)
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the page rules for the global id.
     *
     * The first rule whose matcher matches is applied: the `*` of the matcher
     * is a capturing group and `$1`, `$2`, ... in the target are
     * replaced by the captured values.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            // The warning of an invalid pattern is suppressed because it's handled just below
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // the whole match is not a substitution value
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        // (same exceptions handling than in executeHttpRedirect)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page does not exist
        if (!page_exists($calculatedTarget)) {
            LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
            return false;
        }

        // This is DokuWiki Id and should always be lowercase
        // The page rule may have change that
        $calculatedTarget = strtolower($calculatedTarget);
        return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD} method
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}