<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager: when the requested page does not exist (or is not
 * requested through its canonical url), it tries successively permalinks,
 * canonical, aliases, page rules and finally the configured reader algorithms
 * to redirect the request.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created in {@link action_plugin_combo_router::router()}
     * and read in {@link action_plugin_combo_router::processingPageRules()}
     * @var PageRules
     */
    private $pageRules;


    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Extract the url from a full `Refresh` header line
     * (ie strip the {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     *
     * @param string $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a full `Location` header line
     * (ie strip the {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     *
     * @param string $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * Return the id as originally requested (slashes replaced by the namespace separator)
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_metalang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     *
     * @return string|null - null if there is no usable `id` query parameter
     */
    private static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        // A request such as `?id[]=x` yields an array. The callers expect a string
        // (they pass it to string typed functions), we treat it therefore as `no id`
        if (!is_string($originalId)) {
            return null;
        }
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }

        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;
    }

    /**
     * @param string $id
     * @return bool - true if the id starts with `well-known`
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the two hooks of the router (only if the router is enabled in the configuration)
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             * Note that DOKUWIKI_STARTED is not the first event fired
             * (that's why the ban is hooked on INIT_LANG_LOAD below)
             */
            $controller->register_hook(
                'DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array()
            );

            /**
             * This is the real first call of Dokuwiki
             * Unfortunately, it does not create the environment
             * We just ban to spare server resources
             *
             * https://www.dokuwiki.org/devel:event:init_lang_load
             */
            $controller->register_hook('INIT_LANG_LOAD', 'BEFORE', $this, 'ban', array());

        }

    }

    /**
     *
     * We have created a special ban function that is
     * called before the first function
     * {@link action_plugin_combo_metalang::load_lang()}
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a well-known file gets a 404
     *   * a shadow banned id gets a transparent redirect to the home page
     *
     * @param $event
     * @throws Exception
     */
    public function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The router: the handler of the DOKUWIKI_STARTED event
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    public function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the id of the request
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is not the same,
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id Website / root Permalink ?
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId !== null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
            if ($requestedMarkupPath->getParent() === null && $pageId !== null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            if ($pageId !== null) {
                $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
                if ($page === null) {
                    // or the length of the abbr has changed
                    $canonicalDatabasePage = new DatabasePageRow();
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    if ($row !== null) {
                        $canonicalDatabasePage->setRow($row);
                        $page = $canonicalDatabasePage->getMarkupPath();
                    }
                }
                if ($page !== null && $page->exists()) {
                    /**
                     * If the url canonical id has changed, we show it
                     * to the writer by performing a permanent redirect
                     */
                    if ($identifier != $page->getUrlId()) {
                        // Google asks for a redirect
                        // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                        // People access your site through several different URLs.
                        // If, for example, your home page can be reached in multiple ways
                        // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                        // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                        // and use redirects to send traffic from the other URLs to your preferred URL.
                        $this->executePermanentRedirect(
                            $page->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                        return;
                    }

                    $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                    return;

                }
                // permanent url not yet in the database
                // Other permanent such as permanent canonical ?
                // We let the process go with the new identifier

            }

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Is the canonical url canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         * We are still a reader, the redirection does not exist and the user is not allowed to edit the page (public or other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithms in their configured order
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 || $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            // NOTE(review): the target is a namespace but the origin logged is
                            // `bestPageName` (the algorithm that ran) - confirm that this is intended
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Score the namespace start pages against the namespace of the given id
     *
     * @param $id - the requested (non-existing) page id
     * @return array - with the keys `namespace` (the best start page id, empty if none) and `score`
     */
    private function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, otherwise search other namespaces
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace)
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            $nameSpaces = ft_pageLookup($conf['start']);
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                // Names of 2 characters or less are not counted
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // Only a strictly better count replaces the current best.
            // NOTE(review): the original code had an extra `smallest namespace` condition
            // that was always true under this guard; ties therefore keep the first candidate
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the global action to `edit`
     *
     * @param $event
     */
    private function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * The parameters are string typed: the global ID that is set here must not be
     * a {@link MarkupPath} object, otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
     * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * A {@link action_plugin_combo_router::executeHttpRedirect()} with the permanent method
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param $targetOrigin - the origin of the target
     * @return bool
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection method
     */
    private function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);


            $urlParams = [];
            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $urlParams["do"] = "search";
                $urlParams["q"] = $query;
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the error
             * `Alternative page with proper canonical tag`
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $urlParams[action_plugin_combo_routermessage::ORIGIN_PAGE] = $ID;
                $urlParams[action_plugin_combo_routermessage::ORIGIN_TYPE] = $targetOrigin;
            }

            $targetUrl = wl($link[0], $urlParams, true, '&');
            // %3A back to :
            $targetUrl = str_replace("%3A", ":", $targetUrl);
            if (array_key_exists(1, $link)) {
                $targetUrl .= '#' . rawurlencode($link[1]);
            }

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the best page among the pages that have the same name than the given id
     *
     * @param $id - the requested (non-existing) page id
     * @return array - with the keys `id` and `score` (both null if no page with the same name was found)
     */
    private function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;

            // An id with a leading, trailing or double separator yields empty names.
            // They are discarded because `substr_count` does not accept an empty needle
            // (ValueError on php 8, warning before)
            $wordsInPageSourceId = array_filter(
                explode(':', $id),
                static function ($word) {
                    return $word !== '';
                }
            );
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    // At least as many words found and a shorter id
                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     * (a `not found` redirect where the query is derived from the global ID)
     */
    private function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     * Add a row into the `redirections_log` table of the Sqlite backend database
     * An insert error is logged and not thrown
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic - stored in the `TYPE` column
     * @param $method - http or rewrite
     */
    public function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        $request = Sqlite::createOrGetBackendSqlite()
            ->createRequest()
            ->setTableRow('redirections_log', $row);
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table (ie the page rules)
     * The first rule that matches the global ID wins.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                return false;
            }
            if ($pregMatchResult) {
                // Replace the `$n` placeholders of the target with the captured groups
                $calculatedTarget = $ruleTarget;
                foreach ($matches as $key => $match) {
                    if ($key == 0) {
                        // the whole match is not a group
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        // The two exceptions are caught as in `executeHttpRedirect`
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

        }

        LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
        return false;

    }

    /**
     * A {@link action_plugin_combo_router::executeHttpRedirect()} with the `not found` method
     * (404 status with a `Refresh` header)
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}