<?php


use ComboStrap\DatabasePageRow;
use ComboStrap\DokuwikiId;
use ComboStrap\ExceptionBadArgument;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionNotFound;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FileSystems;
use ComboStrap\HttpResponse;
use ComboStrap\HttpResponseStatus;
use ComboStrap\Identity;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\AliasType;
use ComboStrap\Mime;
use ComboStrap\PageId;
use ComboStrap\PageRules;
use ComboStrap\PageUrlPath;
use ComboStrap\PageUrlType;
use ComboStrap\RouterBestEndPage;
use ComboStrap\Site;
use ComboStrap\SiteConfig;
use ComboStrap\Sqlite;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use ComboStrap\Web\UrlRewrite;
use ComboStrap\WikiPath;

require_once(__DIR__ . '/../vendor/autoload.php');

/**
 * Class action_plugin_combo_router
 *
 * The actual URL manager (router).
 *
 * For a requested id it decides whether to:
 *   * serve the page as is,
 *   * rewrite the id in place (transparent redirect),
 *   * answer with an HTTP redirect (permanent or "not found" + refresh),
 *   * or shadow-ban / 404 the request.
 */
class action_plugin_combo_router extends DokuWiki_Action_Plugin
{

    /**
     * @deprecated
     */
    const URL_MANAGER_ENABLE_CONF = "enableUrlManager";
    const ROUTER_ENABLE_CONF = "enableRouter";

    // The redirect type
    const REDIRECT_TRANSPARENT_METHOD = 'transparent'; // was (Id)
    // For permanent, see https://developers.google.com/search/docs/advanced/crawling/301-redirects
    const REDIRECT_PERMANENT_METHOD = 'permanent'; // was `Http` (301)
    const REDIRECT_NOTFOUND_METHOD = "notfound"; // 404 (See other) (when best page name is calculated)

    public const PERMANENT_REDIRECT_CANONICAL = "permanent:redirect";

    // Where the target id value comes from
    const TARGET_ORIGIN_WELL_KNOWN = 'well-known';
    const TARGET_ORIGIN_PAGE_RULES = 'pageRules';
    /**
     * Named Permalink (canonical)
     */
    const TARGET_ORIGIN_CANONICAL = 'canonical';
    const TARGET_ORIGIN_ALIAS = 'alias';
    /**
     * Identifier Permalink (full page id)
     */
    const TARGET_ORIGIN_PERMALINK = "permalink";
    /**
     * Extended Permalink (abbreviated page id at the end)
     */
    const TARGET_ORIGIN_PERMALINK_EXTENDED = "extendedPermalink";
    const TARGET_ORIGIN_START_PAGE = 'startPage';
    const TARGET_ORIGIN_BEST_PAGE_NAME = 'bestPageName';
    const TARGET_ORIGIN_BEST_NAMESPACE = 'bestNamespace';
    const TARGET_ORIGIN_SEARCH_ENGINE = 'searchEngine';
    const TARGET_ORIGIN_BEST_END_PAGE_NAME = 'bestEndPageName';
    const TARGET_ORIGIN_SHADOW_BANNED = "shadowBanned";


    // The constant parameters
    const GO_TO_SEARCH_ENGINE = 'GoToSearchEngine';
    const GO_TO_BEST_NAMESPACE = 'GoToBestNamespace';
    const GO_TO_BEST_PAGE_NAME = 'GoToBestPageName';
    const GO_TO_BEST_END_PAGE_NAME = 'GoToBestEndPageName';
    const GO_TO_NS_START_PAGE = 'GoToNsStartPage';
    const GO_TO_EDIT_MODE = 'GoToEditMode';
    const NOTHING = 'Nothing';

    /** @var string - a name used in log and other places */
    const NAME = 'Url Manager';
    const CANONICAL = 'router';
    const PAGE_404 = "<html lang=\"en\"><body></body></html>";
    const REFRESH_HEADER_NAME = "Refresh";
    const REFRESH_HEADER_PREFIX = self::REFRESH_HEADER_NAME . ': 0;url=';
    const LOCATION_HEADER_PREFIX = HttpResponse::LOCATION_HEADER_NAME . ": ";
    public const URL_MANAGER_NAME = "Router";


    /**
     * The page rules, created at the start of {@link action_plugin_combo_router::router()}
     * and read by {@link action_plugin_combo_router::processingPageRules()}
     *
     * @var PageRules
     */
    private $pageRules;


    function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();

    }

    /**
     * Extract the url from a `Refresh` header line
     * (ie the part after {@link action_plugin_combo_router::REFRESH_HEADER_PREFIX})
     *
     * @param string $refreshHeader - the full header line
     * @return false|string
     */
    public static function getUrlFromRefresh(string $refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::REFRESH_HEADER_PREFIX));
    }

    /**
     * Extract the url from a `Location` header line
     * (ie the part after {@link action_plugin_combo_router::LOCATION_HEADER_PREFIX})
     *
     * @param string $refreshHeader - the full header line (the name is historical, this is a location header)
     * @return false|string
     */
    public static function getUrlFromLocation($refreshHeader)
    {
        return substr($refreshHeader, strlen(action_plugin_combo_router::LOCATION_HEADER_PREFIX));
    }

    /**
     * @return string|null - the requested id or null if there is no (string) id in the query
     *
     * Return the original id from the request
     * ie `howto:how-to-get-started-with-combostrap-m3i8vga8`
     * if `/howto/how-to-get-started-with-combostrap-m3i8vga8`
     *
     * Unfortunately, DOKUWIKI_STARTED is not the first event
     * The id may have been changed by
     * {@link action_plugin_combo_lang::load_lang()}
     * function, that's why we have this function
     * to get the original requested id
     */
    private static function getOriginalIdFromRequest(): ?string
    {
        $originalId = $_GET["id"] ?? null;
        // `?id[]=x` yields an array: there is no usable id
        if (!is_string($originalId)) {
            return null;
        }
        // We may get a `/` as first character (it depends on the web server rewrite rule)
        // Because we return an id, we delete it, but only when it's present,
        // otherwise we would truncate the id (`wp-admin` would become `p-admin`)
        if (strpos($originalId, '/') === 0) {
            $originalId = substr($originalId, 1);
        }
        // transform / to :
        return str_replace("/", WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $originalId);
    }

    /**
     * Determine if the request should be banned based on the id
     *
     * @param string $id
     * @return bool
     *
     * See also {@link https://perishablepress.com/7g-firewall/#features}
     * for blocking rules on http request data such as:
     *   * query_string
     *   * user_agent,
     *   * remote host
     */
    public static function isShadowBanned(string $id): bool
    {
        /**
         * ie
         * wp-json:api:flutter_woo:config_file
         * wp-content:plugins:wpdiscuz:themes:default:style-rtl.css
         * wp-admin
         * 2020:wp-includes:wlwmanifest.xml
         * wp-content:start
         * wp-admin:css:start
         * sito:wp-includes:wlwmanifest.xml
         * site:wp-includes:wlwmanifest.xml
         * cms:wp-includes:wlwmanifest.xml
         * test:wp-includes:wlwmanifest.xml
         * media:wp-includes:wlwmanifest.xml
         * wp2:wp-includes:wlwmanifest.xml
         * 2019:wp-includes:wlwmanifest.xml
         * shop:wp-includes:wlwmanifest.xml
         * wp1:wp-includes:wlwmanifest.xml
         * news:wp-includes:wlwmanifest.xml
         * 2018:wp-includes:wlwmanifest.xml
         */
        if (strpos($id, 'wp-') !== false) {
            return true;
        }

        /**
         * db:oracle:long_or_1_utl_inaddr.get_host_address_chr_33_chr_126_chr_33_chr_65_chr_66_chr_67_chr_49_chr_52_chr_53_chr_90_chr_81_chr_54_chr_50_chr_68_chr_87_chr_81_chr_65_chr_70_chr_80_chr_79_chr_73_chr_89_chr_67_chr_70_chr_68_chr_33_chr_126_chr_33
         * db:oracle:999999.9:union:all:select_null:from_dual
         * db:oracle:999999.9:union:all:select_null:from_dual_and_0_0
         */
        if (preg_match('/_chr_|_0_0/', $id) === 1) {
            return true;
        }


        /**
         * ie
         * git:objects:
         * git:refs:heads:stable
         * git:logs:refs:heads:main
         * git:logs:refs:heads:stable
         * git:hooks:pre-push.sample
         * git:hooks:pre-receive.sample
         */
        if (strpos($id, "git:") === 0) {
            return true;
        }

        return false;

    }

    /**
     * @param string $id
     * @return bool - true if the id starts with `well-known`
     * well-known:traffic-advice = https://github.com/buettner/private-prefetch-proxy/blob/main/traffic-advice.md
     * .well-known/security.txt, id=well-known:security.txt = https://securitytxt.org/
     * well-known:dnt-policy.txt
     */
    public static function isWellKnownFile(string $id): bool
    {
        return strpos($id, "well-known") === 0;
    }


    /**
     * Register the router and the ban handlers
     * (only if the router is enabled in the configuration)
     *
     * @param Doku_Event_Handler $controller
     */
    function register(Doku_Event_Handler $controller)
    {

        if (SiteConfig::getConfValue(self::ROUTER_ENABLE_CONF, 1)) {

            /**
             * This will call the function {@link action_plugin_combo_router::router()}
             *
             * The handler is registered on DOKUWIKI_STARTED.
             * This is not the first event (for instance
             * https://www.dokuwiki.org/devel:event:init_lang_load happens before),
             * the global id may therefore already have been changed when the router runs.
             * That's why the router reads the original id from the request.
             */
            $controller->register_hook('DOKUWIKI_STARTED',
                'BEFORE',
                $this,
                'router',
                array());

            /**
             * Bot Ban functionality
             *
             * Because we make a redirection to the home page, we need to check
             * if the home is readable, for that, the AUTH plugin needs to be initialized
             * That's why we wait
             * https://www.dokuwiki.org/devel:event:dokuwiki_init_done
             *
             * and we can't use
             * https://www.dokuwiki.org/devel:event:init_lang_load
             * because there is no auth setup in {@link auth_aclcheck_cb()}
             * and the line `if (!$auth instanceof AuthPlugin) return AUTH_NONE;` return none;
             */
            $controller->register_hook('DOKUWIKI_INIT_DONE', 'BEFORE', $this, 'ban', array());

        }


    }

    /**
     *
     * We have created a special ban function that is
     * called early (on DOKUWIKI_INIT_DONE, see {@link action_plugin_combo_router::register()})
     * to spare CPU.
     *
     * For a non-existing page:
     *   * a well-known file request gets a 404
     *   * a shadow banned id is transparently redirected to the home page
     *
     * @param $event
     * @throws Exception
     */
    function ban(&$event)
    {

        $id = self::getOriginalIdFromRequest();
        if ($id === null) {
            return;
        }
        // An existing page is never banned
        $page = MarkupPath::createMarkupFromId($id);
        if (FileSystems::exists($page)) {
            return;
        }

        // Well known
        if (self::isWellKnownFile($id)) {
            $this->logRedirection($id, "", self::TARGET_ORIGIN_WELL_KNOWN, self::REDIRECT_NOTFOUND_METHOD);
            ExecutionContext::getActualOrCreateFromEnv()
                ->response()
                ->setStatus(HttpResponseStatus::NOT_FOUND)
                ->end();
            return;
        }

        // Shadow banned
        if (self::isShadowBanned($id)) {
            $webSiteHomePage = Site::getIndexPageName();
            $this->executeTransparentRedirect($webSiteHomePage, self::TARGET_ORIGIN_SHADOW_BANNED);
        }

    }

    /**
     * The routing entry point.
     *
     * In order, for a `show` action:
     *   * existing page: permanent redirect to the canonical url if the requested id has changed
     *   * page id (permalink / extended permalink) in the url
     *   * canonical
     *   * alias
     *   * page rules
     *   * edit mode for a writer (if configured)
     *   * the configured reader algorithms
     *
     * @param $event Doku_Event
     * @param $param
     * @return void
     * @throws Exception
     */
    function router(&$event, $param)
    {

        /**
         * Just the {@link ExecutionContext::SHOW_ACTION}
         * may be redirected
         */
        $executionContext = ExecutionContext::getActualOrCreateFromEnv();
        if ($executionContext->getExecutingAction() !== ExecutionContext::SHOW_ACTION) {
            return;
        }

        $urlRewrite = Site::getUrlRewrite();
        if ($urlRewrite == UrlRewrite::VALUE_DOKU_REWRITE) {
            UrlRewrite::sendErrorMessage();
            return;
        }

        global $ID;

        /**
         * Without SQLite, this module does not work further
         */
        try {
            Sqlite::createOrGetSqlite();
        } catch (ExceptionSqliteNotAvailable $e) {
            return;
        }

        $this->pageRules = new PageRules();


        /**
         * Unfortunately, DOKUWIKI_STARTED is not the first event
         * The id may have been changed by
         * {@link action_plugin_combo_lang::load_lang()}
         * function, that's why we check against the {@link $_REQUEST}
         * and not the global ID
         */
        $originalId = self::getOriginalIdFromRequest();

        /**
         * Page is an existing id ?
         */
        $requestedMarkupPath = MarkupPath::createMarkupFromId($ID);
        if (FileSystems::exists($requestedMarkupPath)) {

            /**
             * If this is not the root home page
             * and if the canonical id is the not the same (the id has changed)
             * and if this is not a historical page (revision)
             * redirect
             */
            if (
                $originalId !== $requestedMarkupPath->getUrlId() // The id may have been changed
                && $ID != Site::getIndexPageName()
                && !isset($_REQUEST["rev"])
            ) {
                /**
                 * TODO: When saving for the first time, the page is not stored in the database
                 * but that's not the case actually
                 */
                $databasePageRow = $requestedMarkupPath->getDatabasePage();
                if ($databasePageRow->exists()) {
                    /**
                     * A move may leave the database in a bad state,
                     * unfortunately (ie page is not in index, unable to update, ...)
                     * We test therefore if the database page id exists
                     */
                    $targetPageId = $databasePageRow->getFromRow("id");
                    $targetPath = WikiPath::createMarkupPathFromId($targetPageId);
                    if (FileSystems::exists($targetPath)) {
                        $this->executePermanentRedirect(
                            $requestedMarkupPath->getCanonicalUrl()->toAbsoluteUrlString(),
                            self::TARGET_ORIGIN_PERMALINK_EXTENDED
                        );
                    }
                }
            }
            return;
        }


        $identifier = $ID;


        /**
         * Page Id in the url
         */
        $shortPageId = PageUrlPath::getShortEncodedPageIdFromUrlId($requestedMarkupPath->getPathObject()->getLastNameWithoutExtension());
        if ($shortPageId != null) {
            $pageId = PageUrlPath::decodePageId($shortPageId);
        } else {
            /**
             * Permalink with id
             */
            $pageId = PageUrlPath::decodePageId($identifier);
        }
        if ($pageId !== null) {

            if ($requestedMarkupPath->getParent() === null) {
                $page = DatabasePageRow::createFromPageId($pageId)->getMarkupPath();
                if ($page !== null && $page->exists()) {
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK
                    );
                    return;
                }
            }

            /**
             * Page Id Abbr ?
             * {@link PageUrlType::CONF_CANONICAL_URL_TYPE}
             */
            $page = DatabasePageRow::createFromPageIdAbbr($pageId)->getMarkupPath();
            if ($page === null) {
                // or the length of the abbr has changed
                $canonicalDatabasePage = new DatabasePageRow();
                try {
                    $row = $canonicalDatabasePage->getDatabaseRowFromAttribute("substr(" . PageId::PROPERTY_NAME . ", 1, " . strlen($pageId) . ")", $pageId);
                    $canonicalDatabasePage->setRow($row);
                    $page = $canonicalDatabasePage->getMarkupPath();
                } catch (ExceptionNotFound $e) {
                    // nothing to do
                }
            }
            if ($page !== null && $page->exists()) {
                /**
                 * If the url canonical id has changed, we show it
                 * to the writer by performing a permanent redirect
                 */
                if ($identifier != $page->getUrlId()) {
                    // Google asks for a redirect
                    // https://developers.google.com/search/docs/advanced/crawling/301-redirects
                    // People access your site through several different URLs.
                    // If, for example, your home page can be reached in multiple ways
                    // (for instance, http://example.com/home, http://home.example.com, or http://www.example.com),
                    // it's a good idea to pick one of those URLs as your preferred (canonical) destination,
                    // and use redirects to send traffic from the other URLs to your preferred URL.
                    $this->executePermanentRedirect(
                        $page->getCanonicalUrl()->toAbsoluteUrlString(),
                        self::TARGET_ORIGIN_PERMALINK_EXTENDED
                    );
                    return;
                }

                $this->executeTransparentRedirect($page->getWikiId(), self::TARGET_ORIGIN_PERMALINK_EXTENDED);
                return;

            }
            // permanent url not yet in the database
            // Other permanent such as permanent canonical ?
            // We let the process go with the new identifier

        }

        // Global variable needed in the process
        global $conf;

        /**
         * Identifier is a Canonical ?
         */
        $canonicalDatabasePage = DatabasePageRow::createFromCanonical($identifier);
        $canonicalPage = $canonicalDatabasePage->getMarkupPath();
        if ($canonicalPage !== null && $canonicalPage->exists()) {
            /**
             * Does the canonical url is canonical name based
             * ie {@link PageUrlType::CONF_VALUE_CANONICAL_PATH}
             */
            if ($canonicalPage->getUrlId() === $identifier) {
                $res = $this->executeTransparentRedirect(
                    $canonicalPage->getWikiId(),
                    self::TARGET_ORIGIN_CANONICAL
                );
            } else {
                $res = $this->executePermanentRedirect(
                    $canonicalPage->getWikiId(), // not the url because, it allows to add url query redirection property
                    self::TARGET_ORIGIN_CANONICAL
                );
            }
            if ($res) {
                return;
            }
        }

        /**
         * Identifier is an alias
         */
        $aliasRequestedPage = DatabasePageRow::createFromAlias($identifier)->getMarkupPath();
        if (
            $aliasRequestedPage !== null
            && $aliasRequestedPage->exists()
            // The build alias is the file system metadata alias
            // it may be null if the replication in the database was not successful
            && $aliasRequestedPage->getBuildAlias() !== null
        ) {
            $buildAlias = $aliasRequestedPage->getBuildAlias();
            switch ($buildAlias->getType()) {
                case AliasType::REDIRECT:
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                case AliasType::SYNONYM:
                    $res = $this->executeTransparentRedirect($aliasRequestedPage->getWikiId(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
                default:
                    LogUtility::msg("The alias type ({$buildAlias->getType()}) is unknown. A permanent redirect was performed for the alias $identifier");
                    $res = $this->executePermanentRedirect($aliasRequestedPage->getCanonicalUrl()->toAbsoluteUrlString(), self::TARGET_ORIGIN_ALIAS);
                    if ($res) {
                        return;
                    }
                    break;
            }
        }


        // If there is a redirection defined in the page rules
        $result = $this->processingPageRules();
        if ($result) {
            // A redirection has occurred
            // finish the process
            return;
        }

        /**
         *
         * There was no redirection found, redirect to edit mode if writer
         *
         */
        if (Identity::isWriter() && $this->getConf(self::GO_TO_EDIT_MODE) == 1) {

            $this->gotToEditMode($event);
            // Stop here
            return;

        }

        /**
         *  We are still a reader, the redirection does not exist the user is not allowed to edit the page (public of other)
         */
        if ($this->getConf('ActionReaderFirst') == self::NOTHING) {
            return;
        }

        // We are reader and there is no redirection set, we apply the algorithm
        $readerAlgorithms = array();
        $readerAlgorithms[0] = $this->getConf('ActionReaderFirst');
        $readerAlgorithms[1] = $this->getConf('ActionReaderSecond');
        $readerAlgorithms[2] = $this->getConf('ActionReaderThird');

        while (
            ($algorithm = array_shift($readerAlgorithms)) != null
        ) {

            switch ($algorithm) {

                case self::NOTHING:
                    return;

                case self::GO_TO_BEST_END_PAGE_NAME:

                    /**
                     * @var MarkupPath $bestEndPage
                     */
                    list($bestEndPage, $method) = RouterBestEndPage::process($requestedMarkupPath);
                    if ($bestEndPage != null && $bestEndPage->getWikiId() !== $requestedMarkupPath->getWikiId()) {
                        $res = false;
                        switch ($method) {
                            case self::REDIRECT_PERMANENT_METHOD:
                                $res = $this->executePermanentRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            case self::REDIRECT_NOTFOUND_METHOD:
                                $res = $this->performNotFoundRedirect($bestEndPage->getWikiId(), self::TARGET_ORIGIN_BEST_END_PAGE_NAME);
                                break;
                            default:
                                LogUtility::msg("This redirection method ($method) was not expected for the redirection algorithm ($algorithm)");
                        }
                        if ($res) {
                            // Redirection has succeeded
                            return;
                        }
                    }
                    break;

                case self::GO_TO_NS_START_PAGE:

                    // Start page with the conf['start'] parameter
                    $startPage = getNS($identifier) . ':' . $conf['start'];
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }

                    // Start page with the same name than the namespace
                    $startPage = getNS($identifier) . ':' . curNS($identifier);
                    if (page_exists($startPage)) {
                        $res = $this->performNotFoundRedirect($startPage, self::TARGET_ORIGIN_START_PAGE);
                        if ($res) {
                            return;
                        }
                    }
                    break;

                case self::GO_TO_BEST_PAGE_NAME:

                    $bestPage = $this->getBestPage($identifier);
                    $bestPageId = $bestPage['id'];
                    $scorePageName = $bestPage['score'];

                    // Get Score from a Namespace
                    $bestNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $bestNamespace['namespace'];
                    $namespaceScore = $bestNamespace['score'];

                    // Compare the two score
                    if ($scorePageName > 0 or $namespaceScore > 0) {
                        if ($scorePageName > $namespaceScore) {
                            $this->performNotFoundRedirect($bestPageId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        } else {
                            $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_PAGE_NAME);
                        }
                        return;
                    }
                    break;

                case self::GO_TO_BEST_NAMESPACE:

                    $scoreNamespace = $this->scoreBestNamespace($identifier);
                    $bestNamespaceId = $scoreNamespace['namespace'];
                    $score = $scoreNamespace['score'];

                    if ($score > 0) {
                        $this->performNotFoundRedirect($bestNamespaceId, self::TARGET_ORIGIN_BEST_NAMESPACE);
                        return;
                    }
                    break;

                case self::GO_TO_SEARCH_ENGINE:

                    $this->redirectToSearchEngine();

                    return;

                // End Switch Action
            }

            // End While Action
        }


    }


    /**
     * getBestNamespace
     * Return a list with 'BestNamespaceId Score'
     *
     * The candidates are the start page of the namespace of the id if it exists,
     * otherwise all the pages found for the start page name.
     * The best candidate is the first one that contains the most
     * names (longer than 2 characters) of the namespace path.
     *
     * @param $id
     * @return array - with the keys `namespace` (the best page id, empty if none) and `score`
     */
    private
    function scoreBestNamespace($id)
    {

        global $conf;

        // Parameters
        $pageNameSpace = getNS($id);

        // If the page has an existing namespace start page take it, other search other namespace
        $startPageNameSpace = $pageNameSpace . ":";
        $dateAt = '';
        // $startPageNameSpace will get a full path (ie with start or the namespace
        resolve_pageid($pageNameSpace, $startPageNameSpace, $exists, $dateAt, true);
        if (page_exists($startPageNameSpace)) {
            $nameSpaces = array($startPageNameSpace);
        } else {
            // ft_pageLookup returns the page ids as keys (the values are the titles),
            // we need the ids (same usage as in getBestPage)
            $nameSpaces = array_keys(ft_pageLookup($conf['start']));
        }

        // Parameters and search the best namespace
        $pathNames = explode(':', $pageNameSpace);
        $bestNbWordFound = 0;
        $bestNamespaceId = '';
        foreach ($nameSpaces as $nameSpace) {

            $nbWordFound = 0;
            foreach ($pathNames as $pathName) {
                if (strlen($pathName) > 2) {
                    $nbWordFound = $nbWordFound + substr_count($nameSpace, $pathName);
                }
            }
            // NOTE(review): the original code had a nested "take only the smallest namespace"
            // condition that was always true inside this strict comparison (ie never a tie-break).
            // The behavior is kept: the first candidate with the highest count wins.
            if ($nbWordFound > $bestNbWordFound) {
                $bestNbWordFound = $nbWordFound;
                $bestNamespaceId = $nameSpace;
            }
        }

        $startPageFactor = $this->getConf('WeightFactorForStartPage');
        $nameSpaceFactor = $this->getConf('WeightFactorForSameNamespace');
        if ($bestNbWordFound > 0) {
            $bestNamespaceScore = $bestNbWordFound * $nameSpaceFactor + $startPageFactor;
        } else {
            $bestNamespaceScore = 0;
        }


        return array(
            'namespace' => $bestNamespaceId,
            'score' => $bestNamespaceScore
        );

    }

    /**
     * Switch the current action to `edit`
     *
     * @param $event
     */
    private
    function gotToEditMode(&$event)
    {
        global $ACT;
        $ACT = 'edit';

    }


    /**
     * Redirect to an internal page ie:
     *   * on the same domain
     *   * no HTTP redirect
     *   * id rewrite
     *
     * Because we set the ID globally for the ID redirect,
     * the target must be a string id and not a {@link MarkupPath}
     * object otherwise we got an error in the {@link \ComboStrap\AnalyticsMenuItem}
     * because the constructor takes it {@link \dokuwiki\Menu\Item\AbstractItem}
     * (the `string` type declarations enforce it)
     *
     * @param string $targetPageId - target page id
     * @param string $targetOriginId - the source of the target (redirect)
     * @return bool - return true if the user has the permission and that the redirect was done
     * @throws Exception
     */
    private
    function executeTransparentRedirect(string $targetPageId, string $targetOriginId): bool
    {

        // If the user does not have the right to see the target page
        // don't do anything
        if (!(Identity::isReader($targetPageId))) {
            return false;
        }

        // Change the id
        global $ID;
        global $INFO;
        $sourceId = $ID;
        $ID = $targetPageId;
        if (isset($_REQUEST["id"])) {
            $_REQUEST["id"] = $targetPageId;
        }
        if (isset($_GET["id"])) {
            $_GET["id"] = $targetPageId;
        }

        /**
         * Refresh the $INFO data
         *
         * the info attributes are used elsewhere
         *   'id': for the sidebar
         *   'exist' : for the meta robot = noindex,follow, see {@link tpl_metaheaders()}
         *   'rev' : for the edit button to be sure that the page is still the same
         */
        $INFO = pageinfo();

        /**
         * Not compatible with
         * https://www.dokuwiki.org/config:send404 is enabled
         *
         * This check happens before that dokuwiki is started
         * and send an header in doku.php
         *
         * We send a warning
         */
        global $conf;
        if ($conf['send404'] == true) {
            LogUtility::msg("The <a href=\"https://www.dokuwiki.org/config:send404\">dokuwiki send404 configuration</a> is on and should be disabled when using the url manager", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
        }

        // Redirection
        $this->logRedirection($sourceId, $targetPageId, $targetOriginId, self::REDIRECT_TRANSPARENT_METHOD);

        return true;

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_PERMANENT_METHOD}
     *
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function executePermanentRedirect(string $targetIdOrUrl, $targetOrigin): bool
    {
        return $this->executeHttpRedirect($targetIdOrUrl, $targetOrigin, self::REDIRECT_PERMANENT_METHOD);
    }

    /**
     * The general HTTP Redirect method to an internal page
     * where the redirection method decide which type of redirection
     * @param string $targetIdOrUrl - a dokuwiki id or an url
     * @param string $targetOrigin - the origin of the target (the algorithm used to get the target origin)
     * @param string $method - the redirection method
     * @return bool - true if a response was sent, false if the method is not an http redirection method
     */
    private
    function executeHttpRedirect(string $targetIdOrUrl, string $targetOrigin, string $method): bool
    {

        global $ID;


        // Log the redirections
        $this->logRedirection($ID, $targetIdOrUrl, $targetOrigin, $method);


        // An http external url ?
        try {
            $isHttpUrl = Url::createFromString($targetIdOrUrl)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }

        // If there is a bug in the isValid function for an internal url
        // We get a loop.
        // The Url becomes the id, the id is unknown and we do a redirect again
        //
        // We check then if the target starts with the base url
        // if this is the case, it's valid
        if (!$isHttpUrl && strpos($targetIdOrUrl, DOKU_URL) === 0) {
            $isHttpUrl = true;
        }
        if ($isHttpUrl) {

            // defend against HTTP Response Splitting
            // https://owasp.org/www-community/attacks/HTTP_Response_Splitting
            $targetUrl = stripctl($targetIdOrUrl);

        } else {


            // Explode the page ID and the anchor (#)
            $link = explode('#', $targetIdOrUrl, 2);

            $url = UrlEndpoint::createDokuUrl();

            // if this is search engine redirect
            if ($targetOrigin == self::TARGET_ORIGIN_SEARCH_ENGINE) {
                $replacementPart = array(':', '_', '-');
                $query = str_replace($replacementPart, ' ', $ID);
                $url->setQueryParameter(ExecutionContext::DO_ATTRIBUTE, ExecutionContext::SEARCH_ACTION);
                $url->setQueryParameter("q", $query);
            }

            /**
             * Doing a permanent redirect with a added query string
             * create a new page url on the search engine
             *
             * ie
             * http://host/page
             * is not the same
             * than
             * http://host/page?whatever
             *
             * We can't pass query string otherwise, we get
             * the SEO warning / error
             * `Alternative page with proper canonical tag`
             *
             * Use HTTP X header for debug
             */
            if ($method !== self::REDIRECT_PERMANENT_METHOD) {
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_PAGE, $ID);
                $url->setQueryParameter(action_plugin_combo_routermessage::ORIGIN_TYPE, $targetOrigin);
            }

            $id = $link[0];
            $url->setQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $id);
            if (array_key_exists(1, $link)) {
                $url->setFragment($link[1]);
            }
            $targetUrl = $url->toAbsoluteUrlString();

        }

        /**
         * The dokuwiki function {@link send_redirect()}
         * set the `Location header` and in php, the header function
         * in this case change the status code to 302 Arghhhh.
         * The code below is adapted from this function {@link send_redirect()}
         */
        global $MSG; // are there any undisplayed messages? keep them in session for display
        if (isset($MSG) && count($MSG) && !defined('NOSESSION')) {
            //reopen session, store data and close session again
            @session_start();
            $_SESSION[DOKU_COOKIE]['msg'] = $MSG;
        }
        session_write_close(); // always close the session

        switch ($method) {

            case self::REDIRECT_PERMANENT_METHOD:
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::PERMANENT_REDIRECT)
                    ->addHeader(self::LOCATION_HEADER_PREFIX . $targetUrl)
                    ->end();
                return true;

            case self::REDIRECT_NOTFOUND_METHOD:


                // Empty 404 body to not get the standard 404 page of the browser
                // but a blank page to avoid a sort of FOUC.
                // ie the user see a page briefly
                ExecutionContext::getActualOrCreateFromEnv()
                    ->response()
                    ->setStatus(HttpResponseStatus::NOT_FOUND)
                    ->addHeader(self::REFRESH_HEADER_PREFIX . $targetUrl)
                    ->setBody(self::PAGE_404, Mime::getHtml())
                    ->end();
                return true;

            default:
                LogUtility::msg("The method ($method) is not an http redirection");
                return false;
        }


    }

    /**
     * Search the pages that have the same name than the id
     * and return the one that shares the most id parts with it
     * (on equality, the shortest id wins)
     *
     * @param $id
     * @return array - with the keys `id` and `score` (both null if no page was found)
     */
    private
    function getBestPage($id): array
    {

        // The return parameters
        $bestPageId = null;
        $scorePageName = null;

        // Get Score from a page
        $pageName = noNS($id);
        $pagesWithSameName = ft_pageLookup($pageName);
        if (count($pagesWithSameName) > 0) {

            // Search same namespace in the page found than in the Id page asked.
            $bestNbWordFound = 0;


            $wordsInPageSourceId = explode(':', $id);
            foreach ($pagesWithSameName as $targetPageId => $title) {

                // Nb of word found in the target page id
                // that are in the source page id
                $nbWordFound = 0;
                foreach ($wordsInPageSourceId as $word) {
                    // substr_count does not accept an empty needle
                    // (an empty part may occur with an id such as `a::b` or `:a`)
                    if ($word === '') {
                        continue;
                    }
                    $nbWordFound = $nbWordFound + substr_count($targetPageId, $word);
                }

                if ($bestPageId == null) {

                    $bestNbWordFound = $nbWordFound;
                    $bestPageId = $targetPageId;

                } else {

                    if ($nbWordFound >= $bestNbWordFound && strlen($bestPageId) > strlen($targetPageId)) {

                        $bestNbWordFound = $nbWordFound;
                        $bestPageId = $targetPageId;

                    }

                }

            }
            $scorePageName = $this->getConf('WeightFactorForSamePageName') + ($bestNbWordFound - 1) * $this->getConf('WeightFactorForSameNamespace');
            return array(
                'id' => $bestPageId,
                'score' => $scorePageName);
        }
        return array(
            'id' => $bestPageId,
            'score' => $scorePageName
        );

    }


    /**
     * Redirect to the search engine
     */
    private
    function redirectToSearchEngine()
    {

        global $ID;
        $this->performNotFoundRedirect($ID, self::TARGET_ORIGIN_SEARCH_ENGINE);

    }


    /**
     *
     * Add a row into the `redirections_log` table of the SQLite backend
     *
     * The log is a best effort: a failure is reported
     * but does not stop the redirection.
     *
     * @param string $sourcePageId
     * @param $targetPageId
     * @param $algorithmic
     * @param $method - http or rewrite
     */
    function logRedirection(string $sourcePageId, $targetPageId, $algorithmic, $method)
    {

        $row = array(
            "TIMESTAMP" => date("c"),
            "SOURCE" => $sourcePageId,
            "TARGET" => $targetPageId,
            "REFERRER" => $_SERVER['HTTP_REFERER'] ?? null,
            "TYPE" => $algorithmic,
            "METHOD" => $method
        );
        try {
            $request = Sqlite::createOrGetBackendSqlite()
                ->createRequest()
                ->setTableRow('redirections_log', $row);
        } catch (ExceptionSqliteNotAvailable $e) {
            // NOTE(review): presumably thrown when Sqlite is not available (as createOrGetSqlite in the router) - to confirm
            // Without database, there is no log (this function is also called from the ban function,
            // where the Sqlite availability was not checked)
            return;
        }
        try {
            $request
                ->execute();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Redirection Log Insert Error. {$e->getMessage()}");
        } finally {
            $request->close();
        }


    }

    /**
     * This function check if there is a redirection declared
     * in the redirection table
     *
     * The first rule whose matcher (a glob where `*` is a capturing wildcard)
     * matches the requested id is applied. The `$n` placeholders of the rule target
     * are replaced by the captured values.
     *
     * @return bool - true if a rewrite or redirection occurs
     * @throws Exception
     */
    private function processingPageRules(): bool
    {
        global $ID;

        $calculatedTarget = null;
        $ruleMatcher = null; // Used in a warning message if the target page does not exist
        // Known redirection in the table
        // Get the page from redirection data
        $rules = $this->pageRules->getRules();
        foreach ($rules as $rule) {

            $ruleMatcher = strtolower($rule[PageRules::MATCHER_NAME]);
            $ruleTarget = $rule[PageRules::TARGET_NAME];

            // Glob to Regexp
            $regexpPattern = '/' . str_replace("*", "(.*)", $ruleMatcher) . '/i';

            // Match ?
            // https://www.php.net/manual/en/function.preg-match.php
            $pregMatchResult = @preg_match($regexpPattern, $ID, $matches);
            if ($pregMatchResult === false) {
                // The `if` to take into account this problem
                // PHP Warning:  preg_match(): Unknown modifier 'd' in /opt/www/datacadamia.com/lib/plugins/combo/action/router.php on line 972
                LogUtility::log2file("processing Page Rules An error occurred with the pattern ($regexpPattern)", LogUtility::LVL_MSG_WARNING);
                // A malformed rule should not disable the rules that follow
                continue;
            }
            if ($pregMatchResult) {
                $calculatedTarget = $ruleTarget;
                // From the highest group to the lowest
                // otherwise `$1` would be replaced inside `$10`
                foreach (array_reverse($matches, true) as $key => $match) {
                    if ($key == 0) {
                        // the full match is not a placeholder
                        continue;
                    }
                    $calculatedTarget = str_replace('$' . $key, $match, $calculatedTarget);
                }
                break;
            }
        }

        if ($calculatedTarget == null) {
            return false;
        }

        // If this is an external redirect (other domain)
        try {
            $isHttpUrl = Url::createFromString($calculatedTarget)->isHttpUrl();
        } catch (ExceptionBadSyntax|ExceptionBadArgument $e) {
            $isHttpUrl = false;
        }
        if ($isHttpUrl) {
            $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);
            return true;
        }

        // If the page exist
        if (page_exists($calculatedTarget)) {

            // This is DokuWiki Id and should always be lowercase
            // The page rule may have change that
            $calculatedTarget = strtolower($calculatedTarget);
            return $this->executeHttpRedirect($calculatedTarget, self::TARGET_ORIGIN_PAGE_RULES, self::REDIRECT_PERMANENT_METHOD);

        }

        LogUtility::msg("The calculated target page ($calculatedTarget) (for the non-existing page `$ID` with the matcher `$ruleMatcher`) does not exist", LogUtility::LVL_MSG_ERROR);
        return false;

    }

    /**
     * HTTP redirect with the {@link action_plugin_combo_router::REDIRECT_NOTFOUND_METHOD}
     * (a 404 status with a refresh header)
     *
     * @param string $targetId - the target page id
     * @param string $origin - the origin of the target
     * @return bool - the result of {@link action_plugin_combo_router::executeHttpRedirect()}
     */
    private function performNotFoundRedirect(string $targetId, string $origin): bool
    {
        return $this->executeHttpRedirect($targetId, $origin, self::REDIRECT_NOTFOUND_METHOD);
    }


}