<?php

/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\Utf8\Clean;
use dokuwiki\File\Resolver;
use dokuwiki\Extension\Event;
use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;
use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts the ID from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is completed to an existing
 * page (or the start page) and, if the current action is 'show', a redirect
 * to the completed ID is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (str_ends_with($id, ':') || ($conf['useslash'] && str_ends_with($id, '/'))) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id .= $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id .= noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id .= $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other query parameters, the id is passed separately
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) $id = cleanID($id);
    if ($id === '' && $param == 'id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on the
 * 'deaccent' setting (or when $ascii is set) accented characters are
 * romanized and/or converted to unaccented ones.
 *
 * Results for non-ASCII-forced calls are memoized in the global
 * $cache_cleanid, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string  $raw_id The pageid to clean
 * @param boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key = (string)$raw_id;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat === null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim($key);
    $id = PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) $id = Clean::romanize($id);
    if ($conf['deaccent'] || $ascii) $id = Clean::deaccent($id, -1);

    //remove specials
    $id = Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) $id = Clean::strip($id);

    //clean up: collapse repeated separators and colons, then strip
    //separator-like characters at the ends and around namespace colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) $cache[$key] = $id;
    return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * The argument is cast to a string for the lookup only. When it contains no
 * colon it is returned unchanged, so a non-string value such as the false
 * returned by getNS() for root pages is passed through as is.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, $pos + 1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last part of the namespace of the given ID.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false the innermost namespace name, false if the ID has no namespace
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * For start pages in the root namespace $conf['start'] itself is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if (in_array($p, [$conf['start'], false, ''], true)) {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * Leading digits, underscores and dashes are removed. If nothing is left,
 * 'section' followed by the digits of the title is used instead.
 *
 * When $check is an array the returned ID is made unique against it by
 * appending a counter, and the new ID is added to $check.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if ($new === '') {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        $suffix = 0;
        $candidateTitle = $title;
        // strict comparison: a loose match against a non-string entry (e.g. true)
        // would match every candidate and never terminate
        while (in_array($candidateTitle, $check, true)) {
            $candidateTitle = $title . ++$suffix;
        }
        $check [] = $candidateTitle;
        return $candidateTitle;
    } else {
        return $title;
    }
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * Anything after a '#' in the ID is ignored. When $date_at is set and a
 * revision is given, the revision is replaced by what the page changelog
 * reports via getLastRevisionAt() for it, if any.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool $date_at
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    $id = (explode('#', (string)$id, 2))[0]; // #3608

    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * Media existence check
 *
 * When $date_at is set and a revision is given, the revision is replaced by
 * what the media changelog reports via getLastRevisionAt() for it, if any.
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool $date_at
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $changeLog = new MediaChangeLog($id);
        $changelog_rev = $changeLog->getLastRevisionAt($rev);
        if ($changelog_rev !== false) {
            $rev = $changelog_rev;
        }
    }
    return file_exists(mediaFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are memoized in the global $cache_wikifn, keyed by the path-style
 * ID and the integer revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;
    $rev = (int) $rev; // any falsy rev will be rev 0 in the cache

    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesn't exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The extension is appended as given, so it has to include the leading dot.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * The extension is appended as given, so it has to include the leading dot.
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * The returned array keeps the keys of the glob() result, so it may not be
 * a consecutive list.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    // GLOB_MARK appends a slash to directories, which the filter below rejects
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : [];
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * For old revisions the revision is inserted between file name and extension.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool $clean
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        // NOTE(review): this assumes mimetype() returns the extension in element 0;
        // if it can be false/empty for extensionless ids the last character of
        // the name is cut off here - confirm against mimetype()
        $ext = mimetype($id);
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ( (int) $rev ) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF lang directory, bundled language directory,
 * bundled english directory. The english path is returned without checking
 * that it exists.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) $id = str_replace('/', ':', $id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = [];
        $pathA = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    } elseif ($ns !== false && !str_contains($id, ':')) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);
    $resolver = new MediaResolver("$ns:deprecated");
    $media = $resolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}

/**
 * Returns a full page id
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of the page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;
    if (getNS($ID) == $ns) {
        $context = $ID; // this is usually the case
    } else {
        $context = "$ns:deprecated"; // only used when a different context namespace was given
    }

    $resolver = new PageResolver($context);
    $page = $resolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * Files are spread over subdirectories named after the first character of
 * the md5 hash.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * Triggers the PAGEUTILS_ID_HIDEPAGE event with _isHiddenPage() as the
 * default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = ['id' => $id, 'hidden' => false];
    Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Nothing is changed when the page is already marked hidden, when no
 * 'hidepages' pattern is configured or when the current action is 'admin'.
 * The pattern is matched against the ID prefixed with a colon.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if (empty($conf['hidepages'])) return;
    if ($ACT == 'admin') return;

    // NOTE(review): the configured pattern is embedded unescaped between '/'
    // delimiters, so a pattern containing an unescaped slash is not a valid regex
    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if (str_ends_with($id, ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    $file = urlencode($file);
    // keep directory separators readable
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none was found
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') return false;
    global $ID;

    $ns = $ID;
    do {
        // walk up one namespace per iteration; the last pass ($ns === false) checks the root
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}