<?php

/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\Utf8\Clean;
use dokuwiki\File\Resolver;
use dokuwiki\Extension\Event;
use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;
use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the requested ID ends in a namespace separator it is resolved to a
 * page inside (or named like) that namespace, and - if the current action
 * is 'show' - a redirect to the resolved page is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param the $_REQUEST variable name, default 'id'
 * @param bool $clean if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (str_ends_with($id, ':') || ($conf['useslash'] && str_ends_with($id, '/'))) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id .= $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id .= noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id .= $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other query parameters, the id is passed separately
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) $id = cleanID($id);
    if ($id === '' && $param == 'id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results for non-ASCII-forced calls are memoized in the global
 * $cache_cleanid array, keyed by the raw input.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $raw_id The pageid to clean
 * @param boolean $ascii Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) { // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string)$raw_id);
    $id = PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) $id = Clean::romanize($id);
    if ($conf['deaccent'] || $ascii) $id = Clean::deaccent($id, -1);

    //remove specials
    $id = Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) $id = Clean::strip($id);

    //clean up: collapse repeated separators and strip separators around colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * An input without any colon is returned unchanged; this includes non-string
 * input such as the false that getNS() returns for root-level IDs.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    // cast for the lookup only (consistent with getNS()), so that false/null
    // input does not hit strrpos() directly but is still passed through as is
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, $pos + 1);
    }
    return $id;
}

/**
 * Returns the current namespace
 *
 * This is the last namespace component of the given ID.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false false when the given ID has no namespace
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array, the returned ID is made unique against it by
 * appending a numeric suffix, and the new ID is appended to $check.
 *
 * @param string $title The headline title
 * @param array|bool $check Existing IDs
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    // IDs must not start with a digit, underscore or hyphen
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        $suffix = 0;
        $candidateTitle = $title;
        while (in_array($candidateTitle, $check)) {
            $candidateTitle = $title . ++$suffix;
        }
        $check [] = $candidateTitle;
        return $candidateTitle;
    } else {
        return $title;
    }
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool $date_at if true, $rev is resolved to the last revision at that time
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    $id = (explode('#', $id, 2))[0]; // #3608

    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * Media existence check
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool $date_at if true, $rev is resolved to the last revision at that time
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $changeLog = new MediaChangeLog($id);
        $changelog_rev = $changeLog->getLastRevisionAt($rev);
        if ($changelog_rev !== false) {
            $rev = $changelog_rev;
        }
    }
    return file_exists(mediaFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Computed paths are memoized in the global $cache_wikifn array.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesn't exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = [];
    }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : [];
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool $clean
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        // old revisions are stored as <name>.<rev>.<ext>
        // NOTE(review): presumably mimetype() yields the file extension in index 0;
        // for ids without an extension the substr() below would drop the last
        // character of the name - confirm against mimetype()
        $ext = mimetype($id);
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ( (int) $rev ) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) $id = str_replace('/', ':', $id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = [];
        $pathA = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);
    $resolver = new MediaResolver("$ns:deprecated");
    $media = $resolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}

/**
 * Returns a full page id
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of the page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;
    if (getNS($ID) == $ns) {
        $context = $ID; // this is usually the case
    } else {
        $context = "$ns:deprecated"; // only used when a different context namespace was given
    }

    $resolver = new PageResolver($context);
    $page = $resolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    // files are spread over sub directories named by the first hash character
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision can be overridden by handlers of the
 * PAGEUTILS_ID_HIDEPAGE event.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = ['id' => $id, 'hidden' => false];
    Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if (empty($conf['hidepages'])) return;
    if ($ACT == 'admin') return;

    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if (str_ends_with($id, ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    // url encoding, but keep the directory separators readable
    $file = urlencode($file);
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none was found
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') return false;
    global $ID;

    // walk up the namespace hierarchy; the last iteration ($ns === false) checks the root
    $ns = $ID;
    do {
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}