<?php

/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\Utf8\Clean;
use dokuwiki\File\Resolver;
use dokuwiki\Extension\Event;
use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;
use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts the ID from
 * the full request URI when userewrite is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is expanded to a concrete
 * page (start page, page named like the namespace, page named like the
 * namespace one level up, or the start page as fallback). If the current
 * action is 'show' a redirect to the expanded ID is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (substr($id, -1) == ':' || ($conf['useslash'] && substr($id, -1) == '/')) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id .= $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id .= noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id .= $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other GET parameters but let wl() build the id part
            $urlParameters = $_GET;
            unset($urlParameters['id']);
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) $id = cleanID($id);
    if ($id === '' && $param == 'id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones (depending on the 'deaccent' option or when
 * $ascii is set).
 *
 * Results of non-ASCII-forced calls are memoized in the global
 * $cache_cleanid array, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat === null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string)$raw_id);
    $id = PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) $id = Clean::romanize($id);
    if ($conf['deaccent'] || $ascii) $id = Clean::deaccent($id, -1);

    //remove specials
    $id = Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) $id = Clean::strip($id);

    //clean up: collapse repeated separators and strip them around colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * Everything before the last colon is the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned. When the given value
 * contains no colon it is returned unchanged (so a false passed in by
 * curNS() for root-level IDs comes back as false).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    // cast like getNS() does, so null/false input does not trigger
    // deprecation notices in the string functions
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, $pos + 1);
    }
    return $id;
}

/**
 * Returns the current namespace
 *
 * This is the last namespace component of the given ID, not the full
 * namespace path.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false false when the ID has no namespace
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * Falls back to $conf['start'] when neither yields a usable name.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * Leading digits, underscores and dashes are stripped; if nothing remains
 * the result is 'section' followed by the digits of the title. When $check
 * is an array, a numeric suffix is appended until the ID is not in that
 * array, and the chosen ID is appended to $check.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs
 * @return string the section ID derived from the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        $suffix = 0;
        $candidateTitle = $title;
        while (in_array($candidateTitle, $check)) {
            $candidateTitle = $title . ++$suffix;
        }
        $check [] = $candidateTitle;
        return $candidateTitle;
    } else {
        return $title;
    }
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool $date_at if true, $rev is resolved to the last revision at that time
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    // ignore any section anchor appended to the id (#3608)
    $id = explode('#', (string)$id, 2)[0];

    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * Media existence check
 *
 * @param string $id media id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool $date_at if true, $rev is resolved to the last revision at that time
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $changeLog = new MediaChangeLog($id);
        $changelog_rev = $changeLog->getLastRevisionAt($rev);
        if ($changelog_rev !== false) {
            $rev = $changelog_rev;
        }
    }
    return file_exists(mediaFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars. Computed paths are
 * memoized in the global $cache_wikifn array, keyed by ID and revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = [];
    }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The extension is appended verbatim, so it has to include the leading dot.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * The extension is appended verbatim, so it has to include the leading dot.
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);

    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    // (GLOB_MARK appends a slash to directories, which the pattern rejects too)
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : [];
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        // old revisions are stored as <name>.<rev>.<ext>
        // NOTE(review): element 0 of mimetype()'s result is used as the file
        // extension; behaviour for IDs without a known extension should be
        // confirmed against mimetype()
        $ext = mimetype($id);
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ( (int) $rev ) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF lang dir for the configured language, then the
 * bundled inc/lang dir for the configured language, then bundled english.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) $id = str_replace('/', ':', $id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = [];
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);
    $resolver = new MediaResolver("$ns:deprecated");
    $media = $resolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}

/**
 * Returns a full page id
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;
    if (getNS($ID) == $ns) {
        $context = $ID; // this is usually the case
    } else {
        $context = "$ns:deprecated"; // only used when a different context namespace was given
    }

    $resolver = new PageResolver($context);
    $page = $resolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * Files are sharded into subdirectories named after the first character
 * of the md5 hash.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, with
 * _isHiddenPage() as the default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = ['id' => $id, 'hidden' => false];
    Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Nothing is hidden while in the admin action or when no hidepages
 * pattern is configured. The configured pattern is inserted unescaped
 * between '/' delimiters and matched case-insensitively against the ID
 * with a leading colon.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if (empty($conf['hidepages'])) return;
    if ($ACT == 'admin') return;

    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    // URL encoding, but keep directory separators intact
    $file = urlencode($file);
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') return $file;

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') return false;
    global $ID;

    $ns = $ID;
    do {
        // walk one namespace level up per iteration; the root is tried last
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}