<?php

/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;
use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is expanded to a page inside
 * that namespace, and a redirect to the expanded ID is sent if $ACT is 'show'.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (substr($id, -1) == ':' || ($conf['useslash'] && substr($id, -1) == '/')) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id = $id . $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id = $id . noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id = $id . $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other query parameters, the id is passed separately
            $urlParameters = $_GET;
            unset($urlParameters['id']);
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) {
        $id = cleanID($id);
    }
    if ($id === '' && $param == 'id') {
        $id = $conf['start'];
    }

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results of non-ASCII-forced calls are memoized in the global $cache_cleanid.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat === null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string)$raw_id);
    $id = \dokuwiki\Utf8\PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = \dokuwiki\Utf8\Clean::romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = \dokuwiki\Utf8\Clean::deaccent($id, -1);
    }

    //remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) {
        $id = \dokuwiki\Utf8\Clean::strip($id);
    }

    //clean up
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[(string)$raw_id] = $id;
    }
    return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * An ID without any colon is returned unchanged (including non-string values
 * such as the false that getNS() yields for root-level IDs).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    // cast only for the search, like getNS() does, so that null/false input
    // does not reach strrpos() as a non-string
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, $pos + 1);
    }
    return $id;
}

/**
 * Returns the current namespace
 *
 * This is the last namespace segment of the given ID, i.e. noNS() applied to
 * the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * Falls back to $conf['start'] when there is no current namespace either.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array, the returned ID is made unique against it by
 * appending a numeric suffix, and the new ID is appended to $check.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        $suffix = 0;
        $candidateTitle = $title;
        // strict comparison: candidates are always strings, and loose matching
        // against non-string entries could match every candidate
        while (in_array($candidateTitle, $check, true)) {
            $candidateTitle = $title . ++$suffix;
        }
        $check[] = $candidateTitle;
        return $candidateTitle;
    }

    return $title;
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id       page id
 * @param string|int $rev      empty or revision timestamp
 * @param bool       $clean    flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at  if true, $rev is resolved via the changelog's getLastRevisionAt()
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * Media existence check
 *
 * @param string     $id       media id
 * @param string|int $rev      empty or revision timestamp
 * @param bool       $clean    flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool       $date_at  if true, $rev is resolved via the changelog's getLastRevisionAt()
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $changeLog = new MediaChangeLog($id);
        $changelog_rev = $changeLog->getLastRevisionAt($rev);
        if ($changelog_rev !== false) {
            $rev = $changelog_rev;
        }
    }
    return file_exists(mediaFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Computed paths are memoized per id and revision in the global $cache_wikifn.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = [];
    }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : [];
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool       $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        // old revisions are stored as <name>.<rev>.<ext>
        $ext = mimetype($id);
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ( (int) $rev ) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(\dokuwiki\File\Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) {
        $id = str_replace('/', ':', $id);
    }

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = [];
        $pathA = explode(':', $id);
        if (!$pathA[0]) {
            $result[] = '';
        }
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) {
            $result[] = '';
        }
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) {
        $id = cleanID($id);
    }
    return $id;
}

/**
 * Returns a full media id
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);
    $resolver = new MediaResolver("$ns:deprecated");
    $media = $resolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}

/**
 * Returns a full page id
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;
    if (getNS($ID) == $ns) {
        $context = $ID; // this is usually the case
    } else {
        $context = "$ns:deprecated"; // only used when a different context namespace was given
    }

    $resolver = new PageResolver($context);
    $page = $resolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    // files are spread over subdirectories named after the first hash character
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * Triggers the PAGEUTILS_ID_HIDEPAGE event with _isHiddenPage() as the
 * default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = [
        'id' => $id,
        'hidden' => false
    ];
    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Does nothing if the page is already marked hidden, if no hidepages pattern
 * is configured, or if the current action is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) {
        return;
    }
    if (empty($conf['hidepages'])) {
        return;
    }
    if ($ACT == 'admin') {
        return;
    }

    // the id is matched with a leading colon prepended
    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if (substr($id, -1, 1) === ':') {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    $file = urlencode($file);
    // keep directory separators intact
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') {
        return false;
    }
    global $ID;

    // walk up from the namespace of $ID; the last pass (getNS() returned
    // false) checks the root namespace
    $ns = $ID;
    do {
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}