<?php

/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;
use dokuwiki\Utils\MediaResolver;
use dokuwiki\Utils\PageResolver;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator, a matching page is looked up
 * (start page, page named like the namespace, page instead of the namespace)
 * and, if the current action is 'show', a redirect to that page is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    // construct page id from request URI
    // (strict comparison on purpose: "0" is a usable ID and must not count as "missing")
    if ($id === '' && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (substr($id, -1) == ':' || ($conf['useslash'] && substr($id, -1) == '/')) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id = $id . $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id = $id . noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id = $id . $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other URL parameters, the id is passed separately
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) {
        $id = cleanID($id);
    }
    if ($id === '' && $param == 'id') {
        $id = $conf['start'];
    }

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on the
 * 'deaccent' setting accented characters are converted to unaccented ones.
 *
 * Results of non-ASCII-forced calls are kept in the global $cache_cleanid.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string)$raw_id);
    $id = \dokuwiki\Utf8\PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = \dokuwiki\Utf8\Clean::romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = \dokuwiki\Utf8\Clean::deaccent($id, -1);
    }

    //remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) {
        $id = \dokuwiki\Utf8\Clean::strip($id);
    }

    //clean up
    $id = preg_replace($sepcharpat, $sepchar, $id);   // collapse runs of the separator char
    $id = preg_replace('#:+#', ':', $id);             // collapse runs of colons
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);     // no separator chars right after a colon
    $id = preg_replace('#[:\._\-]+:#', ':', $id);     // no separator chars right before a colon

    if (!$ascii) {
        $cache[(string)$raw_id] = $id;
    }
    return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * The ID is cast to string for the lookup (like getNS() does). When it
 * contains no colon the given value is returned unchanged, so a false
 * coming from getNS() is passed through as false.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, $pos + 1);
    }
    return $id;
}

/**
 * Returns the current namespace
 *
 * This is the last part of the namespace of the given ID.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false false if the given ID has no namespace
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * Falls back to $conf['start'] when there is no namespace either.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array it is used to keep the returned IDs unique: every
 * returned ID is registered as a key, and repeated titles get a counter
 * appended. The counter is advanced until the resulting ID is not registered
 * yet, so a generated ID can not clash with the ID of another headline.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs (title => number)
 * @return string the section ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        // make sure titles are unique
        if (!array_key_exists($title, $check)) {
            $check[$title] = 0;
        } else {
            // skip suffixed IDs that were already handed out (e.g. headlines "foo", "foo1", "foo")
            do {
                $candidate = $title . ++$check[$title];
            } while (array_key_exists($candidate, $check));
            // register the generated ID so that a later headline can not reuse it
            $check[$candidate] = 0;
            $title = $candidate;
        }
    }

    return $title;
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id       page id
 * @param string|int $rev      empty or revision timestamp
 * @param bool       $clean    flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at  if true, $rev is first mapped through PageChangeLog::getLastRevisionAt()
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * Media existence check
 *
 * @param string     $id       media id
 * @param string|int $rev      empty or revision timestamp
 * @param bool       $clean    flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool       $date_at  if true, $rev is first mapped through MediaChangeLog::getLastRevisionAt()
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $changeLog = new MediaChangeLog($id);
        $changelog_rev = $changeLog->getLastRevisionAt($rev);
        if ($changelog_rev !== false) {
            $rev = $changelog_rev;
        }
    }
    return file_exists(mediaFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is encoded via utf8_encodeFN() to protect Unicode chars.
 * Results are kept in the global $cache_wikifn.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = [];
    }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}

/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : [];
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is encoded via utf8_encodeFN() to protect Unicode chars.
 * For old revisions the (integer) revision is inserted before the file extension.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool       $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        $ext = mimetype($id);
        // strip ".<extension>" from the end of the ID
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ((int) $rev) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF lang directory, bundled language directory,
 * bundled english directory.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(\dokuwiki\Utils\Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) {
        $id = str_replace('/', ':', $id);
    }

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = [];
        $pathA = explode(':', $id);
        if (!$pathA[0]) {
            $result[] = '';
        }
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) {
            $result[] = '';
        }
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) {
        $id = cleanID($id);
    }
    return $id;
}

/**
 * Returns a full media id
 *
 * @param string     $ns     namespace which is context of id
 * @param string     &$media (reference) relative media id, updated to resolved id
 * @param bool       &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool       $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);
    $resolver = new MediaResolver("$ns:deprecated");
    $media = $resolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}

/**
 * Returns a full page id
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);
    $resolver = new PageResolver("$ns:deprecated");
    $page = $resolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    // files are spread over sub directories named after the first hash character
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * Triggers the PAGEUTILS_ID_HIDEPAGE event with _isHiddenPage() as default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = [
        'id' => $id,
        'hidden' => false
    ];
    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Sets $data['hidden'] when ':' . $data['id'] matches the $conf['hidepages']
 * pattern. Nothing is changed when the page is already flagged hidden, when
 * no pattern is configured or when the current action is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) {
        return;
    }
    if (empty($conf['hidepages'])) {
        return;
    }
    if ($ACT == 'admin') {
        return;
    }

    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if it contains characters other than
 * a-z, A-Z, 0-9, '/', '_', '-', '.' and '%' -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    $file = urlencode($file);
    // keep directory separators readable
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') {
        return false;
    }
    global $ID;

    $ns = $ID;
    do {
        // walk up one namespace level per iteration; false means the root was reached
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}