<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;

/**
 * Fetch an ID from the request
 *
 * Uses either standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is resolved to a page
 * inside that namespace; if the global $ACT is 'show' a redirect to the
 * resolved page is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param='id',$clean=true){
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.utf8_basename($INPUT->server->str('SCRIPT_FILENAME'));

        }elseif($INPUT->server->str('PATH_INFO')){
            $request = $INPUT->server->str('PATH_INFO');
        }elseif($INPUT->server->str('SCRIPT_NAME')){
            $script = $INPUT->server->str('SCRIPT_NAME');
        }elseif($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')){
            $script = preg_replace ('/^'.preg_quote($INPUT->server->str('DOCUMENT_ROOT'),'/').'/','',
                    $INPUT->server->str('SCRIPT_FILENAME'));
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // redirect to the resolved page, keeping all other GET parameters
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if($clean) $id = cleanID($id);
    if($id === '' && $param=='id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results of non-ASCII-forced calls are memoized in the global
 * $cache_cleanid array, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';
    }

    $id = trim((string)$raw_id);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id){
    $pos = strrpos((string)$id,':');
    if($pos!==false){
        return substr((string)$id,0,$pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * The given value is returned unchanged when it contains no colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id) {
    $pos = strrpos($id, ':');
    if ($pos!==false) {
        return substr($id, $pos+1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last namespace component of the ID. Because getNS() returns
 * false for IDs without a namespace and noNS() passes that through, false
 * is returned for root-level IDs.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function curNS($id) {
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * Comparisons are strict so that a page or namespace literally named "0"
 * is not mistaken for a missing one.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs (title => number)
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists ($title,$check)) {
            $check[$title] = 0;
        } else {
            $title .= ++ $check[$title];
        }
    }

    return $title;
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id      page id
 * @param string|int $rev     empty or revision timestamp
 * @param bool       $clean   flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at if true, $rev is looked up via PageChangeLog::getLastRevisionAt()
 * @return bool exists?
 */
function page_exists($id,$rev='',$clean=true, $date_at=false) {
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Computed paths are memoized in the global $cache_wikifn array.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')){
                $fn .= '.gz';
            }elseif(file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) { $cache[$id] = array(); }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id) {
    global $conf;
    return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['mediametadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $files    = glob($basename.'.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return    $files ? preg_grep('/^'.preg_quote($basename, '/').'\.[^.\/]*$/u', $files) : array();
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool       $clean if true, $id is cleaned first
 *
 * @return string full path
 */
function mediaFN($id, $rev='', $clean=true){
    global $conf;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
    }else{
        // old revisions are stored as <name>.<rev>.<ext>
        $ext = mimetype($id);
        $name = substr($id,0, -1*strlen($ext[0])-1);
        $fn = $conf['mediaolddir'].'/'.utf8_encodeFN($name .'.'.( (int) $rev ).'.'.$ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $file = DOKU_CONF.'lang/'.$conf['lang'].'/'.$id.'.'.$ext;
    if(!file_exists($file)){
        $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.'.$ext;
        if(!file_exists($file)){
            //fall back to english
            $file = DOKU_INC.'inc/lang/en/'.$id.'.'.$ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 *
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative media id, updated to resolved id
 * @param bool   &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool   $date_at
 */
function resolve_mediaid($ns,&$page,&$exists,$rev='',$date_at=false){
    $page   = resolve_id($ns,$page);
    if($rev !== '' && $date_at){
        $medialog = new MediaChangeLog($page);
        $medialog_rev = $medialog->getLastRevisionAt($rev);
        if($medialog_rev !== false) {
            $rev = $medialog_rev;
        }
    }

    $file   = mediaFN($page,$rev);
    $exists = file_exists($file);
}

/**
 * Returns a full page id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool   &$exists (reference) updated with existence of the page
 * @param string $rev
 * @param bool   $date_at
 */
function resolve_pageid($ns,&$page,&$exists,$rev='',$date_at=false ){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    } else {
        $hash = '';
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($page);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false) {
            //something found
            $rev  = $pagelog_rev;
        }
    }
    $file = wikiFN($page,$rev);

    // if ends with colon or slash we have a namespace link
    if(in_array(substr($page,-1), array(':', ';'), true) ||
       ($conf['useslash'] && substr($page,-1) == '/')){
        if(page_exists($page.$conf['start'],$rev,true,$date_at)){
            // start page inside namespace
            $page = $page.$conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)),$rev,true,$date_at)){
            // page named like the NS inside the NS
            $page = $page.noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page,$rev,true,$date_at)){
            // page like namespace exists, $page is kept as it is
            $exists = true;
        }else{
            // fall back to default
            $page = $page.$conf['start'];
        }
    }else{
        //check alternative plural/nonplural form
        if(!file_exists($file)){
            if( $conf['autoplural'] ){
                if(substr($page,-1) == 's'){
                    $try = substr($page,0,-1);
                }else{
                    $try = $page.'s';
                }
                if(page_exists($try,$rev,true,$date_at)){
                    $page   = $try;
                    $exists = true;
                }
            }
        }else{
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // files are spread over subdirectories named after the first hash character
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id){
    $data = array(
        'id' => $id,
        'hidden' => false
    );
    trigger_event('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Does nothing when the page is already flagged hidden, when no
 * 'hidepages' pattern is configured or when $ACT is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if(empty($conf['hidepages'])) return;
    if($ACT == 'admin') return;

    if(preg_match('/'.$conf['hidepages'].'/ui',':'.$data['id'])){
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id){
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id) {
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
        return $file;
    }

    if($conf['fnencode'] == 'safe'){
        return SafeFN::encode($file);
    }

    $file = urlencode($file);
    // keep directory separators readable
    $file = str_replace('%2F','/',$file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($conf['fnencode'] == 'safe'){
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page   the pagename you're looking for
 * @param bool    $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true){
    if ((string) $page === '') return false;
    global $ID;

    $ns = $ID;
    do {
        // walk up one namespace level per iteration; the root (false) is tried last
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if(page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)){
            return $pageid;
        }
    } while($ns !== false);

    return false;
}