<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is expanded to an existing
 * page (start page, page named like the namespace, or page named like the
 * namespace itself); if the global $ACT is 'show', a redirect to the
 * expanded ID is sent via send_redirect().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if ($conf['basedir']) {
            $relpath = '';
            if ($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $script = preg_replace(
                '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $INPUT->server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        //remove script URL and Querystring to gain the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    if (substr($id, -1) == ':' || ($conf['useslash'] && substr($id, -1) == '/')) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id = $id . $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id = $id . noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id = $id . $conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // redirect to the expanded ID, keeping all other GET parameters
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if ($clean) {
        $id = cleanID($id);
    }
    if ($id === '' && $param == 'id') {
        $id = $conf['start'];
    }

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) accented characters are
 * converted to unaccented ones.
 *
 * Results of non-ASCII-forced calls are memoized in the global
 * $cache_cleanid array, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string)$raw_id);
    $id = \dokuwiki\Utf8\PhpString::strtolower($id);

    //alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = \dokuwiki\Utf8\Clean::romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = \dokuwiki\Utf8\Clean::deaccent($id, -1);
    }

    //remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) {
        $id = \dokuwiki\Utf8\Clean::strip($id);
    }

    //clean up: collapse separator runs and strip separators around colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[(string)$raw_id] = $id;
    }
    return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos !== false) {
        return substr((string)$id, 0, $pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * The part after the last colon is returned; an ID without a colon is
 * returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    $pos = strrpos($id, ':');
    if ($pos !== false) {
        return substr($id, $pos + 1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last component of the namespace part of the given ID.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function curNS($id)
{
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * Falls back to $conf['start'] when neither a page name nor a current
 * namespace can be determined.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array the returned id is made unique against it by
 * appending a counter, and the id is then appended to $check.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs (passed by reference)
 * @return string the section id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace(array(':', '.'), '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        //keep numbers from headline
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title);
    } else {
        $title = $new;
    }

    if (is_array($check)) {
        $suffix = 0;
        $candidateTitle = $title;
        // strict comparison: a non-string entry in $check (e.g. true) must not
        // loosely match every candidate and keep this loop running forever
        while (in_array($candidateTitle, $check, true)) {
            $candidateTitle = $title . ++$suffix;
        }
        $check[] = $candidateTitle;
        return $candidateTitle;
    } else {
        return $title;
    }
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id     page id
 * @param string|int $rev    empty or revision timestamp
 * @param bool       $clean  flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at if true, $rev is resolved through the page changelog's
 *                            getLastRevisionAt() before the file lookup
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id, $rev, $clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Computed paths are memoized in the global $cache_wikifn array.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')) {
                $fn .= '.gz';
            } elseif (file_exists($fn . '.bz2')) {
                $fn .= '.bz2';
            } else {
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = array();
    }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;
    return $conf['lockdir'] . '/' . md5(cleanID($id)) . '.lock';
}

/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension (appended verbatim, include the leading dot)
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['metadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media (appended verbatim, include the leading dot)
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':', '/', $id);
    $fn = $conf['mediametadir'] . '/' . utf8_encodeFN($id) . $ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    // (GLOB_MARK appends a slash to directories, which the pattern rejects as well)
    return $files ? preg_grep('/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u', $files) : array();
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool       $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;
    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);
    if (empty($rev)) {
        $fn = $conf['mediadir'] . '/' . utf8_encodeFN($id);
    } else {
        // old revisions are stored as <name>.<rev>.<ext>
        // NOTE(review): if mimetype() yields no extension in $ext[0], strlen() is 0
        // and substr() still drops the last character of $id - confirm whether
        // extension-less media ids can reach this branch.
        $ext = mimetype($id);
        $name = substr($id, 0, -1 * strlen($ext[0]) - 1);
        $fn = $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ((int) $rev) . '.' . $ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF lang directory, then the bundled language
 * directory for $conf['lang'], then the bundled english file.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
    if (!file_exists($file)) {
        $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $id . '.' . $ext;
        if (!file_exists($file)) {
            //fall back to english
            $file = DOKU_INC . 'inc/lang/en/' . $id . '.' . $ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * Path segments are compared against the empty string explicitly, so a
 * namespace or page literally named "0" is kept.
 *
 * @author <bart at mediawave dot nl>
 *
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;

    // some pre cleaning for useslash:
    if ($conf['useslash']) {
        $id = str_replace('/', ':', $id);
    }

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon
        if ($pathA[0] === '') {
            $result[] = '';
        }
        foreach ($pathA as $dir) {
            if ($dir === '..') {
                if (end($result) === '..') {
                    $result[] = '..';
                } else {
                    // go one level up; if there was nothing (or only the
                    // leading empty segment) to remove, keep the '..'
                    $popped = array_pop($result);
                    if ($popped === null || $popped === '') {
                        $result[] = '..';
                    }
                }
            } elseif ($dir !== '' && $dir !== '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon
        if (end($pathA) === '') {
            $result[] = '';
        }
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) {
        $id = cleanID($id);
    }
    return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative media id, updated to resolved id
 * @param bool   &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool   $date_at if true, $rev is resolved through the media changelog's
 *                        getLastRevisionAt() before the file lookup
 */
function resolve_mediaid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    $page = resolve_id($ns, $page);
    if ($rev !== '' && $date_at) {
        $medialog = new MediaChangeLog($page);
        $medialog_rev = $medialog->getLastRevisionAt($rev);
        if ($medialog_rev !== false) {
            $rev = $medialog_rev;
        }
    }

    $file = mediaFN($page, $rev);
    $exists = file_exists($file);
}

/**
 * Returns a full page id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool   &$exists (reference) updated with existence of page
 * @param string $rev
 * @param bool   $date_at if true, $rev is resolved through the page changelog's
 *                        getLastRevisionAt() before the file lookup
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    if (strpos($page, '#')) {
        list($page, $hash) = explode('#', $page, 2);
    } else {
        $hash = '';
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns, $page, false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    if ($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($page);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if ($pagelog_rev !== false) {
            //something found
            $rev = $pagelog_rev;
        }
    }
    $file = wikiFN($page, $rev);

    // if ends with colon or slash we have a namespace link
    if (
        in_array(substr($page, -1), array(':', ';')) ||
        ($conf['useslash'] && substr($page, -1) == '/')
    ) {
        if (page_exists($page . $conf['start'], $rev, true, $date_at)) {
            // start page inside namespace
            $page = $page . $conf['start'];
            $exists = true;
        } elseif (page_exists($page . noNS(cleanID($page)), $rev, true, $date_at)) {
            // page named like the NS inside the NS
            $page = $page . noNS(cleanID($page));
            $exists = true;
        } elseif (page_exists($page, $rev, true, $date_at)) {
            // page like namespace exists; $page is kept as is
            $exists = true;
        } else {
            // fall back to default
            $page = $page . $conf['start'];
        }
    } else {
        //check alternative plural/nonplural form
        if (!file_exists($file)) {
            if ($conf['autoplural']) {
                if (substr($page, -1) == 's') {
                    $try = substr($page, 0, -1);
                } else {
                    $try = $page . 's';
                }
                if (page_exists($try, $rev, true, $date_at)) {
                    $page = $try;
                    $exists = true;
                }
            }
        } else {
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if (!empty($hash)) {
        $page .= '#' . $hash;
    }
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;
    $md5 = md5($data);
    // files are spread over subdirectories named after the first hash character
    $file = $conf['cachedir'] . '/' . $md5[0] . '/' . $md5 . $ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event with
 * _isHiddenPage() as the default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = array(
        'id' => $id,
        'hidden' => false
    );
    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Nothing is hidden when no 'hidepages' pattern is configured or when the
 * current action is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden']) {
        return;
    }
    if (empty($conf['hidepages'])) {
        return;
    }
    if ($ACT == 'admin') {
        return;
    }

    // NOTE(review): the configured value is used verbatim as the regex body,
    // so an unescaped '/' in it would terminate the pattern early.
    if (preg_match('/' . $conf['hidepages'] . '/ui', ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id)
{
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::encode($file);
    }

    // url encoding, but keep the directory separators readable
    $file = urlencode($file);
    $file = str_replace('%2F', '/', $file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;
    if ($conf['fnencode'] == 'utf-8') {
        return $file;
    }

    if ($conf['fnencode'] == 'safe') {
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none was found
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') {
        return false;
    }
    global $ID;

    // walk up the namespace hierarchy; the last iteration ($ns === false)
    // checks the root namespace
    $ns = $ID;
    do {
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if (page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)) {
            return $pageid;
        }
    } while ($ns !== false);

    return false;
}