<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the request variable named $param or extracts the ID from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator, the matching start page is
 * looked up and, if $ACT is 'show', a redirect to it is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param the request variable name, default 'id'
 * @param bool   $clean if true, the ID is run through cleanID()
 * @return string the requested ID
 */
function getID($param='id',$clean=true){
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script  = '';

        //get the script URL
        // the optional $_SERVER keys are probed with !empty() so that a
        // missing key does not raise an "undefined index" notice
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.utf8_basename($_SERVER['SCRIPT_FILENAME']);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) accented characters are
 * converted to unaccented ones.
 *
 * Results are memoized in the global $cache_cleanid array, keyed by the
 * raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string the cleaned ID
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim((string)$raw_id);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up
    $id = preg_replace($sepcharpat,$sepchar,$id);   // collapse runs of the separator char
    $id = preg_replace('#:+#',':',$id);             // collapse runs of colons
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);     // no punctuation right after a colon
    $id = preg_replace('#[:\._\-]+:#',':',$id);     // no punctuation right before a colon

    $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id
 * @return string|false everything before the last colon, false if the ID has no colon
 */
function getNS($id){
    $pos = strrpos((string)$id,':');
    if($pos!==false){
        return substr((string)$id,0,$pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id
 * @return string everything after the last colon, or $id itself if it has no colon
 */
function noNS($id) {
    $pos = strrpos($id, ':');
    if ($pos!==false) {
        return substr($id, $pos+1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of $id.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param  string $id
 * @return string|false false when $id has no namespace (getNS() returned false)
 */
function curNS($id) {
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param  string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    if ($p == $conf['start'] || $p == false) {
        $p = curNS($id);
        if ($p == false) {
            // neither a page name nor a namespace to show
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * @param string     $title The headline title
 * @param array|bool $check Existing IDs (title => number); updated in place when an array
 * @return string the title
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    // an ID must not start with a digit, underscore or dash
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists ($title,$check)) {
            $check[$title] = 0;
        } else {
            $title .= ++ $check[$title];
        }
    }

    return $title;
}


/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id      page id
 * @param string|int $rev     page revision, empty string for current
 * @param bool       $clean   flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at if true and $rev is given, $rev is first mapped through
 *                            PageChangeLog::getLastRevisionAt()
 * @return bool true if the page file exists
 */
function page_exists($id,$rev='',$clean=true, $date_at=false) {
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false)
            $rev = $pagelog_rev;
    }
    return @file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are memoized in the global $cache_wikifn array, keyed by raw ID
 * and revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    if (isset($cache[$raw_id]) && isset($cache[$raw_id][$rev])) {
        return $cache[$raw_id][$rev];
    }

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (@file_exists($fn . '.gz')){
                $fn .= '.gz';
            }else if(@file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
    $cache[$raw_id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param  string $id page id
 * @return string full path
 */
function wikiLockFN($id) {
    global $conf;
    return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param  string $id  page id
 * @param  string $ext file extension, appended verbatim
 * @return string full path
 */
function metaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param  string $id  media id
 * @param  string $ext file extension, appended verbatim
 * @return string full path
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['mediametadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param  string $id page id
 * @return array
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $files    = glob($basename.'.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    // (glob() may also return false, which yields an empty result here)
    return    $files ? preg_grep('/^'.preg_quote($basename, '/').'\.[^.\/]*$/u', $files) : array();
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param  string     $id  media id
 * @param  string|int $rev revision, empty string for current
 * @return string full path
 */
function mediaFN($id, $rev=''){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
    }else{
        // old revisions are stored as <name>.<rev>.<ext>
        $ext = mimetype($id);
        $name = substr($id,0, -1*strlen($ext[0])-1);
        $fn = $conf['mediaolddir'].'/'.utf8_encodeFN($name .'.'.( (int) $rev ).'.'.$ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * The lookup order is: DOKU_CONF lang dir, DOKU_INC lang dir for the
 * configured language, DOKU_INC english lang dir.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $file = DOKU_CONF.'lang/'.$conf['lang'].'/'.$id.'.'.$ext;
    if(!@file_exists($file)){
        $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.'.$ext;
        if(!@file_exists($file)){
            //fall back to english
            $file = DOKU_INC.'inc/lang/en/'.$id.'.'.$ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 *
 * @param  string|false $ns    namespace the ID is relative to
 * @param  string       $id    the (possibly relative) ID
 * @param  bool         $clean if true, the result is run through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from: keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // preserve a trailing colon (namespace link)
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string     $ns      namespace which is context of id
 * @param string     &$page   (reference) relative media id, updated to resolved id
 * @param bool       &$exists (reference) updated with existence of media
 * @param int|string $rev     revision, empty string for current
 * @param bool       $date_at if true and $rev is given, $rev is first mapped through
 *                            MediaChangeLog::getLastRevisionAt()
 */
function resolve_mediaid($ns,&$page,&$exists,$rev='',$date_at=false){
    // resolve first: the changelog lookup needs the full media id
    $page   = resolve_id($ns,$page);
    if($rev !== '' && $date_at){
        $medialog = new MediaChangeLog($page);
        $medialog_rev = $medialog->getLastRevisionAt($rev);
        if($medialog_rev !== false) {
            $rev = $medialog_rev;
        }
    }
    $file   = mediaFN($page,$rev);
    $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string     $ns      namespace which is context of id
 * @param string     &$page   (reference) relative page id, updated to resolved id
 * @param bool       &$exists (reference) updated with existence of page
 * @param int|string $rev     revision, empty string for current
 * @param bool       $date_at if true and $rev is given, $rev is first mapped through
 *                            PageChangeLog::getLastRevisionAt()
 */
function resolve_pageid($ns,&$page,&$exists,$rev='',$date_at=false ){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    // NOTE: truthiness test, so a '#' at position 0 is deliberately not split off here
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    } else {
        $hash = '';
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($page);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false)//something found
            $rev  = $pagelog_rev;
    }
    $file = wikiFN($page,$rev);

    // if ends with colon or slash we have a namespace link
    if(in_array(substr($page,-1), array(':', ';'), true) ||
       ($conf['useslash'] && substr($page,-1) == '/')){
        if(page_exists($page.$conf['start'],$rev,true,$date_at)){
            // start page inside namespace
            $page = $page.$conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)),$rev,true,$date_at)){
            // page named like the NS inside the NS
            $page = $page.noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page,$rev,true,$date_at)){
            // page like namespace exists; the trailing separator is
            // removed by the cleanID() call below
            $exists = true;
        }else{
            // fall back to default
            $page = $page.$conf['start'];
        }
    }else{
        //check alternative plural/nonplural form
        if(!@file_exists($file)){
            if( $conf['autoplural'] ){
                if(substr($page,-1) == 's'){
                    $try = substr($page,0,-1);
                }else{
                    $try = $page.'s';
                }
                if(page_exists($try,$rev,true,$date_at)){
                    $page   = $try;
                    $exists = true;
                }
            }
        }else{
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // files are spread over subdirectories named after the first hex digit
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, with
 * _isHiddenPage() as the default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param  string $id page id
 * @return bool
 */
function isHiddenPage($id){
    $data = array(
        'id' => $id,
        'hidden' => false
    );
    trigger_event('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * Default action for the PAGEUTILS_ID_HIDEPAGE event
 *
 * Sets $data['hidden'] to true when ':'.$data['id'] matches the
 * $conf['hidepages'] pattern. Does nothing if the page is already marked
 * hidden, no pattern is configured, or $ACT is 'admin'.
 *
 * @param array &$data event data with the keys 'id' and 'hidden'
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if(empty($conf['hidepages'])) return;
    if($ACT == 'admin') return;

    if(preg_match('/'.$conf['hidepages'].'/ui',':'.$data['id'])){
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param  string $id page id
 * @return bool
 */
function isVisiblePage($id){
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param  string $id page or namespace id
 * @return string
 */
function prettyprint_id($id) {
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param  string $file file name
 * @param  bool   $safe if true, names made up of [a-zA-Z0-9/_-.%] only are returned as is
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
        return $file;
    }

    if($conf['fnencode'] == 'safe'){
        return SafeFN::encode($file);
    }

    // url encoding, but keep the directory separators
    $file = urlencode($file);
    $file = str_replace('%2F','/',$file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param  string $file file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($conf['fnencode'] == 'safe'){
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none
 */
function page_findnearest($page){
    if (!$page) return false;
    global $ID;

    // walk up from the namespace of $ID to the root namespace
    $ns = $ID;
    do {
        $ns = getNS($ns);
        $pageid = ltrim("$ns:$page",':');
        if(page_exists($pageid)){
            return $pageid;
        }
    } while($ns);

    return false;
}