<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is expanded to a page inside
 * (or named like) that namespace, and a redirect is sent if $ACT is 'show'.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $param  the request variable name holding the ID
 * @param bool   $clean  run the result through cleanID()
 * @return string the ID
 */
function getID($param='id',$clean=true){
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.utf8_basename($INPUT->server->str('SCRIPT_FILENAME'));

        }elseif($INPUT->server->str('PATH_INFO')){
            $request = $INPUT->server->str('PATH_INFO');
        }elseif($INPUT->server->str('SCRIPT_NAME')){
            $script = $INPUT->server->str('SCRIPT_NAME');
        }elseif($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')){
            $script = preg_replace ('/^'.preg_quote($INPUT->server->str('DOCUMENT_ROOT'),'/').'/','',
                    $INPUT->server->str('SCRIPT_FILENAME'));
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) accented characters are
 * converted to unaccented ones
 *
 * Results of the default (non-ASCII) mode are kept in the global
 * $cache_cleanid memory cache.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // The memory cache is keyed by the raw id only, so it may only serve
    // results of the default mode; a forced-ASCII call has to be computed.
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim((string)$raw_id);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse repeated separators and strip them around colons
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    // never store forced-ASCII results, they would poison the default mode
    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $id
 * @return string|false everything before the last colon, false if there is no colon
 */
function getNS($id){
    $pos = strrpos((string)$id,':');
    if($pos!==false){
        return substr((string)$id,0,$pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $id
 * @return string everything after the last colon, or $id unchanged if there is no colon
 */
function noNS($id) {
    $pos = strrpos((string)$id, ':');
    if ($pos!==false) {
        return substr($id, $pos+1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last part of the namespace of the given ID. Because the
 * result of getNS() is passed through unchanged, false is returned when
 * the ID has no namespace.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param string $id
 * @return string|false
 */
function curNS($id) {
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    if ($p == $conf['start'] || $p == false) {
        $p = curNS($id);
        if ($p == false) {
            // neither a page name nor a namespace is available
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs (title => number), updated by reference
 * @return string the title
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    // leading digits, underscores and dashes are dropped
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists ($title,$check)) {
            $check[$title] = 0;
        } else {
            // append the incremented counter of the already known title
            $title .= ++ $check[$title];
        }
    }

    return $title;
}


/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param string $id     page id
 * @param string $rev    page revision, empty string for current
 * @param bool   $clean  flag indicating that $id should be cleaned (see wikiFN as well)
 * @return bool
 */
function page_exists($id,$rev='',$clean=true) {
    return @file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are kept in the global $cache_wikifn memory cache, keyed by the
 * raw id and the revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    if (isset($cache[$raw_id]) && isset($cache[$raw_id][$rev])) {
        return $cache[$raw_id][$rev];
    }

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (@file_exists($fn . '.gz')){
                $fn .= '.gz';
            }else if(@file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
    $cache[$raw_id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id) {
    global $conf;
    return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param string $id   page id
 * @param string $ext  file extension, appended as given
 * @return string full path
 */
function metaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param string $id   media id
 * @param string $ext  file extension, appended as given
 * @return string full path
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['mediametadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param string $id page id
 * @return array matching paths; keys are those of the glob() result
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $files    = glob($basename.'.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    // (GLOB_MARK appends a slash to directories, which the pattern rejects too)
    return    $files ? preg_grep('/^'.preg_quote($basename, '/').'\.[^.\/]*$/u', $files) : array();
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param string     $id   media id
 * @param string|int $rev  revision, empty for the current file; cast to int for the file name
 * @return string full path
 */
function mediaFN($id, $rev=''){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
    }else{
        // old revisions are stored as <name>.<rev>.<ext>
        $ext = mimetype($id);
        $name = substr($id,0, -1*strlen($ext[0])-1);
        $fn = $conf['mediaolddir'].'/'.utf8_encodeFN($name .'.'.( (int) $rev ).'.'.$ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF lang dir, DOKU_INC lang dir for the configured
 * language, DOKU_INC lang dir for 'en'. The english path is returned without
 * checking that the file exists.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $file = DOKU_CONF.'lang/'.$conf['lang'].'/'.$id.'.'.$ext;
    if(!@file_exists($file)){
        $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.'.$ext;
        if(!@file_exists($file)){
            //fall back to english
            $file = DOKU_INC.'inc/lang/en/'.$id.'.'.$ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param string|false $ns     namespace the ID is relative to
 * @param string       $id     the (possibly relative) ID
 * @param bool         $clean  run the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA AS $key => $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $ns      namespace the media id is relative to
 * @param string &$page   media id, replaced by the resolved (and cleaned) id
 * @param bool   &$exists set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $file   = mediaFN($page);
    $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $ns      namespace the page id is relative to
 * @param string &$page   page id, replaced by the resolved and cleaned id (plus '#hash' if any)
 * @param bool   &$exists set to whether a matching page was found
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    // NOTE(review): strpos() is tested for truthiness, so a '#' at position 0
    // is not split off here -- confirm this is intended before changing it
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    } else {
        $hash = '';
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    if(in_array(substr($page,-1), array(':', ';')) ||
       ($conf['useslash'] && substr($page,-1) == '/')){
        if(page_exists($page.$conf['start'])){
            // start page inside namespace
            $page = $page.$conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)))){
            // page named like the NS inside the NS
            $page = $page.noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page)){
            // page like namespace exists, $page is kept as it is
            $exists = true;
        }else{
            // fall back to default
            $page = $page.$conf['start'];
        }
    }else{
        //check alternative plural/nonplural form
        if(!@file_exists($file)){
            if( $conf['autoplural'] ){
                if(substr($page,-1) == 's'){
                    $try = substr($page,0,-1);
                }else{
                    $try = $page.'s';
                }
                if(page_exists($try)){
                    $page   = $try;
                    $exists = true;
                }
            }
        }else{
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // files are spread over sub directories named after the first hash character
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, with
 * _isHiddenPage() as the default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id){
    $data = array(
        'id' => $id,
        'hidden' => false
    );
    trigger_event('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Sets $data['hidden'] to true when ':' followed by the id matches the
 * $conf['hidepages'] pattern. Nothing is changed when the page is already
 * marked hidden, when no pattern is configured or when $ACT is 'admin'.
 *
 * @param array $data event data    see isHiddenPage()
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if(empty($conf['hidepages'])) return;
    if($ACT == 'admin') return;

    if(preg_match('/'.$conf['hidepages'].'/ui',':'.$data['id'])){
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id){
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing ":*". The root namespace is
 * "*". Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param string $id
 * @return string
 */

function prettyprint_id($id) {
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param string $file  file name
 * @param bool   $safe  skip encoding for names made only of the characters a-zA-Z0-9/_-.%
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
        return $file;
    }

    if($conf['fnencode'] == 'safe'){
        return SafeFN::encode($file);
    }

    $file = urlencode($file);
    // keep directory separators readable
    $file = str_replace('%2F','/',$file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param string $file  file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($conf['fnencode'] == 'safe'){
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none was found
 */
function page_findnearest($page){
    if (!$page) return false;
    global $ID;

    $ns = $ID;
    do {
        // walk up one namespace level per iteration, ending with the root
        $ns = getNS($ns);
        $pageid = ltrim("$ns:$page",':');
        if(page_exists($pageid)){
            return $pageid;
        }
    } while($ns);

    return false;
}