<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts the ID from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * If the ID ends with a namespace separator, the namespace is resolved to
 * a page and a Location header for that page is sent. Execution is NOT
 * stopped here; the resolved ID is still returned to the caller.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request variable holding the ID
 * @param  boolean $clean  pass the result through cleanID()
 * @return string  the ID (may be null/empty for $param != 'id' when
 *                 nothing was found and $clean is false)
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if($conf['basedir']){
      $relpath = '';
      if($param != 'id') {
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    $id = urldecode($id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }

  // Namespace autolinking from URL
  // (cast: $id is null when the request variable was not set at all)
  $last = substr((string)$id,-1);
  if($last == ':' || ($conf['useslash'] && $last == '/')){
    if(page_exists($id.$conf['start'])){
      // start page inside namespace
      $id = $id.$conf['start'];
    }elseif(page_exists($id.noNS(cleanID($id)))){
      // page named like the NS inside the NS
      $id = $id.noNS(cleanID($id));
    }elseif(page_exists($id)){
      // page like namespace exists
      $id = substr($id,0,-1);
    }else{
      // fall back to default
      $id = $id.$conf['start'];
    }
    header("Location: ".wl($id,'',true));
  }

  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is romanized and/or
 * deaccented through the utf8_* helpers.
 *
 * Results of calls with default flags are kept in the global
 * $cache_cleanid array. Calls with $ascii or $media set bypass that cache,
 * because the cache is keyed by the raw ID only and those flags change the
 * result.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @param  boolean $media  Allow leading or trailing _ for media files
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
  global $conf;
  static $sepcharpat = null;

  global $cache_cleanid;
  $cache = & $cache_cleanid;

  $key       = (string)$raw_id;
  $cacheable = (!$ascii && !$media);

  // check if it's already in the memory cache
  if ($cacheable && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat == null) // build string only once to save clock cycles
    $sepcharpat = '#\\'.$sepchar.'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up: collapse runs of the separator char and of colons, then
  //strip separator-like chars from both ends and after each colon
  $id = preg_replace($sepcharpat,$sepchar,$id);
  $id = preg_replace('#:+#',':',$id);
  $id = ($media ? trim($id,':.-') : trim($id,':._-'));
  $id = preg_replace('#:[:\._\-]+#',':',$id);

  if ($cacheable) $cache[$key] = $id;
  return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false everything before the last colon, false if the
 *                      ID contains no colon
 */
function getNS($id){
  $pos = strrpos((string)$id,':');
  if($pos!==false){
    return substr((string)$id,0,$pos);
  }
  return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string everything after the last colon, or $id unchanged if
 *                it contains no colon
 */
function noNS($id) {
  $pos = strrpos($id, ':');
  if ($pos!==false) {
    return substr($id, $pos+1);
  } else {
    return $id;
  }
}

/**
 * Returns the current namespace
 *
 * This is the last component of the ID's namespace, i.e. noNS() applied
 * to getNS(). For an ID without namespace the false from getNS() is
 * passed through.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string|false
 */
function curNS($id) {
  return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * If the page name equals $conf['start'] the name of the enclosing
 * namespace is returned instead; if there is no such namespace the plain
 * page name is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string
 */
function noNSorNS($id) {
  global $conf;

  $p = noNS($id);
  if ($p == $conf['start']) {
    $p = curNS($id);
    if ($p == false) {
      return noNS($id);
    }
  }
  return $p;
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @return boolean true if the file returned by wikiFN() exists
 */
function page_exists($id,$rev='',$clean=true) {
  return @file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Current pages live below $conf['datadir'], old revisions below
 * $conf['olddir']. When $conf['compression'] is set, an existing .gz or
 * .bz2 revision file is preferred; otherwise the configured extension is
 * appended.
 *
 * Results are kept in the global $cache_wikifn array, keyed by the raw ID
 * and the revision (the $clean flag is not part of the key).
 *
 * @param  $raw_id string   id of wikipage
 * @param  $rev    string   page revision, empty string for current
 * @param  $clean  bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                          when $id is guaranteed to have been cleaned already.
 * @return string  the file path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  global $cache_wikifn;
  $cache = & $cache_wikifn;

  if (isset($cache[$raw_id]) && isset($cache[$raw_id][$rev])) {
    return $cache[$raw_id][$rev];
  }

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);
  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (@file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(@file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesn't exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
  $cache[$raw_id][$rev] = $fn;
  return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned ID and lives
 * in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id
 * @return string
 */
function wikiLockFN($id) {
  global $conf;
  return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  page ID (is cleaned here)
 * @param  string $ext extension appended verbatim to the encoded name
 * @return string path below $conf['metadir']
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
  return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Scans the meta directory of the ID's namespace for non-directory
 * entries whose name starts with "<pagename>.".
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id
 * @return array  list of file paths, empty if the directory can't be opened
 */
function metaFiles($id){
  $name   = noNS($id);
  $ns     = getNS($id);
  $dir    = ($ns) ? metaFN($ns,'').'/' : metaFN($ns,'');
  $files  = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id media ID (is cleaned here)
 * @return string path below $conf['mediadir']
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
  return $fn;
}

/**
 * Returns the full filepath to a localized textfile if local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id name of the text file without the .txt extension
 * @return string path below DOKU_INC.'inc/lang/'
 */
function localeFN($id){
  global $conf;
  $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(!@file_exists($file)){
    //fall back to english
    $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
  }
  return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    namespace the ID is relative to
 * @param  string       $id    the (possibly relative) ID
 * @param  boolean      $clean pass the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  global $conf;

  // some pre cleaning for useslash:
  if($conf['useslash']) $id = str_replace('/',':',$id);

  // if the id starts with a dot we need to handle the
  // relative stuff
  // (substr() instead of a string offset so an empty $id is handled
  // without a notice)
  if(substr($id,0,1) == '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    $result = array();
    $pathA  = explode(':', $id);
    if (!$pathA[0]) $result[] = '';
    foreach ($pathA AS $key => $dir) {
      if ($dir == '..') {
        if (end($result) == '..') {
          $result[] = '..';
        } elseif (!array_pop($result)) {
          // nothing left to go up from: keep the '..'
          $result[] = '..';
        }
      } elseif ($dir && $dir != '.') {
        $result[] = $dir;
      }
    }
    // keep a trailing colon (namespace link)
    if (!end($pathA)) $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}

/**
 * Returns a full media id
 *
 * $page is replaced by the resolved (and cleaned) media ID, $exists is
 * set to whether the matching media file exists.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns      namespace the ID is relative to
 * @param  string       &$page   media ID, overwritten with the result
 * @param  boolean      &$exists overwritten with the existence flag
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $file   = mediaFN($page);
  $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * $page is replaced by the resolved and cleaned page ID (with the cleaned
 * hash re-appended if one was given), $exists is set to whether a matching
 * page was found. Namespace links (trailing colon, or slash with
 * $conf['useslash']) are resolved to a page inside the namespace; for
 * other links the plural/singular form is tried when
 * $conf['autoplural'] is set.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns      namespace the ID is relative to
 * @param  string       &$page   page ID, overwritten with the result
 * @param  boolean      &$exists overwritten with the existence flag
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  //(a '#' at position 0 is deliberately not treated as a hash here)
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  } else {
    $hash = '';
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon or slash we have a namespace link
  if(substr($page,-1) == ':' || ($conf['useslash'] && substr($page,-1) == '/')){
    if(page_exists($page.$conf['start'])){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(page_exists($page.noNS(cleanID($page)))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(page_exists($page)){
      // page like namespace exists, keep $page as it is
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(page_exists($try)){
          $page   = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after
 * the first character of the md5 hash.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a regular expression (case insensitive,
 * UTF-8) and matched against the ID prefixed with a colon.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id
 * @return boolean true if the page is hidden
 */
function isHiddenPage($id){
  global $conf;
  if(empty($conf['hidepages'])) return false;

  if(preg_match('/'.$conf['hidepages'].'/ui',':'.$id)){
    return true;
  }
  return false;
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id
 * @return boolean true if the page is not hidden
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}

/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers (the ETag is the md5 of the
 * Last-Modified string). If the client sent If-Modified-Since and/or
 * If-None-Match and every header it sent matches, a 304 response is sent
 * and the script exits; otherwise the function simply returns.
 *
 * @author Simon Willison <swillison@gmail.com>
 * @link   http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param  timestamp $timestamp lastmodified time of the cache file
 * @returns void or void with previously header() commands executed
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html
  $last_modified = substr(gmdate('r', $timestamp), 0, -5).'GMT';
  $etag = '"'.md5($last_modified).'"';
  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");
  // See if the client has provided the required headers
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }else{
    $if_modified_since = false;
  }

  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }else{
    $if_none_match = false;
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');

  // don't produce output, even if compression is on
  // (only discard a buffer if one is actually active)
  if (ob_get_level() > 0) ob_end_clean();
  exit;
}

//Setup VIM: ex: et ts=2 enc=utf-8 :