<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * If the ID ends in a namespace separator, a "Location:" header pointing
 * to the resolved page is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  Name of the request parameter holding the ID
 * @param  boolean $clean  Run the result through cleanID()
 * @return string  The (optionally cleaned) ID
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;
  // an array-style parameter (e.g. id[]=x) can never be a valid ID and
  // would break the string functions below
  if(is_array($id)) $id = null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if($conf['basedir']){
      $relpath = '';
      if($param != 'id') {
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    $id = urldecode((string)$id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }

  // Namespace autolinking from URL
  $last = substr((string)$id,-1);
  if($last == ':' || ($conf['useslash'] && $last == '/')){
    if(page_exists($id.$conf['start'])){
      // start page inside namespace
      $id = $id.$conf['start'];
    }elseif(page_exists($id.noNS(cleanID($id)))){
      // page named like the NS inside the NS
      $id = $id.noNS(cleanID($id));
    }elseif(page_exists($id)){
      // page like namespace exists
      $id = substr($id,0,-1);
    }else{
      // fall back to default
      $id = $id.$conf['start'];
    }
    // NOTE(review): execution continues after the redirect header is sent;
    // confirm whether callers rely on that before adding an exit here.
    header("Location: ".wl($id,'',true));
  }

  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) characters are romanized
 * and/or deaccented.
 *
 * Results of calls with default flags are kept in the global memory cache
 * $cache_cleanid. Calls with $ascii or $media set bypass the cache because
 * the same raw ID yields a different result for them.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @param  boolean $media  Allow leading or trailing _ for media files
 * @return string  The cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
  global $conf;
  static $sepcharpat = null;

  global $cache_cleanid;
  $cache = & $cache_cleanid;

  $key       = (string)$raw_id;
  $cacheable = (!$ascii && !$media);

  // check if it's already in the memory cache
  if ($cacheable && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat == null) // build string only once to save clock cycles
    $sepcharpat = '#\\'.$sepchar.'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up
  $id = preg_replace($sepcharpat,$sepchar,$id);
  $id = preg_replace('#:+#',':',$id);
  $id = ($media ? trim($id,':.-') : trim($id,':._-'));
  $id = preg_replace('#:[:\._\-]+#',':',$id);

  if ($cacheable) $cache[$key] = $id;
  return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false Everything before the last colon, false if there is no colon
 */
function getNS($id){
  $id  = (string)$id;
  $pos = strrpos($id,':');
  if($pos === false) return false;
  return substr($id,0,$pos);
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return mixed  Everything after the last colon, or $id unchanged if
 *                it contains no colon
 */
function noNS($id) {
  // cast only for the lookup so that a colon-less $id is returned untouched
  $pos = strrpos((string)$id, ':');
  if ($pos === false) return $id;
  return substr($id, $pos+1);
}

/**
 * Returns the current namespace
 *
 * This is the last namespace segment of the given ID. Evaluates to false
 * when the ID has no namespace.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 */
function curNS($id) {
  return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 */
function noNSorNS($id) {
  global $conf;

  $p = noNS($id);
  if ($p == $conf['start']) {
    $p = curNS($id);
    if ($p == false) {
      // start page without a namespace: keep the page name itself
      return noNS($id);
    }
  }
  return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * Leading digits, dots, underscores and dashes are stripped. If nothing is
 * left, 'section' followed by the digits of the title is used instead.
 * When $check is an array, a number is appended until the ID is not in
 * the list, and the final ID is added to the list.
 *
 * @param string  $title   The headline title
 * @param array   $check   List of existing IDs
 * @return string The section ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
  $title = str_replace(':','',cleanID($title));
  $new = ltrim($title,'0123456789._-');
  if(empty($new)){
    $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
  }else{
    $title = $new;
  }

  if(is_array($check)){
    // make sure titles are unique
    $num = '';
    while(in_array($title.$num,$check)){
      if($num){
        $num++;
      }else{
        $num = 1;
      }
    }
    $title = $title.$num;
    $check[] = $title;
  }

  return $title;
}


/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @return boolean
 */
function page_exists($id,$rev='',$clean=true) {
  return @file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars. Results are kept in
 * the global memory cache $cache_wikifn, keyed by raw ID and revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   The full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  global $cache_wikifn;
  $cache = & $cache_wikifn;

  if (isset($cache[$raw_id][$rev])) {
    return $cache[$raw_id][$rev];
  }

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);
  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (@file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(@file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
  $cache[$raw_id][$rev] = $fn;
  return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function wikiLockFN($id) {
  global $conf;
  return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   The page ID (is cleaned here)
 * @param  string $ext  Appended verbatim to the encoded file name
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
  return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Looks in the meta directory of the ID's namespace for non-directory
 * entries whose name starts with "<pagename>.".
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @return array  Empty if the directory can not be opened
 */
function metaFiles($id){
  $name  = noNS($id);
  $ns    = getNS($id);
  $dir   = ($ns) ? metaFN($ns,'').'/' : metaFN($ns,'');
  $files = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
  return $fn;
}

/**
 * Returns the full filepath to a localized textfile if local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id){
  global $conf;
  $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(!@file_exists($file)){
    //fall back to english
    $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
  }
  return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    The namespace the ID is relative to
 * @param  string       $id    The ID to resolve
 * @param  boolean      $clean Run the result through cleanID()
 * @return string
 */
function resolve_id($ns,$id,$clean=true){
  global $conf;

  $id = (string)$id;

  // some pre cleaning for useslash:
  if($conf['useslash']) $id = str_replace('/',':',$id);

  // if the id starts with a dot we need to handle the
  // relative stuff
  if($id !== '' && $id[0] == '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    // segments are compared against '' explicitly so that a namespace
    // named "0" is not mistaken for an empty segment
    $result = array();
    $pathA  = explode(':', $id);
    if ($pathA[0] === '') $result[] = '';
    foreach ($pathA as $dir) {
      if ($dir === '..') {
        if (end($result) === '..') {
          $result[] = '..';
        } else {
          // go one level up; keep the '..' if there is nothing to go up from
          $popped = array_pop($result);
          if ($popped === null || $popped === '') $result[] = '..';
        }
      } elseif ($dir !== '' && $dir !== '.') {
        $result[] = $dir;
      }
    }
    // preserve a trailing colon (namespace link)
    if (end($pathA) === '') $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}

/**
 * Returns a full media id
 *
 * $page is replaced by the resolved ID and $exists is set to whether the
 * corresponding media file exists.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $file   = mediaFN($page);
  $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * $page is replaced by the resolved, cleaned ID (plus "#hash" if one was
 * given) and $exists is set to whether a matching page was found.
 * Namespace links (trailing colon, or slash with useslash) are pointed at
 * a page inside the namespace; with $conf['autoplural'] the plural or
 * singular form of a missing page is tried as well.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  // (a '#' at position 0 is deliberately not split off here, as before)
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  } else {
    $hash = '';
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon or slash we have a namespace link
  if(substr($page,-1) == ':' || ($conf['useslash'] && substr($page,-1) == '/')){
    if(page_exists($page.$conf['start'])){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(page_exists($page.noNS(cleanID($page)))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(page_exists($page)){
      // page like namespace exists, keep $page as it is
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(page_exists($try)){
          $page   = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // files are spread over subdirectories named after the first hash char
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * Nothing is hidden when no pattern is configured or when $ACT is 'admin'.
 * The pattern is matched case-insensitively against the ID prefixed with
 * a colon.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @return boolean
 */
function isHiddenPage($id){
  global $conf;
  global $ACT;
  if(empty($conf['hidepages'])) return false;
  if($ACT == 'admin') return false;

  return (bool) preg_match('/'.$conf['hidepages'].'/ui',':'.$id);
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @return boolean
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}

/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers. If the client sent
 * If-Modified-Since and/or If-None-Match and every header it sent matches,
 * a 304 status is sent and the script exits. Otherwise the function
 * simply returns.
 *
 * @author   Simon Willison <swillison@gmail.com>
 * @link     http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param    timestamp $timestamp lastmodified time of the cache file
 * @returns  void or void with previously header() commands executed
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html
  $last_modified = substr(gmdate('r', $timestamp), 0, -5).'GMT';
  $etag = '"'.md5($last_modified).'"';
  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");
  // See if the client has provided the required headers
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }else{
    $if_modified_since = false;
  }

  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }else{
    $if_none_match = false;
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');

  // don't produce output, even if compression is on
  // (only discard a buffer if one is actually active)
  if (ob_get_level() > 0) ob_end_clean();
  exit;
}

/**
 * Let the webserver deliver a file via an X-Sendfile style header
 *
 * Depending on $conf['xsendfile'] one of the following headers is sent:
 * 1 = X-LIGHTTPD-send-file, 2 = X-Sendfile, 3 = X-Accel-Redirect.
 *
 * @param  string $file  The file path to put into the header
 * @return boolean true if a header was sent, false otherwise
 */
function http_sendfile($file) {
  global $conf;

  //use x-sendfile header to pass the delivery to compatible webservers
  if($conf['xsendfile'] == 1){
    header("X-LIGHTTPD-send-file: $file");
    return true;
  }elseif($conf['xsendfile'] == 2){
    header("X-Sendfile: $file");
    return true;
  }elseif($conf['xsendfile'] == 3){
    header("X-Accel-Redirect: $file");
    return true;
  }

  return false;
}

//Setup VIM: ex: et ts=2 enc=utf-8 :