<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request variable holding the ID
 * @param  boolean $clean  run the result through cleanID()
 * @return string  the ID (uncleaned request value if $clean is false)
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if($conf['basedir']){
      $relpath = '';
      if($param != 'id') {
        // anything but the page id is looked up below lib/exe/
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    // $id may still be null when the script URL did not match; cast so
    // urldecode() always receives a string
    $id = urldecode((string) $id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }
  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}

// function result cache for cleanID
global $cache_cleanid;
$cache_cleanid = array();

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results of the default mode are memoized in the global $cache_cleanid,
 * keyed by the raw ID. Calls with $ascii=true are never served from nor
 * stored in that cache, because their result differs for the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false){
  global $conf;
  static $sepcharpat = null;

  global $cache_cleanid;
  $cache = & $cache_cleanid;

  // normalize null/non-string input so it is usable as an array key
  // and as an argument to the string functions below
  $raw_id = (string) $raw_id;

  // check if it's already in the memory cache (default mode only)
  if (!$ascii && isset($cache[$raw_id])) {
    return $cache[$raw_id];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat === null) // build string only once to save clock cycles
    $sepcharpat = '#\\'.$sepchar.'+#';

  $id = trim($raw_id);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up
  $id = preg_replace($sepcharpat,$sepchar,$id); // collapse runs of the separator
  $id = preg_replace('#:+#',':',$id);           // collapse runs of colons
  $id = trim($id,':._-');
  $id = preg_replace('#:[:\._\-]+#',':',$id);   // no punctuation right after a colon

  if (!$ascii) $cache[$raw_id] = $id;
  return $id;
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false everything before the last colon, false if the
 *                      ID contains no colon
 */
function getNS($id){
  $pos = strrpos($id,':');
  if($pos!==false){
    return substr($id,0,$pos);
  }
  return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string everything after the last colon, or the unchanged ID
 *                if it contains no colon
 */
function noNS($id) {
  $pos = strrpos($id, ':');
  if ($pos!==false) {
    return substr($id, $pos+1);
  } else {
    return $id;
  }
}

// function result cache for wikiFN
global $cache_wikifn;
$cache_wikifn = array();

/**
 * returns the full path to the datafile specified by ID and
 * optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are memoized in the global $cache_wikifn. The cache is keyed by
 * the resolved (optionally cleaned) ID, so a call with $clean=false can
 * never hand its result to a later call with $clean=true for the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id the page ID
 * @param  string  $rev    revision; empty for the current version
 * @param  boolean $clean  run the ID through cleanID() first
 * @return string  full path of the page file
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  global $cache_wikifn;
  $cache = & $cache_wikifn;

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);

  if (isset($cache[$id]) && isset($cache[$id][$rev])) {
    return $cache[$id][$rev];
  }

  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  $cache[$id][$rev] = $fn;
  return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file name is the md5 hash of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id the page ID
 * @return string full path of the lock file
 */
function wikiLockFN($id) {
  global $conf;
  return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  the page ID (is cleaned here)
 * @param  string $ext file extension, appended verbatim
 * @return string full path of the meta file
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
  return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * A metafile is any non-directory entry in the ID's namespace directory
 * whose name starts with the page name followed by a dot.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id the page ID
 * @return array  list of full paths; empty if the directory can't be opened
 */
function metaFiles($id){
  $name   = noNS($id);
  $dir    = metaFN(getNS($id),'');
  $files  = array();

  // metaFN() joins metadir and the namespace path without a trailing
  // separator, so add one before appending file names
  if(substr($dir,-1) != '/') $dir .= '/';

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id the media ID (is cleaned here)
 * @return string full path of the media file
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
  return $fn;
}

/**
 * Returns the full filepath to a localized textfile if local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id name of the text file without the .txt extension
 * @return string path below DOKU_INC/inc/lang/
 */
function localeFN($id){
  global $conf;
  $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(!@file_exists($file)){
    //fall back to english
    $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
  }
  return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    the current namespace, false for none
 * @param  string       $id    the (possibly relative) ID
 * @param  boolean      $clean run the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  // if the id starts with a dot we need to handle the
  // relative stuff (substr() is safe for an empty id)
  if(substr((string) $id,0,1) === '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    $result = array();
    $pathA  = explode(':', $id);
    if (!$pathA[0]) $result[] = '';
    foreach ($pathA as $dir) {
      if ($dir == '..') {
        if (end($result) == '..') {
          $result[] = '..';
        } elseif (!array_pop($result)) {
          // nothing (or only an empty part) left to go up from
          $result[] = '..';
        }
      } elseif ($dir && $dir != '.') {
        $result[] = $dir;
      }
    }
    // keep a trailing colon (namespace link)
    if (!end($pathA)) $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}

/**
 * Returns a full media id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string|false $ns      the current namespace
 * @param string       &$page   in: media ID to resolve, out: resolved ID
 * @param boolean      &$exists out: whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $file   = mediaFN($page);
  $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * Nothing is returned; the results are written to the reference parameters.
 * IDs ending in a colon are treated as namespace links, other IDs are
 * optionally tried in their plural/singular form ($conf['autoplural']).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string|false $ns      the current namespace
 * @param string       &$page   in: page ID to resolve (may carry a #hash),
 *                              out: resolved, cleaned ID plus cleaned hash
 * @param boolean      &$exists out: whether the page file exists
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  } else {
    $hash = '';
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon we have a namespace link
  if(substr($page,-1) == ':'){
    if(@file_exists(wikiFN($page.$conf['start']))){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(@file_exists(wikiFN($page.noNS(cleanID($page))))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(@file_exists(wikiFN($page))){
      // page like namespace exists, $page stays as it is
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(@file_exists(wikiFN($try))){
          $page = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // files are spread over subdirectories named after the first hash character
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case-insensitive UTF-8 regular expression
 * and matched against the ID with a colon prepended.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id the page ID
 * @return boolean true if the page is hidden; always false when
 *                 $conf['hidepages'] is empty
 */
function isHiddenPage($id){
  global $conf;
  if(empty($conf['hidepages'])) return false;

  if(preg_match('/'.$conf['hidepages'].'/ui',':'.$id)){
    return true;
  }
  return false;
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id the page ID
 * @return boolean true if the page is not hidden
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}

/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers. If the client sent
 * If-Modified-Since and/or If-None-Match and every header it sent matches,
 * a 304 status is sent and the script exits. Otherwise the function returns.
 *
 * @author Simon Willison <swillison@gmail.com>
 * @link   http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param  timestamp $timestamp lastmodified time of the cache file
 * @returns void or exits after sending the 304 header
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html
  // The date is labelled GMT, so it has to be formatted in GMT as well
  $last_modified = gmdate('D, d M Y H:i:s', $timestamp).' GMT';
  $etag = '"'.md5($last_modified).'"';
  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");
  // See if the client has provided the required headers
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }else{
    $if_modified_since = false;
  }

  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }else{
    $if_none_match = false;
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');
  exit;
}

//Setup VIM: ex: et ts=2 enc=utf-8 :