<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param name of the request parameter carrying the ID
 * @param  boolean $clean pass the result through cleanID()
 * @return string|null    the ID; may be null when $clean is false and
 *                        nothing was found for a $param other than 'id'
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  // a request parameter can arrive as an array (e.g. ?id[]=x); an ID is
  // always a single string, so anything else counts as "not given"
  if(is_array($id)) $id = null;

  //construct page id from request URI
  //NOTE: empty() also treats the string "0" as "no id given"
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if(!empty($conf['basedir'])){
      $relpath = '';
      if($param != 'id') {
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    $id = urldecode((string) $id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }
  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}

// function result cache for cleanID
global $cache_cleanid;
$cache_cleanid = array();

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results are memoized in the global $cache_cleanid. Only results computed
 * with $ascii=false are cached: the cache is keyed by the raw ID alone, so
 * storing forced-ASCII results would hand the wrong string to callers that
 * ask for the same raw ID with the other setting.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string          The cleaned ID
 */
function cleanID($raw_id,$ascii=false){
  global $conf;
  static $sepcharpat = null;

  global $cache_cleanid;
  $cache = & $cache_cleanid;

  $key = (string) $raw_id;

  // check if it's already in the memory cache
  if (!$ascii && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat == null) // build string only once to save clock cycles
    $sepcharpat = '#'.preg_quote($sepchar,'#').'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace seperator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up
  $id = preg_replace($sepcharpat,$sepchar,$id);  // collapse runs of the separator
  $id = preg_replace('#:+#',':',$id);            // collapse runs of colons
  $id = trim($id,':._-');
  $id = preg_replace('#:[:\._\-]+#',':',$id);    // no punctuation right after a colon

  if (!$ascii) $cache[$key] = $id;
  return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string|false everything before the last colon, or false when
 *                      the ID contains no colon
 */
function getNS($id){
  $pos = strrpos($id,':');
  if($pos!==false){
    return substr($id,0,$pos);
  }
  return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id the wiki ID
 * @return string     everything after the last colon, or the unchanged ID
 *                    when it contains no colon
 */
function noNS($id) {
  $pos = strrpos($id, ':');
  if ($pos!==false) {
    return substr($id, $pos+1);
  } else {
    return $id;
  }
}

// function result cache for wikiFN
global $cache_wikifn;
$cache_wikifn = array();
/**
 * returns the full path to the datafile specified by ID and
 * optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are memoized in the global $cache_wikifn, keyed by the resolved
 * (optionally cleaned) ID and the revision. Keying on the resolved ID keeps
 * calls with $clean=true and $clean=false from sharing a cache slot.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string     $raw_id the page ID
 * @param  string|int $rev    revision; empty for the current page file
 * @param  boolean    $clean  run the ID through cleanID() first
 * @return string             full path of the page file
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  global $cache_wikifn;
  $cache = & $cache_wikifn;

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);

  if (isset($cache[$id]) && isset($cache[$id][$rev])) {
    return $cache[$id][$rev];
  }

  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  if (!isset($cache[$id])) { $cache[$id] = array(); }
  $cache[$id][$rev] = $fn;
  return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id the page ID (cleaned before hashing)
 * @return string     path below $conf['lockdir'], named by the md5 of the ID
 */
function wikiLockFN($id) {
  global $conf;
  return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  the page ID (cleaned here)
 * @param  string $ext file extension including the leading dot
 * @return string      path below $conf['metadir']
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
  return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id the page ID
 * @return array      paths of all non-directory entries in the ID's meta
 *                    namespace directory whose name starts with
 *                    "<pagename>."; empty if the directory can't be opened
 */
function metaFiles($id){
  $name  = noNS($id);
  // metaFN() ends in a slash for the root namespace but not for a real
  // namespace, so normalize to exactly one trailing slash before joining
  $dir   = rtrim(metaFN(getNS($id),''),'/').'/';
  $files = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id the media ID (cleaned here)
 * @return string     path below $conf['mediadir']
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
  return $fn;
}

/**
 * Returns the full filepath to a localized textfile if local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id basename of the text file (without .txt)
 * @return string     path below DOKU_INC.'inc/lang/'
 */
function localeFN($id){
  global $conf;
  $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(!@file_exists($file)){
    //fall back to english
    $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
  }
  return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    the current namespace, false for none
 * @param  string       $id    the (possibly relative) ID
 * @param  boolean      $clean run the result through cleanID()
 * @return string              the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  // if the id starts with a dot we need to handle the
  // relative stuff
  // (substr() instead of an offset access so an empty ID is harmless)
  if(substr((string) $id,0,1) === '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    $result = array();
    $pathA  = explode(':', $id);
    if (!$pathA[0]) $result[] = '';
    foreach ($pathA AS $key => $dir) {
      if ($dir == '..') {
        if (end($result) == '..') {
          $result[] = '..';
        } elseif (!array_pop($result)) {
          // nothing left to go up from: keep the '..'
          $result[] = '..';
        }
      } elseif ($dir && $dir != '.') {
        $result[] = $dir;
      }
    }
    // keep a trailing colon (namespace link)
    if (!end($pathA)) $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     the current namespace
 * @param  string       $page   in: media ID to resolve; out: resolved, cleaned ID
 * @param  boolean      $exists out: whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $file   = mediaFN($page);
  $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     the current namespace
 * @param  string       $page   in: page ID to resolve, optionally with a
 *                              "#hash" suffix; out: resolved, cleaned ID
 *                              (with the cleaned hash re-appended)
 * @param  boolean      $exists out: whether a matching page file was found
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  //(a '#' at position 0 is deliberately not split off: strpos() returns 0)
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  } else {
    $hash = '';
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon we have a namespace link
  if(substr($page,-1) == ':'){
    if(@file_exists(wikiFN($page.$conf['start']))){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(@file_exists(wikiFN($page.noNS(cleanID($page))))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(@file_exists(wikiFN($page))){
      // page like namespace exists ($page stays as it is)
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(@file_exists(wikiFN($try))){
          $page = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}
/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // files are spread over subdirectories named by the first hex digit
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as the body of a case-insensitive UTF-8
 * regular expression delimited by '/', and is matched against the ID
 * with a colon prepended.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id the page ID
 * @return boolean    true if the pattern matches, false if it doesn't or
 *                    no pattern is configured
 */
function isHiddenPage($id){
  global $conf;
  if(empty($conf['hidepages'])) return false;

  if(preg_match('/'.$conf['hidepages'].'/ui',':'.$id)){
    return true;
  }
  return false;
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id the page ID
 * @return boolean    true if the page is not hidden
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}

/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers. When the client sent
 * If-Modified-Since and/or If-None-Match and every header it sent matches
 * the current values, a 304 response is sent and the script exits;
 * otherwise the function simply returns.
 *
 * @author Simon Willison <swillison@gmail.com>
 * @link   http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param  int $timestamp lastmodified time of the cache file (Unix timestamp)
 * @return void           does not return at all when a 304 is sent
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html

  // HTTP dates are always GMT, so format with gmdate(); formatting local
  // time and merely labelling it "GMT" is off by the server's UTC offset
  $last_modified = gmdate('D, d M Y H:i:s', $timestamp).' GMT';
  $etag = '"'.md5($last_modified).'"';
  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");
  // See if the client has provided the required headers
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }else{
    $if_modified_since = false;
  }

  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }else{
    $if_none_match = false;
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');
  exit;
}

//Setup VIM: ex: et ts=2 enc=utf-8 :