<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  Name of the request parameter holding the ID
 * @param  boolean $clean  Run the ID through cleanID() before returning it
 * @return string  The (optionally cleaned) ID
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if($conf['basedir']){
      $relpath = '';
      if($param != 'id') {
        // anything but a page id is requested through a script below lib/exe/
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    $id = urldecode($id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }
  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones (depending on $conf['deaccent'] or $ascii)
 *
 * Results for the default (non-ASCII) mode are memoized per request.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id The pageid to clean
 * @param  boolean $ascii  Force ASCII
 * @return string  The cleaned ID
 */
function cleanID($raw_id,$ascii=false){
  global $conf;
  static $sepcharpat = null;

  static $cache = array();

  // Cast once: false and '0' would otherwise both end up as array key 0
  // and return each other's cached result.
  $key = (string) $raw_id;

  // check if it's already in the memory cache; only non-ASCII results are
  // stored there because the ASCII result for the same raw ID may differ
  if (!$ascii && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat == null) // build string only once to save clock cycles
    $sepcharpat = '#\\'.$sepchar.'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up: collapse separator runs and colon runs, strip leading/trailing
  //punctuation of the whole ID and leading punctuation of each segment
  $id = preg_replace($sepcharpat,$sepchar,$id);
  $id = preg_replace('#:+#',':',$id);
  $id = trim($id,':._-');
  $id = preg_replace('#:[:\._\-]+#',':',$id);

  if(!$ascii) $cache[$key] = $id;
  return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false Everything before the last colon, false if the ID
 *                      contains no colon
 */
function getNS($id){
  $pos = strrpos($id,':');
  if($pos!==false){
    return substr($id,0,$pos);
  }
  return false;
}

/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string Everything after the last colon, or the whole ID if it
 *                contains no colon
 */
function noNS($id) {
  $pos = strrpos($id, ':');
  if ($pos!==false) {
    return substr($id, $pos+1);
  } else {
    return $id;
  }
}

/**
 * returns the full path to the datafile specified by ID and
 * optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Results are memoized per request, separately for cleaned and uncleaned
 * lookups.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string     $raw_id The page ID
 * @param  string|int $rev    Revision; empty for the current version
 * @param  boolean    $clean  Run the ID through cleanID() first
 * @return string     Path below $conf['datadir'] (current) or
 *                    $conf['olddir'] (revision)
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  static $cache = array();
  // $clean is part of the key: the same raw ID may map to different files
  // depending on whether it gets cleaned
  $key  = (string) $raw_id;
  $mode = $clean ? 'clean' : 'raw';
  if (isset($cache[$mode][$key][$rev])) {
    return $cache[$mode][$key][$rev];
  }

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);
  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  $cache[$mode][$key][$rev] = $fn;
  return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file name is the md5 of the cleaned ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id The page ID
 * @return string Path below $conf['lockdir']
 */
function wikiLockFN($id) {
  global $conf;
  return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  The page ID (is cleaned here)
 * @param  string $ext File extension, appended verbatim (include the dot)
 * @return string Path below $conf['metadir']
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
  return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Scans the namespace's meta directory for regular files whose name starts
 * with "<pagename>.". An unreadable or missing directory yields an empty
 * array.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id The page ID
 * @return array  List of file paths
 */
function metaFiles($id){
  $name  = noNS($id);
  // metaFN() returns the namespace directory without a trailing slash for
  // namespaced IDs; normalize so that $dir.$file is always a valid path
  $dir   = rtrim(metaFN(getNS($id),''),'/').'/';
  $files = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id The media ID (is cleaned here)
 * @return string Path below $conf['mediadir']
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
  return $fn;
}

/**
 * Returns the full filepath to a localized textfile; if the local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id Basename of the text file (without .txt)
 * @return string Path below DOKU_INC/inc/lang/
 */
function localeFN($id){
  global $conf;
  $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(!@file_exists($file)){
    //fall back to english
    $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
  }
  return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly, use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    Current namespace; false means "none"
 * @param  string       $id    The (possibly relative) ID
 * @param  boolean      $clean Run the result through cleanID()
 * @return string       The resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  // if the id starts with a dot we need to handle the
  // relative stuff (substr() is safe for an empty id, unlike an offset)
  if(substr($id,0,1) === '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    // NOTE: segments are tested for truthiness, so empty segments (and a
    // segment that is literally "0") are dropped here
    $result = array();
    $pathA  = explode(':', $id);
    if (!$pathA[0]) $result[] = '';
    foreach ($pathA AS $key => $dir) {
      if ($dir == '..') {
        if (end($result) == '..') {
          $result[] = '..';
        } elseif (!array_pop($result)) {
          // nothing left to go up from - keep the '..'
          $result[] = '..';
        }
      } elseif ($dir && $dir != '.') {
        $result[] = $dir;
      }
    }
    // keep a trailing colon (namespace link)
    if (!end($pathA)) $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     Current namespace
 * @param  string       $page   Media ID; replaced by the resolved, cleaned ID
 * @param  boolean      $exists Set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $file   = mediaFN($page);
  $exists = @file_exists($file);
}

/**
 * Returns a full page id
 *
 * Handles namespace links (trailing colon) and, if $conf['autoplural'] is
 * set, falls back to the plural/singular form of a missing page.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     Current namespace
 * @param  string       $page   Page ID, optionally with "#hash"; replaced by
 *                              the resolved, cleaned ID (hash re-appended)
 * @param  boolean      $exists Set to whether a matching page file exists
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  //(a '#' at position 0 is deliberately not treated as a hash separator)
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  } else {
    $hash = '';
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon we have a namespace link
  if(substr($page,-1) == ':'){
    if(@file_exists(wikiFN($page.$conf['start']))){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(@file_exists(wikiFN($page.noNS(cleanID($page))))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(@file_exists(wikiFN($page))){
      // page like namespace exists - $page is kept as is
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(@file_exists(wikiFN($try))){
          $page = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after the
 * first character of the md5 hash. The needed directory is created by this
 * function (through io_makeFileDir())!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as the body of a case-insensitive UTF-8 regular
 * expression that is matched against the ID with a colon prepended.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string  $id The page ID
 * @return boolean true if the page is hidden; always false when
 *                 $conf['hidepages'] is empty
 */
function isHiddenPage($id){
  global $conf;
  if(empty($conf['hidepages'])) return false;

  if(preg_match('/'.$conf['hidepages'].'/ui',':'.$id)){
    return true;
  }
  return false;
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string  $id The page ID
 * @return boolean true if the page is not hidden
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}

/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers. If the client sent
 * If-Modified-Since and/or If-None-Match and every header it sent matches,
 * a 304 response is sent and the script exits; otherwise the function
 * simply returns.
 *
 * @author   Simon Willison <swillison@gmail.com>
 * @link     http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param    int $timestamp  lastmodified time of the cache file (Unix time)
 * @return   void
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html
  // gmdate() is required here: the numeric offset is replaced by the literal
  // "GMT", so the time itself has to be expressed in GMT, not local time
  $last_modified = substr(gmdate('r', $timestamp), 0, -5).'GMT';
  $etag = '"'.md5($last_modified).'"';
  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");
  // See if the client has provided the required headers
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }else{
    $if_modified_since = false;
  }

  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }else{
    $if_none_match = false;
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');
  exit;
}

//Setup VIM: ex: et ts=2 enc=utf-8 :