xref: /dokuwiki/inc/pageutils.php (revision 6e0cc83a924bf9b5f90ae80f11ccdcf58ee415a7)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * The value is taken from $_REQUEST[$param]. If that is empty and
 * $conf['userewrite'] is 2, the ID is extracted from the request URI
 * instead: the script URL is removed from the front, the query string
 * from the end, the rest is URL-decoded and leading slashes are dropped.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request variable to read
 * @param  boolean $clean  pass the result through cleanID()
 * @return string  the ID; may be empty/null when nothing was found and
 *                 $param is not 'id'
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = null;
  if(isset($_REQUEST[$param])) $id = $_REQUEST[$param];

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if($conf['basedir']){
      // every parameter other than 'id' is looked up below lib/exe/
      $subdir = ($param != 'id') ? 'lib/exe/' : '';
      $script = $conf['basedir'].$subdir.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
      // script path relative to the document root
      $rootpat = '/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/';
      $script  = '/'.preg_replace($rootpat,'',$_SERVER['SCRIPT_FILENAME']);
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //collapse repeated slashes in script and request (fixes a windows problem)
    $slashes = '/\/\/+/';
    $script  = preg_replace($slashes,'/',$script);
    $request = preg_replace($slashes,'/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    //decode and strip leading slashes
    $id = preg_replace('!^/+!','',urldecode($id));
  }

  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') return $conf['start'];

  return $id;
}
60
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters: the ID is trimmed and
 * lowercased, ';' (and '/' when $conf['useslash'] is set) become namespace
 * separators, special characters are replaced by $conf['sepchar'], and
 * repeated or leading/trailing separators are collapsed. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is romanized and/or
 * deaccented as well.
 *
 * Results are memoized per request. Only results of the default (non-ASCII)
 * mode are cached, because the cache is keyed by the raw ID alone and the
 * two modes can yield different IDs for the same input.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false){
  global $conf;
  static $sepcharpat = null;
  static $cache = array();

  $key = (string) $raw_id;

  // check if it's already in the memory cache (default mode only)
  if (!$ascii && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat === null) // build string only once to save clock cycles
    $sepcharpat = '#\\'.$sepchar.'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up: collapse separator runs and colon runs, then drop
  //separator characters at the ends and directly after a colon
  $id = preg_replace($sepcharpat,$sepchar,$id);
  $id = preg_replace('#:+#',':',$id);
  $id = trim($id,':._-');
  $id = preg_replace('#:[:\._\-]+#',':',$id);

  if(!$ascii) $cache[$key] = $id;
  return $id;
}
115
/**
 * Return the namespace part of a wiki ID
 *
 * Everything before the last colon is the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string|false  the namespace, or false if the ID contains no colon
 */
function getNS($id){
  $colon = strrpos($id,':');
  return ($colon === false) ? false : substr($id,0,$colon);
}
128
/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned; an ID without a colon is
 * returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string  the ID with its namespace removed
 */
function noNS($id) {
  $colon = strrpos($id, ':');
  if ($colon === false) return $id;

  return substr($id, $colon+1);
}
142
/**
 * returns the full path to the datafile specified by ID and
 * optional revision
 *
 * The filename is URL encoded to protect Unicode chars. Current pages live
 * below $conf['datadir'], old revisions below $conf['olddir']. For old
 * revisions with $conf['compression'] enabled, an existing .gz or .bz2 file
 * is preferred; otherwise the configured compression extension is appended.
 *
 * Results are memoized per request. The cache is keyed by the processed ID
 * (after optional cleaning), so calls with and without $clean never share
 * an entry unless they really resolve to the same file.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string     $raw_id  the page ID
 * @param  string|int $rev     revision; empty for the current page
 * @param  boolean    $clean   run the ID through cleanID() first
 * @return string  full path of the file
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;
  static $cache = array();

  $id = $raw_id;
  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);

  if (isset($cache[$id][$rev])) {
    return $cache[$id][$rev];
  }

  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  $cache[$id][$rev] = $fn;
  return $fn;
}
183
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned ID and lives
 * in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  the page ID
 * @return string  full path of the lock file
 */
function wikiLockFN($id) {
  global $conf;

  $hash = md5(cleanID($id));
  return $conf['lockdir'].'/'.$hash.'.lock';
}
193
194
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons are turned into directory separators and
 * the filename is URL encoded to protect Unicode chars. The extension is
 * appended as given (it must bring its own leading dot).
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   the page ID
 * @param  string $ext  file extension to append
 * @return string  full path below $conf['metadir']
 */
function metaFN($id,$ext){
  global $conf;

  $path = str_replace(':','/',cleanID($id));
  return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
209
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Scans the meta directory of the ID's namespace for regular files whose
 * name starts with "<pagename>.".
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id  the page ID
 * @return array  full paths of the matching files; empty if the directory
 *                can not be opened
 */
function metaFiles($id){
  $name   = noNS($id);
  $dir    = metaFN(getNS($id),'');
  $files  = array();

  // The directory path is concatenated with file names below, so it has to
  // end in a slash. metaFN() does not add one for a non-empty namespace.
  if(substr($dir,-1) != '/') $dir .= '/';

  $dh = @opendir($dir);
  if(!$dh) return $files;

  $prefix = $name.'.';
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$prefix) === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}
230
/**
 * returns the full path to the mediafile specified by ID
 *
 * The ID is cleaned, its colons are turned into directory separators and
 * the filename is URL encoded to protect Unicode chars.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the media ID
 * @return string  full path below $conf['mediadir']
 */
function mediaFN($id){
  global $conf;

  $path = str_replace(':','/',cleanID($id));
  return $conf['mediadir'].'/'.utf8_encodeFN($path);
}
245
/**
 * Returns the full filepath to a localized textfile
 *
 * The file is looked up in the language directory for $conf['lang'];
 * if it does not exist there, the path of the english version is
 * returned instead (without checking that it exists).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  name of the text file without the .txt extension
 * @return string  full path of the text file
 */
function localeFN($id){
  global $conf;

  $localized = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
  if(@file_exists($localized)) return $localized;

  //fall back to english
  return DOKU_INC.'inc/lang/en/'.$id.'.txt';
}
261
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are taken relative to $ns: '.' segments are
 * dropped and '..' segments remove the preceding segment. Any other ID
 * without a colon is put into $ns (unless $ns is false); IDs that already
 * contain a colon are left alone.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     the current namespace, false for none
 * @param  string       $id     the (possibly relative) ID
 * @param  boolean      $clean  run the result through cleanID()
 * @return string  the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  // if the id starts with a dot we need to handle the
  // relative stuff (isset() keeps an empty id from raising a notice)
  if(isset($id[0]) && $id[0] === '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    // Segments are compared against '' explicitly: a plain truthiness
    // test would also discard a segment named "0".
    $result = array();
    $pathA  = explode(':', $id);
    if ($pathA[0] === '') $result[] = '';   // keep a leading colon
    foreach ($pathA as $dir) {
      if ($dir === '..') {
        if (end($result) === '..') {
          $result[] = '..';
        } else {
          // go up one level; nothing (or only the leading empty
          // segment) left to remove means the '..' has to stay
          $popped = array_pop($result);
          if ($popped === null || $popped === '') $result[] = '..';
        }
      } elseif ($dir !== '' && $dir !== '.') {
        $result[] = $dir;
      }
    }
    if (end($pathA) === '') $result[] = ''; // keep a trailing colon
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}
307
/**
 * Returns a full media id
 *
 * Nothing is returned directly; the results are written to the
 * reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $ns      the current namespace
 * @param  string  $page    in: the media ID to resolve, out: the resolved ID
 * @param  boolean $exists  out: whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $exists = @file_exists(mediaFN($page));
}
318
/**
 * Returns a full page id
 *
 * Nothing is returned directly; the results are written to the reference
 * parameters. A section hash ("page#section") is split off, cleaned and
 * re-appended to the resolved ID.
 *
 * An ID ending in a colon is a namespace link. It resolves to the first
 * existing candidate of: the start page inside the namespace, a page
 * named like the namespace inside it, a page named like the namespace
 * itself. Without a match it falls back to the start page.
 *
 * For ordinary IDs whose page does not exist, the plural/singular variant
 * (trailing 's' added or removed) is tried when $conf['autoplural'] is set.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $ns      the current namespace
 * @param  string  $page    in: the page ID to resolve, out: the resolved ID
 * @param  boolean $exists  out: whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  //(a '#' at position 0 is treated like "no hash", unchanged behaviour)
  $hash = '';
  if (strpos($page,'#')) {
    // explode() replaces the removed split(); '#' needs no regex handling
    list($page,$hash) = explode('#',$page,2);
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon we have a namespace link
  if(substr($page,-1) == ':'){
    if(@file_exists(wikiFN($page.$conf['start']))){
      // start page inside namespace
      $page = $page.$conf['start'];
      $exists = true;
    }elseif(@file_exists(wikiFN($page.noNS(cleanID($page))))){
      // page named like the NS inside the NS
      $page = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(@file_exists(wikiFN($page))){
      // page like namespace exists, $page stays as it is
      $exists = true;
    }else{
      // fall back to default
      $page = $page.$conf['start'];
    }
  }else{
    //check alternative plural/nonplural form
    if(!@file_exists($file)){
      if( $conf['autoplural'] ){
        if(substr($page,-1) == 's'){
          $try = substr($page,0,-1);
        }else{
          $try = $page.'s';
        }
        if(@file_exists(wikiFN($try))){
          $page   = $try;
          $exists = true;
        }
      }
    }else{
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}
383
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after
 * the first character of the md5 hash of the data.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // square brackets: the curly-brace string offset is gone in PHP 8
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}
402
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case-insensitive UTF-8 regular
 * expression and matched against the ID with a colon prepended.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  the page ID
 * @return boolean  true if the page is hidden; always false when
 *                  $conf['hidepages'] is empty
 */
function isHiddenPage($id){
  global $conf;

  if(!empty($conf['hidepages'])){
    $regex = '/'.$conf['hidepages'].'/ui';
    return (bool) preg_match($regex,':'.$id);
  }
  return false;
}
417
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  the page ID
 * @return boolean  true if isHiddenPage() does not consider the page hidden
 */
function isVisiblePage($id){
  $hidden = isHiddenPage($id);
  return !$hidden;
}
426
/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers derived from the timestamp.
 * If the client sent If-Modified-Since and/or If-None-Match and every
 * header it sent matches, a 304 response is sent and the script exits.
 *
 * @author   Simon Willison <swillison@gmail.com>
 * @link     http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param    int $timestamp lastmodified time of the cache file (unix time)
 * @return   void  returns only if the client needs the full response
 */
function http_conditionalRequest($timestamp){
  // A PHP implementation of conditional get, see
  //   http://fishbowl.pastiche.org/archives/001132.html

  // HTTP dates are always GMT, so format with gmdate(): formatting local
  // time and labelling it GMT is off by the server's timezone offset
  $last_modified = gmdate('D, d M Y H:i:s', $timestamp).' GMT';
  $etag = '"'.md5($last_modified).'"';

  // Send the headers
  header("Last-Modified: $last_modified");
  header("ETag: $etag");

  // See if the client has provided the required headers
  $if_modified_since = false;
  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])){
    $if_modified_since = stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']);
  }

  $if_none_match = false;
  if (isset($_SERVER['HTTP_IF_NONE_MATCH'])){
    $if_none_match = stripslashes($_SERVER['HTTP_IF_NONE_MATCH']);
  }

  if (!$if_modified_since && !$if_none_match){
    return;
  }

  // At least one of the headers is there - check them
  if ($if_none_match && $if_none_match != $etag) {
    return; // etag is there but doesn't match
  }

  if ($if_modified_since && $if_modified_since != $last_modified) {
    return; // if-modified-since is there but doesn't match
  }

  // Nothing has changed since their last request - serve a 304 and exit
  header('HTTP/1.0 304 Not Modified');
  exit;
}
473
474//Setup VIM: ex: et ts=2 enc=utf-8 :
475