xref: /dokuwiki/inc/pageutils.php (revision 0dfc144084ed9386eda9a7c296a40a3ea1d5f7f2)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts the ID from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends with a namespace separator, a target page inside (or
 * named like) that namespace is picked and send_redirect() is called with
 * its URL.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request parameter holding the ID
 * @param  boolean $clean  pass the result through cleanID()
 * @return string  the requested ID
 */
function getID($param='id',$clean=true){
  global $conf;

  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    // stays empty in the PATH_TRANSLATED case, where the request itself
    // is rewritten instead of a script URL being determined
    $script  = '';

    //get the script URL
    if($conf['basedir']){
      $relpath = '';
      if($param != 'id') {
        $relpath = 'lib/exe/';
      }
      $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);

    }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['PATH_TRANSLATED'])){
      $request = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['PATH_TRANSLATED']);
    }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
      $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                              $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$request);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace ('/\?.*/','',$match[1]);
    }
    $id = urldecode($id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }

  // Namespace autolinking from URL
  if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
    if(page_exists($id.$conf['start'])){
      // start page inside namespace
      $id = $id.$conf['start'];
    }elseif(page_exists($id.noNS(cleanID($id)))){
      // page named like the NS inside the NS
      $id = $id.noNS(cleanID($id));
    }elseif(page_exists($id)){
      // page like namespace exists
      $id = substr($id,0,-1);
    }else{
      // fall back to default
      $id = $id.$conf['start'];
    }
    send_redirect(wl($id,'',true));
  }

  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}
85
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is romanized and/or
 * deaccented. Results of the default mode are kept in the global
 * $cache_cleanid memory cache.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     Allow leading or trailing _ for media files
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
  global $conf;
  static $sepcharpat = null;

  global $cache_cleanid;
  $cache = & $cache_cleanid;

  // The cache is keyed by the raw ID only, so it can hold just one result per
  // ID. Restrict it to the default mode; otherwise an $ascii or $media call
  // would be answered with (or would overwrite) the default-mode result.
  $cacheable = (!$ascii && !$media);
  $key       = (string)$raw_id;

  // check if it's already in the memory cache
  if ($cacheable && isset($cache[$key])) {
    return $cache[$key];
  }

  $sepchar = $conf['sepchar'];
  if($sepcharpat === null) // build string only once to save clock cycles
    $sepcharpat = '#'.preg_quote($sepchar,'#').'+#';

  $id = trim($key);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up: collapse runs of separators and colons, trim the ends and
  //drop separator characters directly following a colon
  $id = preg_replace($sepcharpat,$sepchar,$id);
  $id = preg_replace('#:+#',':',$id);
  $id = ($media ? trim($id,':.-') : trim($id,':._-'));
  $id = preg_replace('#:[:\._\-]+#',':',$id);

  if ($cacheable) $cache[$key] = $id;
  return($id);
}
142
/**
 * Return the namespace part of a wiki ID
 *
 * Everything in front of the last colon is the namespace. When the ID
 * contains no colon at all, false is returned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false
 */
function getNS($id){
  $id  = (string)$id;
  $cut = strrpos($id,':');
  if($cut === false) return false;

  return substr($id,0,$cut);
}
155
/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned; an ID without any colon
 * is handed back unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string
 */
function noNS($id) {
  $cut = strrpos($id, ':');
  return ($cut === false) ? $id : substr($id, $cut+1);
}
169
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the given ID:
 * the result of getNS() is passed through noNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
178
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * For any page not named like $conf['start'] the plain page name is
 * returned. For start pages the name of the enclosing namespace is used
 * instead, unless there is none - then the page name is returned after all.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 */
function noNSorNS($id) {
    global $conf;

    $name = noNS($id);
    if ($name != $conf['start']) {
        return $name;
    }

    // a start page: prefer the name of its namespace
    $namespace = curNS($id);
    if ($namespace == false) {
        return noNS($id);
    }
    return $namespace;
}
196
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons are removed and leading
 * digits, dots, underscores and dashes are stripped. If nothing is left,
 * 'section' followed by the digits of the headline is used. When $check is
 * an array, a number is appended until the ID is not contained in it and
 * the final ID is added to the array.
 *
 * @param string  $title   The headline title
 * @param array   $check   List of existing IDs
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $cleaned = str_replace(':','',cleanID($title));
    $anchor  = ltrim($cleaned,'0123456789._-');
    if(empty($anchor)){
        //keep numbers from headline
        $anchor = 'section'.preg_replace('/[^0-9]+/','',$cleaned);
    }

    if(!is_array($check)) return $anchor;

    // make sure titles are unique
    $suffix = '';
    while(in_array($anchor.$suffix,$check)){
        if($suffix){
            $suffix++;
        }else{
            $suffix = 1;
        }
    }
    $anchor  = $anchor.$suffix;
    $check[] = $anchor;

    return $anchor;
}
225
226
/**
 *  Wiki page existence check
 *
 *  Resolves the data file through wikiFN() and reports whether that
 *  file exists. Parameters as for wikiFN.
 *
 *  @author Chris Smith <chris@jalakai.co.uk>
 */
function page_exists($id,$rev='',$clean=true) {
  $file = wikiFN($id,$rev,$clean);
  return @file_exists($file);
}
237
/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars. Results of cleaned
 * lookups are remembered in the global $cache_wikifn memory cache.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   full path of the data file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
  global $conf;

  global $cache_wikifn;
  $cache = & $cache_wikifn;

  // The cache is keyed by raw id and revision only. It is therefore used for
  // cleaned lookups exclusively; an uncleaned lookup of the same raw id may
  // resolve to a different file and must neither read nor write these entries.
  if ($clean && isset($cache[$raw_id]) && isset($cache[$raw_id][$rev])) {
    return $cache[$raw_id][$rev];
  }

  $id = $raw_id;

  if ($clean) $id = cleanID($id);
  $id = str_replace(':','/',$id);
  if(empty($rev)){
    $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }else{
    $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
    if($conf['compression']){
      //test for extensions here, we want to read both compressions
      if (@file_exists($fn . '.gz')){
        $fn .= '.gz';
      }else if(@file_exists($fn . '.bz2')){
        $fn .= '.bz2';
      }else{
        //file doesnt exist yet, so we take the configured extension
        $fn .= '.' . $conf['compression'];
      }
    }
  }

  if ($clean) {
    if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
    $cache[$raw_id][$rev] = $fn;
  }
  return $fn;
}
285
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file lives in $conf['lockdir'] and is named after the md5 sum
 * of the cleaned page ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function wikiLockFN($id) {
  global $conf;
  $hash = md5(cleanID($id));
  return $conf['lockdir'].'/'.$hash.'.lock';
}
295
296
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * resulting name is URL encoded to protect Unicode chars. The extension
 * is appended verbatim.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 */
function metaFN($id,$ext){
  global $conf;
  $path = str_replace(':','/',cleanID($id));
  return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
311
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Scans the meta directory of the ID's namespace for non-directory entries
 * whose name starts with the page name followed by a dot. An empty array
 * is returned when the directory can not be opened.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id
 * @return array
 */
function metaFiles($id){
  $name   = noNS($id);
  $ns     = getNS($id);
  $dir    = metaFN($ns,'');
  // metaFN() only ends in a slash when no namespace was given. Compare
  // explicitly instead of by truthiness so a namespace named "0" gets its
  // separator as well.
  if($ns !== false && $ns !== '') $dir .= '/';
  $files  = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;
  while(($file = readdir($dh)) !== false){
    if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
      $files[] = $dir.$file;
  }
  closedir($dh);

  return $files;
}
333
/**
 * returns the full path to the mediafile specified by ID
 *
 * The ID is cleaned, its colons become directory separators and the
 * resulting name is URL encoded to protect Unicode chars.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function mediaFN($id){
  global $conf;
  $path = str_replace(':','/',cleanID($id));
  return $conf['mediadir'].'/'.utf8_encodeFN($path);
}
348
/**
 * Returns the full filepath to a localized textfile. If no version for
 * the configured language is found, the path of the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id){
  global $conf;
  $langdir = DOKU_INC.'inc/lang/';

  $localized = $langdir.$conf['lang'].'/'.$id.'.txt';
  if(@file_exists($localized)) return $localized;

  //fall back to english
  return $langdir.'en/'.$id.'.txt';
}
364
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  boolean      $clean  pass the result through cleanID()
 * @return string       the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  global $conf;

  // some pre cleaning for useslash:
  if($conf['useslash']) $id = str_replace('/',':',$id);

  // if the id starts with a dot we need to handle the
  // relative stuff
  // (isset() keeps an empty ID from raising an offset notice)
  if(isset($id[0]) && $id[0] === '.'){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    // Segments are compared against '' explicitly: a plain truthiness test
    // would also discard a namespace or page named "0".
    $result = array();
    $pathA  = explode(':', $id);
    if ($pathA[0] === '') $result[] = '';
    foreach ($pathA AS $dir) {
      if ($dir === '..') {
        if (end($result) == '..') {
          $result[] = '..';
        } else {
          // step up one level; keep the '..' when there is nothing to leave
          $parent = array_pop($result);
          if ($parent === null || $parent === '') $result[] = '..';
        }
      } elseif ($dir !== '' && $dir !== '.') {
        $result[] = $dir;
      }
    }
    // preserve a trailing colon
    if (end($pathA) === '') $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}
415
/**
 * Returns a full media id
 *
 * Both results are handed back through the reference parameters: $page
 * receives the resolved ID, $exists tells whether the media file is there.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $exists = @file_exists(mediaFN($page));
}
426
/**
 * Returns a full page id
 *
 * Both results are handed back through the reference parameters: $page
 * receives the resolved and cleaned ID (plus '#hash' if one was given),
 * $exists tells whether a matching page file was found.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  //(a '#' at the very start is not treated as a separator)
  $hash = '';
  if (strpos($page,'#')) {
    list($page,$hash) = explode('#',$page,2);
  }
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon or slash we have a namespace link
  $last     = substr($page,-1);
  $isNSLink = ($last == ':' || ($conf['useslash'] && $last == '/'));

  if($isNSLink){
    $startpage = $page.$conf['start'];
    if(page_exists($startpage)){
      // start page inside namespace
      $page   = $startpage;
      $exists = true;
    }elseif(page_exists($page.noNS(cleanID($page)))){
      // page named like the NS inside the NS
      $page   = $page.noNS(cleanID($page));
      $exists = true;
    }elseif(page_exists($page)){
      // page like namespace exists, the ID is kept as it is
      $exists = true;
    }else{
      // fall back to default
      $page = $startpage;
    }
  }elseif(@file_exists($file)){
    $exists = true;
  }elseif($conf['autoplural']){
    //check alternative plural/nonplural form
    $try = ($last == 's') ? substr($page,0,-1) : $page.'s';
    if(page_exists($try)){
      $page   = $try;
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}
491
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed below $conf['cachedir'] in a subdirectory named after
 * the first character of the md5 sum. The needed directory is created by
 * this function (through io_makeFileDir())!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // square brackets: the curly brace offset syntax is gone in PHP 8
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}
510
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The setting is used as a case insensitive UTF-8 regular expression and
 * matched against the ID with a colon prepended. Nothing is hidden when the
 * setting is empty or while $ACT is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id
 * @return boolean
 */
function isHiddenPage($id){
  global $conf;
  global $ACT;

  if(empty($conf['hidepages']) || $ACT == 'admin') return false;

  $pattern = '/'.$conf['hidepages'].'/ui';
  return (bool) preg_match($pattern,':'.$id);
}
527
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id
 * @return boolean
 */
function isVisiblePage($id){
  $hidden = isHiddenPage($id);
  return !$hidden;
}
536
537
538//Setup VIM: ex: et ts=2 enc=utf-8 :
539