xref: /dokuwiki/inc/pageutils.php (revision a6ef4796e22156364843a3b42bdd8f2dc78c0db5)
<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */
9b625487dSandi
/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param Name of the request variable holding the ID
 * @param  boolean $clean Pass the found ID through cleanID()
 * @return string|null    The ID; may be null when $clean is false, $param is
 *                        not 'id' and the variable was not sent at all
 */
function getID($param='id',$clean=true){
  global $conf;

  // read the variable without raising a notice when it was not sent
  $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

  //construct page id from request URI
  if(empty($id) && $conf['userewrite'] == 2){
    //get the script URL
    if(!empty($conf['basedir'])){
      // for parameters other than 'id' the script is looked up below lib/exe/
      $relpath = ($param != 'id') ? 'lib/exe/' : '';
      $script  = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
    }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
      // script path relative to the document root
      $script = preg_replace('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                             $_SERVER['SCRIPT_FILENAME']);
      $script = '/'.$script;
    }else{
      $script = $_SERVER['SCRIPT_NAME'];
    }

    //clean script and request (fixes a windows problem)
    $script  = preg_replace('/\/\/+/','/',$script);
    $request = preg_replace('/\/\/+/','/',$_SERVER['REQUEST_URI']);

    //remove script URL and Querystring to gain the id
    if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
      $id = preg_replace('/\?.*/','',$match[1]);
    }
    $id = urldecode((string) $id);
    //strip leading slashes
    $id = preg_replace('!^/+!','',$id);
  }
  if($clean) $id = cleanID($id);
  if(empty($id) && $param=='id') $id = $conf['start'];

  return $id;
}
60b625487dSandi
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $id    The pageid to clean
 * @param  boolean $ascii Force ASCII
 * @return string         The cleaned ID
 */
function cleanID($id,$ascii=false){
  global $conf;
  static $sepcharpat = array();

  $sepchar = $conf['sepchar'];
  // build each pattern only once to save clock cycles, but key the cache by
  // the separator so a changed $conf['sepchar'] never reuses a stale pattern
  if(!isset($sepcharpat[$sepchar]))
    $sepcharpat[$sepchar] = '#\\'.$sepchar.'+#';

  $id = trim($id);
  $id = utf8_strtolower($id);

  //alternative namespace separator
  $id = strtr($id,';',':');
  if($conf['useslash']){
    $id = strtr($id,'/',':');
  }else{
    $id = strtr($id,'/',$sepchar);
  }

  if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
  if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

  //remove specials
  $id = utf8_stripspecials($id,$sepchar,'\*');

  if($ascii) $id = utf8_strip($id);

  //clean up
  $id = preg_replace($sepcharpat[$sepchar],$sepchar,$id); // collapse separator runs
  $id = preg_replace('#:+#',':',$id);                     // collapse colon runs
  $id = trim($id,':._-');                                 // no leading/trailing junk
  $id = preg_replace('#:[:\._\-]+#',':',$id);             // no junk right after a colon

  return($id);
}
107b625487dSandi
/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The ID to inspect
 * @return string|false Everything before the last colon, or false when
 *                      the ID contains no colon at all
 */
function getNS($id){
  $pos = strrpos($id,':');
  if($pos === false) return false;
  return substr($id,0,$pos);
}
120b625487dSandi
/**
 * Returns the ID without the namespace
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id The ID to strip
 * @return string     The part after the last colon (the whole ID when it
 *                    contains no colon)
 */
function noNS($id){
  // greedy match removes everything up to and including the last colon
  return preg_replace('/.*:/','',$id);
}
129b625487dSandi
/**
 * returns the full path to the datafile specified by ID and
 * optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The page ID (is cleaned here)
 * @param  string $rev Optional revision; when empty the current page
 *                     below $conf['datadir'] is addressed
 * @return string      Path of the page file
 */
function wikiFN($id,$rev=''){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);

  if(empty($rev)){
    return $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
  }

  // old revisions live below $conf['olddir']
  $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
  if($conf['usegzip'] && !@file_exists($fn)){
    //return gzip if enabled and plaintext doesn't exist
    $fn .= '.gz';
  }
  return $fn;
}
153b625487dSandi
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  The page ID (is cleaned here)
 * @param  string $ext Suffix appended verbatim to the encoded file name
 * @return string      Path below $conf['metadir']
 */
function metaFN($id,$ext){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  return $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
}
168b158d625SSteven Danz
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * A directory entry counts as a metafile of the page when its name starts
 * with the page name followed by a dot and it is not a directory.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id The page ID
 * @return array      Paths of the matching files (empty when the namespace
 *                    directory cannot be opened)
 */
function metaFiles($id){
  $name  = noNS($id);
  // metaFN() joins without a trailing slash for a namespace but ends in a
  // slash for an empty one, so normalize before building file paths
  $dir   = rtrim(metaFN(getNS($id),''),'/');
  $files = array();

  $dh = @opendir($dir);
  if(!$dh) return $files;

  $prefix = $name.'.';
  while(($file = readdir($dh)) !== false){
    // directory and entry must be joined with a slash, otherwise both the
    // is_dir() check and the returned path point to a non-existing sibling
    $path = $dir.'/'.$file;
    if(strpos($file,$prefix) === 0 && !is_dir($path))
      $files[] = $path;
  }
  closedir($dh);

  return $files;
}
189e1f3d9e1SEsther Brunner
/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id The media ID (is cleaned here)
 * @return string     Path below $conf['mediadir']
 */
function mediaFN($id){
  global $conf;
  $id = cleanID($id);
  $id = str_replace(':','/',$id);
  return $conf['mediadir'].'/'.utf8_encodeFN($id);
}
204b625487dSandi
/**
 * Returns the full filepath to a localized textfile if local
 * version isn't found the english one is returned
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id Base name of the text file (without the .txt suffix)
 * @return string     Path below DOKU_INC.'inc/lang/'; the english path is
 *                    returned without checking that it exists
 */
function localeFN($id){
  global $conf;
  $langdir = DOKU_INC.'inc/lang/';

  $file = $langdir.$conf['lang'].'/'.$id.'.txt';
  if(@file_exists($file)) return $file;

  //fall back to english
  return $langdir.'en/'.$id.'.txt';
}
220b625487dSandi
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    The namespace the ID is relative to
 * @param  string       $id    The (possibly relative) ID
 * @param  boolean      $clean Pass the result through cleanID()
 * @return string              The resolved ID
 */
function resolve_id($ns,$id,$clean=true){
  // if the id starts with a dot we need to handle the
  // relative stuff (strpos also copes with an empty id)
  if(strpos((string) $id,'.') === 0){
    // normalize initial dots without a colon
    $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
    // prepend the current namespace
    $id = $ns.':'.$id;

    // cleanup relatives
    // all segment checks are strict so that a segment named "0" is kept
    $result = array();
    $pathA  = explode(':', $id);
    if ($pathA[0] === '') $result[] = '';
    foreach ($pathA as $dir) {
      if ($dir === '..') {
        if (end($result) === '..') {
          // already above the known path, keep climbing
          $result[] = '..';
        } else {
          // step up one level; nothing (or only the empty root marker)
          // to remove means we leave the known path
          $popped = array_pop($result);
          if ($popped === null || $popped === '') $result[] = '..';
        }
      } elseif ($dir !== '' && $dir !== '.') {
        $result[] = $dir;
      }
    }
    // keep a trailing colon
    if (end($pathA) === '') $result[] = '';
    $id = implode(':', $result);
  }elseif($ns !== false && strpos($id,':') === false){
    //if link contains no namespace. add current namespace (if any)
    $id = $ns.':'.$id;
  }

  if($clean) $id = cleanID($id);
  return $id;
}
266c4e0e4a1SAndreas Gohr
/**
 * Returns a full media id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     The namespace the ID is relative to
 * @param  string       $page   The media ID, replaced by the resolved ID
 * @param  boolean      $exists Set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
  $page   = resolve_id($ns,$page);
  $exists = @file_exists(mediaFN($page));
}
277b625487dSandi
/**
 * Returns a full page id
 *
 * Nothing is returned; the results are written to the reference parameters.
 * An ID ending in a colon is treated as a namespace link and is mapped to
 * the first existing candidate (start page inside the namespace, page named
 * like the namespace inside it, page named like the namespace) or, if none
 * exists, to the start page inside the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns     The namespace the ID is relative to
 * @param  string       $page   The page ID (optionally with a #hash),
 *                              replaced by the resolved and cleaned ID
 * @param  boolean      $exists Set to whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
  global $conf;
  $exists = false;

  //keep hashlink if exists then clean both parts
  // explode() replaces the removed split(); padding guarantees $hash is set
  list($page,$hash) = array_pad(explode('#',$page,2),2,'');
  $hash = cleanID($hash);
  $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

  // get filename (calls clean itself)
  $file = wikiFN($page);

  // if ends with colon we have a namespace link
  if(substr($page,-1) == ':'){
    $startpage = $page.$conf['start'];
    $nspage    = $page.noNS(cleanID($page));
    if(@file_exists(wikiFN($startpage))){
      // start page inside namespace
      $page   = $startpage;
      $exists = true;
    }elseif(@file_exists(wikiFN($nspage))){
      // page named like the NS inside the NS
      $page   = $nspage;
      $exists = true;
    }elseif(@file_exists($file)){
      // page like namespace exists, keep $page as it is
      $exists = true;
    }else{
      // fall back to default
      $page   = $startpage;
      $exists = false;
    }
  }elseif(@file_exists($file)){
    $exists = true;
  }elseif($conf['autoplural']){
    //check alternative plural/nonplural form
    if(substr($page,-1) == 's'){
      $try = substr($page,0,-1);
    }else{
      $try = $page.'s';
    }
    if(@file_exists(wikiFN($try))){
      $page   = $try;
      $exists = true;
    }
  }

  // now make sure we have a clean page
  $page = cleanID($page);

  //add hash if any
  if(!empty($hash)) $page .= '#'.$hash;
}
339b625487dSandi
/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
  global $conf;
  $md5  = md5($data);
  // files are spread over subdirectories named after the first hash char
  // (square brackets: the curly brace offset syntax is gone in PHP 8)
  $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
  io_makeFileDir($file);
  return $file;
}
35898407a7aSandi
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as the body of a case insensitive UTF-8
 * regular expression which is matched against the ID prefixed by a colon.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id The page ID to check
 * @return boolean    true if the expression matches, false otherwise or
 *                    when no expression is configured
 */
function isHiddenPage($id){
  global $conf;
  if(empty($conf['hidepages'])) return false;

  return (bool) preg_match('/'.$conf['hidepages'].'/ui',':'.$id);
}
3730dc92c6fSAndreas Gohr
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id The page ID to check
 * @return boolean    true when isHiddenPage() reports false for the ID
 */
function isVisiblePage($id){
  return !isHiddenPage($id);
}
3820dc92c6fSAndreas Gohr
/**
 * Checks and sets HTTP headers for conditional HTTP requests
 *
 * Always sends Last-Modified and ETag headers. When the validators sent
 * by the client match them, a 304 response is sent and the script exits.
 *
 * @author Simon Willison <swillison@gmail.com>
 * @link   http://simon.incutio.com/archive/2003/04/23/conditionalGet
 * @param  int $timestamp Modification time (Unix timestamp) of the content
 */
function http_conditionalRequest($timestamp){
    // A PHP implementation of conditional get, see
    //   http://fishbowl.pastiche.org/archives/001132.html
    // The date has to be given in GMT, so it is formatted with gmdate();
    // formatting local time and relabelling it as GMT is off by the
    // server's UTC offset
    $last_modified = gmdate('D, d M Y H:i:s', $timestamp).' GMT';
    $etag = '"'.md5($last_modified).'"';
    // Send the headers
    header("Last-Modified: $last_modified");
    header("ETag: $etag");
    // See if the client has provided the required headers
    $if_modified_since = isset($_SERVER['HTTP_IF_MODIFIED_SINCE']) ?
        stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']) :
        false;
    $if_none_match = isset($_SERVER['HTTP_IF_NONE_MATCH']) ?
        stripslashes($_SERVER['HTTP_IF_NONE_MATCH']) :
        false;
    if (!$if_modified_since && !$if_none_match) {
        return;
    }
    // At least one of the headers is there - check them
    if ($if_none_match && $if_none_match != $etag) {
        return; // etag is there but doesn't match
    }
    if ($if_modified_since && $if_modified_since != $last_modified) {
        return; // if-modified-since is there but doesn't match
    }
    // Nothing has changed since their last request - serve a 304 and exit
    header('HTTP/1.0 304 Not Modified');
    exit;
}
418254e5c84SBen Coburn
//Setup VIM: ex: et ts=2 enc=utf-8 :
420