xref: /dokuwiki/inc/pageutils.php (revision 058f1c3e98b1437a691cc77a36002dee1c41f007)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts the ID from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * When the ID names a namespace (it ends with ':' - or with '/' while
 * $conf['useslash'] is set) a matching page is looked up and a redirect
 * to it is issued through send_redirect().
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $param  name of the request parameter holding the ID
 * @param  boolean $clean  pass the result through cleanID()
 * @return string  the ID found in the request
 */
function getID($param='id',$clean=true){
    global $conf;

    $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // Not every server API provides all of these keys. Reading them
        // through isset() keeps the original truthiness checks but avoids
        // "undefined index" notices for the missing ones.
        $server = array();
        foreach(array('REQUEST_URI','SCRIPT_FILENAME','PATH_INFO','SCRIPT_NAME','DOCUMENT_ROOT') as $key){
            $server[$key] = isset($_SERVER[$key]) ? $_SERVER[$key] : '';
        }

        $request = $server['REQUEST_URI'];
        $script  = '';

        //get the script URL
        if(!empty($conf['basedir'])){
            // only the page dispatcher lives in the base directory, all other
            // parameters are assumed to belong to scripts in lib/exe/
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.basename($server['SCRIPT_FILENAME']);

        }elseif($server['PATH_INFO']){
            $request = $server['PATH_INFO'];
        }elseif($server['SCRIPT_NAME']){
            $script = $server['SCRIPT_NAME'];
        }elseif($server['DOCUMENT_ROOT'] && $server['SCRIPT_FILENAME']){
            $script = preg_replace ('/^'.preg_quote($server['DOCUMENT_ROOT'],'/').'/','',
                    $server['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        // $id is still null when the request did not start with the script URL
        $id = urldecode((string) $id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    $last = substr((string) $id,-1);
    if($last == ':' || ($conf['useslash'] && $last == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        // NOTE(review): presumably send_redirect() ends the request - confirm
        send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
85
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is additionally run
 * through utf8_romanize() and utf8_deaccent().
 *
 * Results for the default flags are kept in the global $cache_cleanid
 * array, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     Allow leading or trailing _ for media files
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // The memory cache is keyed by the raw ID only. It may therefore only
    // serve and store results computed with the default flags - otherwise a
    // call with $ascii or $media set would be handed the default result (or
    // would store its own result for later default calls).
    $cacheable = !$ascii && !$media;

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim((string)$raw_id);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    $id = strtr($id,';',':');
    if($conf['useslash']){
        $id = strtr($id,'/',':');
    }else{
        $id = strtr($id,'/',$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    // NOTE(review): presumably replaces special chars (plus '*') by the
    // separator - confirm against utf8_stripspecials()
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up
    $id = preg_replace($sepcharpat,$sepchar,$id);   // collapse separator runs
    $id = preg_replace('#:+#',':',$id);             // collapse colon runs
    $id = ($media ? trim($id,':.-') : trim($id,':._-'));
    $id = preg_replace('#:[:\._\-]+#',':',$id);     // no punctuation right after a colon

    if ($cacheable) $cache[(string)$raw_id] = $id;
    return($id);
}
141
/**
 * Return the namespace part of a wiki ID
 *
 * That is everything in front of the last colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id
 * @return string|false the namespace, false if the ID contains no colon
 */
function getNS($id){
    $id    = (string) $id;
    $colon = strrpos($id,':');
    if($colon === false) return false;

    return substr($id,0,$colon);
}
154
/**
 * Returns the ID without the namespace
 *
 * That is everything behind the last colon. An ID without any colon is
 * handed back unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id
 * @return string the part of the ID following the last colon
 */
function noNS($id) {
    $colon = strrpos($id, ':');
    if ($colon === false) {
        // nothing to strip
        return $id;
    }
    return substr($id, $colon+1);
}
168
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the given ID,
 * obtained by applying noNS() to the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param  string $id
 * @return string|false whatever noNS() makes of the namespace; getNS()
 *                      yields false for an ID without a colon
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
177
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * If the page part of the ID is empty or equals $conf['start'], the name
 * of the current namespace is returned instead. When there is no such
 * namespace either, $conf['start'] is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param  string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    // Compare strictly: a loose "== false" would also match a page or
    // namespace that is simply named "0".
    if ($p === $conf['start'] || $p === false || $p === null || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === null || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}
195
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing is left
 * after that, 'section' followed by the digits of the title is used.
 *
 * When $check is an array it is used (and updated) to keep the returned
 * IDs unique: a title that was seen before gets a counter appended. Every
 * returned ID is registered in $check, so a generated ID can not collide
 * with a later headline that happens to have the same name.
 *
 * @param string  $title   The headline title
 * @param array   $check   Existing IDs (title => number)
 * @return string the link ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if($new === ''){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists($title,$check)) {
            $check[$title] = 0;
        } else {
            // count up until an ID is found that has not been handed out yet
            $base = $title;
            do {
                $title = $base . ++$check[$base];
            } while (array_key_exists($title,$check));
            // remember the generated ID as well
            $check[$title] = 0;
        }
    }

    return $title;
}
223
224
/**
 * Wiki page existence check
 *
 * Checks whether the file returned by wikiFN() exists.
 * Parameters as for wikiFN.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param  string $id    page ID
 * @param  string $rev   page revision, empty string for current
 * @param  bool   $clean whether the ID still needs cleaning
 * @return bool
 */
function page_exists($id,$rev='',$clean=true) {
    $file = wikiFN($id,$rev,$clean);
    return @file_exists($file);
}
235
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Current pages live below $conf['datadir'], old revisions below
 * $conf['olddir']. Results are remembered in the global $cache_wikifn
 * array, keyed by raw ID and revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   the file name
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $known = & $cache_wikifn;

    // answer repeated lookups from memory
    if (isset($known[$raw_id][$rev])) {
        return $known[$raw_id][$rev];
    }

    $id   = $clean ? cleanID($raw_id) : $raw_id;
    $path = utf8_encodeFN(str_replace(':','/',$id));

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.$path.'.txt';
    }else{
        $fn = $conf['olddir'].'/'.$path.'.'.$rev.'.txt';
        if($conf['compression']){
            // an existing file wins regardless of the configured compression,
            // we want to read both kinds
            $suffix = '.' . $conf['compression'];
            foreach(array('.gz','.bz2') as $candidate){
                if(@file_exists($fn . $candidate)){
                    $suffix = $candidate;
                    break;
                }
            }
            $fn .= $suffix;
        }
    }

    if (!isset($known[$raw_id])) {
        $known[$raw_id] = array();
    }
    $known[$raw_id][$rev] = $fn;

    return $fn;
}
283
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the MD5 hash of the cleaned ID and is
 * located in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param  string $id page ID
 * @return string
 */
function wikiLockFN($id) {
    global $conf;

    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
293
294
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is passed through utf8_encodeFN() to protect Unicode chars.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param  string $id  page ID
 * @param  string $ext file extension, appended verbatim
 * @return string
 */
function metaFN($id,$ext){
    global $conf;

    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
309
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param  string $id page ID
 * @return array  the matching file names, an empty array if glob() found nothing
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $found    = glob($basename.'.*', GLOB_MARK);
    if(!$found) return array();

    // Only accept a single extension directly behind the base name. This
    // filters files like foo.bar.meta when $id == 'foo' and - because of
    // the slash GLOB_MARK appends - directories, too.
    $pattern = '/^'.preg_quote($basename, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $found);
}
322
/**
 * Returns the full path to the mediafile specified by ID
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is passed through utf8_encodeFN() to protect Unicode chars.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id media ID
 * @return string
 */
function mediaFN($id){
    global $conf;

    $path = str_replace(':','/',cleanID($id));
    return $conf['mediadir'].'/'.utf8_encodeFN($path);
}
337
/**
 * Returns the full filepath to a localized textfile
 *
 * The file is searched in the configuration directory first, then in
 * the language directory of the configured language. If neither file
 * exists, the path of the English version is returned (without checking
 * that it exists).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id name of the text file without extension
 * @return string
 */
function localeFN($id){
    global $conf;

    $candidates = array(
        DOKU_CONF.'/lang/'.$conf['lang'].'/'.$id.'.txt',
        DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt',
    );
    foreach($candidates as $file){
        if(@file_exists($file)) return $file;
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$id.'.txt';
}
356
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * An ID starting with a dot is interpreted relative to the given
 * namespace: '.' segments are dropped and '..' segments remove the
 * segment in front of them. An ID without any colon is put into the
 * given namespace (unless $ns is false). All other IDs are left alone.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 *
 * @param  string|false $ns    namespace the ID is relative to
 * @param  string       $id    the (relative) ID
 * @param  bool         $clean pass the result through cleanID()
 * @return string
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    $id = (string) $id;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff (the emptiness check avoids an invalid string offset)
    if($id !== '' && $id[0] === '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        // Segments are compared strictly against '' so that a segment named
        // "0" is kept instead of being mistaken for an empty one.
        $result = array();
        $pathA  = explode(':', $id);
        if ($pathA[0] === '') $result[] = '';   // keep a leading colon
        foreach ($pathA as $dir) {
            if ($dir === '..') {
                if (end($result) === '..') {
                    $result[] = '..';
                } else {
                    // go one level up; when there is nothing left to go up
                    // from, the '..' itself is kept
                    $popped = array_pop($result);
                    if ($popped === null || $popped === '') {
                        $result[] = '..';
                    }
                }
            } elseif ($dir !== '' && $dir !== '.') {
                $result[] = $dir;
            }
        }
        if (end($pathA) === '') $result[] = '';  // keep a trailing colon
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
407
/**
 * Resolves a media ID to a full media ID
 *
 * Nothing is returned, the results are passed back through the
 * reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns      namespace the ID is relative to
 * @param string &$page   media ID, replaced by the resolved and cleaned ID
 * @param bool   &$exists set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $exists = @file_exists(mediaFN($page));
}
418
/**
 * Resolves a page ID to a full page ID
 *
 * Nothing is returned, the results are passed back through the
 * reference parameters. Links to a namespace are pointed to a page
 * inside or named like that namespace, and with $conf['autoplural'] set
 * the plural/singular form of a missing page is tried as well.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns      namespace the ID is relative to
 * @param string &$page   page ID (may carry a #hash), replaced by the
 *                        resolved and cleaned ID
 * @param bool   &$exists set to whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    $exists = false;

    // Split off a hash link. strpos() is used as a truth value here, so a
    // '#' in the very first position is not treated as the start of a hash.
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);

    // resolve relative parts now, the cleaning is done at the very end
    $page = resolve_id($ns,$page,false);

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // a trailing colon, semicolon or (with useslash) slash marks a namespace link
    $lastChar    = substr($page,-1);
    $isNamespace = in_array($lastChar, array(':', ';')) ||
                   ($conf['useslash'] && $lastChar == '/');

    if($isNamespace){
        if(page_exists($page.$conf['start'])){
            // start page inside namespace
            $page  .= $conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)))){
            // page named like the NS inside the NS
            $page  .= noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page)){
            // page like namespace exists, the ID stays as it is
            $exists = true;
        }else{
            // fall back to default
            $page .= $conf['start'];
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = (substr($page,-1) == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
484
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed below $conf['cachedir'] in a subdirectory named
 * after the first character of the MD5 hash.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets instead of the {} offset syntax, which PHP 8 removed
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
503
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case insensitive regular expression
 * which is matched against the ID with a colon prepended. Nothing is
 * hidden when the option is empty or while $ACT is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param  string $id page ID
 * @return bool   true if the page is hidden
 */
function isHiddenPage($id){
    global $conf;
    global $ACT;

    if(empty($conf['hidepages']) || $ACT == 'admin') return false;

    return (bool) preg_match('/'.$conf['hidepages'].'/ui',':'.$id);
}
520
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param  string $id page ID
 * @return bool   true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
529
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing ":*". The root namespace is
 * "*". Everything but the root namespace is passed through hsc() before
 * it is returned.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param  string $id page or namespace ID
 * @return string
 */
function prettyprint_id($id) {
    // an empty ID or a single colon stands for the root namespace
    $isRoot = (!$id || $id === ':');
    if ($isRoot) {
        return '*';
    }

    // a trailing colon marks a namespace
    $suffix = (substr($id, -1, 1) === ':') ? '*' : '';
    return hsc($id.$suffix);
}
548
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::encode() and everything else
 * URL-encodes the name while keeping slashes.
 *
 * When the second parameter is true the string will
 * be encoded only if characters outside of a-z, A-Z, 0-9, '/', '_',
 * '-', '.' and '%' are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param  string $file file name
 * @param  bool   $safe skip names that need no encoding
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;

    $mode = $conf['fnencode'];
    if($mode == 'utf-8') return $file;

    // nothing to do for names made of unproblematic characters only
    $harmless = $safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file);
    if($harmless) return $file;

    if($mode == 'safe') return SafeFN::encode($file);

    // URL encoding, but directory separators have to stay intact
    return str_replace('%2F','/',urlencode($file));
}
578
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::decode() and everything else is
 * URL-decoded.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param  string $file encoded file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;

    switch($conf['fnencode']){
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
597
598