xref: /dokuwiki/inc/pageutils.php (revision 24d494984899eca69df2a5e50d941007500ba545)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Reads the ID from the request parameter $param. When that parameter is
 * empty and $conf['userewrite'] is 2, the ID is extracted from the request
 * URI (or PATH_INFO) instead.
 *
 * An ID ending in a namespace separator is mapped to a concrete page of that
 * namespace and the client is redirected to it.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request parameter that carries the ID
 * @param  boolean $clean  pass the result through cleanID() before returning
 * @return string  the ID found in the request
 */
function getID($param='id',$clean=true){
    global $INPUT;
    global $conf;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // Not every server API fills all of these $_SERVER keys, so each one
        // is read defensively to avoid undefined-index diagnostics.
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script  = '';

        //get the script URL
        if(!empty($conf['basedir'])){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $scriptFile = isset($_SERVER['SCRIPT_FILENAME']) ? $_SERVER['SCRIPT_FILENAME'] : '';
            $script = $conf['basedir'].$relpath.utf8_basename($scriptFile);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            // derive the script URL by cutting the document root off the file path
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
86
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is additionally passed
 * through utf8_romanize() and utf8_deaccent().
 *
 * Results are kept in the global $cache_cleanid. Only results produced with
 * $ascii=false are cached: the cache is keyed by the raw ID alone, so mixing
 * both variants in it would hand an ASCII caller a non-ASCII result (or the
 * other way round).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     DEPRECATED
 * @return string  the cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key = (string)$raw_id;

    // check if it's already in the memory cache (non-ASCII results only)
    if (!$ascii && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat === null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    $id = strtr($id,';',':');
    if($conf['useslash']){
        $id = strtr($id,'/',':');
    }else{
        $id = strtr($id,'/',$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse repeated separators and strip them around colons
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    if (!$ascii) $cache[$key] = $id;
    return($id);
}
143
/**
 * Return the namespace part of a wiki ID
 *
 * Everything in front of the last colon is the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string|false the namespace, false if the ID contains no colon
 */
function getNS($id){
    $id    = (string)$id;
    $colon = strrpos($id,':');
    if($colon === false){
        return false;
    }
    return substr($id,0,$colon);
}
156
/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned; an ID without any colon is
 * handed back unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string the part of the ID behind the last colon
 */
function noNS($id) {
    $colon = strrpos($id, ':');
    return ($colon === false) ? $id : substr($id, $colon+1);
}
170
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the given ID, i.e.
 * noNS() applied to the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  the wiki ID
 * @return string|false whatever noNS() yields for the namespace of $id
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
179
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page part is empty or equals $conf['start'], the name of the
 * enclosing namespace is returned instead. If there is no enclosing namespace
 * either, $conf['start'] is returned.
 *
 * Comparisons are strict on purpose: a page or namespace literally named "0"
 * is a valid name and must not be mistaken for "missing".
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  the wiki ID
 * @return string the page name, the namespace name or $conf['start']
 */
function noNSorNS($id) {
    global $conf;

    // noNS()/curNS() may hand back false, normalise that to an empty string
    $p = (string) noNS($id);
    if ($p === '' || $p === (string) $conf['start']) {
        $p = (string) curNS($id);
        if ($p === '') {
            return $conf['start'];
        }
    }
    return $p;
}
197
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing remains,
 * "section" followed by the digits of the title is used.
 *
 * When $check is an array it is used to keep the returned IDs unique: a title
 * seen before gets a running number appended. Every ID handed out, including
 * numbered ones, is recorded in $check, so a later headline that happens to
 * clean to e.g. "foo1" cannot collide with the second "foo".
 *
 * @param string  $title   The headline title
 * @param array   $check   Existing IDs (title => number), updated in place
 * @return string the section ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists($title,$check)) {
            $check[$title] = 0;
        } else {
            $base = $title;
            // keep counting until the numbered ID is not taken either
            do {
                $title = $base . ++$check[$base];
            } while (array_key_exists($title,$check));
            // remember the generated ID as well
            $check[$title] = 0;
        }
    }

    return $title;
}
225
226
/**
 * Wiki page existence check
 *
 * Tests whether the file that wikiFN() computes for the given arguments
 * exists. Parameters are the same as for wikiFN().
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     page id
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  whether wikiFN() should clean the id
 * @return bool   true if the page file exists
 */
function page_exists($id,$rev='',$clean=true) {
    $found = @file_exists(wikiFN($id,$rev,$clean));
    return $found;
}
237
/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 *
 * Results are kept in the global $cache_wikifn. The cache is keyed by the
 * normalised ID (after optional cleaning), not by the raw input, so a call
 * with $clean=false can never be served a path that was computed for the same
 * raw string with $clean=true, or vice versa.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   full path of the page file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (@file_exists($fn . '.gz')){
                $fn .= '.gz';
            }else if(@file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) { $cache[$id] = array(); }
    $cache[$id][$rev] = $fn;
    return $fn;
}
285
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 sum of the cleaned ID and lives in
 * $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  page id
 * @return string full path of the lock file
 */
function wikiLockFN($id) {
    global $conf;
    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
295
296
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the result is
 * encoded with utf8_encodeFN() below $conf['metadir'].
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   page id
 * @param  string $ext  extension appended verbatim to the file name
 * @return string full path of the meta file
 */
function metaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
309
/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * Works like metaFN() but is rooted at $conf['mediametadir'].
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   media id
 * @param  string $ext  extension appended verbatim to the file name
 * @return string full path of the media meta file
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
322
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * All entries matching "<basename>.*" are globbed and then narrowed down to
 * those with exactly one extension, so files like foo.bar.meta are not
 * reported for the ID 'foo'. Directories are excluded as well, because
 * GLOB_MARK appends a slash to them and the filter rejects slashes.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id  page id
 * @return array  paths of the meta files, empty if there are none
 */
function metaFiles($id){
    $basename   = metaFN($id, '');
    $candidates = glob($basename.'.*', GLOB_MARK);
    if(!$candidates){
        return array();
    }
    $pattern = '/^'.preg_quote($basename, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $candidates);
}
335
/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 *
 * Without a revision the file is located below $conf['mediadir']. With a
 * revision it is located below $conf['mediaolddir'] and the integer revision
 * is inserted between the base name and the extension.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   media id
 * @param  string $rev  revision, empty for the current file
 * @return string full path of the media file
 */
function mediaFN($id, $rev=''){
    global $conf;
    $path = str_replace(':','/',cleanID($id));

    if(empty($rev)){
        return $conf['mediadir'].'/'.utf8_encodeFN($path);
    }

    // element 0 of mimetype()'s result is used as the file extension
    // NOTE(review): presumably false for unknown types - confirm against mimetype()
    $ext     = mimetype($path);
    $extLen  = strlen($ext[0]);
    $name    = substr($path,0, -1*$extLen-1);
    $revName = $name .'.'.( (int) $rev ).'.'.$ext[0];
    return $conf['mediaolddir'].'/'.utf8_encodeFN($revName);
}
357
/**
 * Returns the full filepath to a localized file
 *
 * Looked up in this order, the first existing file wins:
 *  - DOKU_CONF/lang/<lang>/
 *  - DOKU_INC/inc/lang/<lang>/
 * If neither exists the path of the english version is returned, without
 * checking that it exists.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full path of the localized file
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $name = $id.'.'.$ext;

    // locally customised text
    $local = DOKU_CONF.'/lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($local)){
        return $local;
    }

    // text shipped for the configured language
    $shipped = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($shipped)){
        return $shipped;
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
378
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are interpreted relative to $ns: "." stays in the
 * namespace, ".." moves one namespace up. IDs without any colon are placed
 * into $ns (unless $ns is false). All other IDs are left as they are.
 *
 * Path segments are compared strictly, so a namespace or page literally named
 * "0" is kept and not mistaken for an empty segment.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  bool         $clean  pass the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff (substr() is safe on an empty id, too)
    if(substr((string)$id,0,1) === '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon
        if ($pathA[0] === '') $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir === '..') {
                if (end($result) === '..') {
                    // already above the root, keep stacking
                    $result[] = '..';
                } else {
                    // step one level up; if there was nothing (or only the
                    // leading empty segment) to remove, record the '..'
                    $popped = array_pop($result);
                    if ($popped === null || $popped === '') {
                        $result[] = '..';
                    }
                }
            } elseif ($dir !== '' && $dir !== '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon
        if (end($pathA) === '') $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
429
/**
 * Returns a full media id
 *
 * Nothing is returned directly; the results are written to the reference
 * parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the media id is relative to
 * @param  string $page    in: media id as given, out: resolved and cleaned media id
 * @param  bool   $exists  out: whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $exists = @file_exists(mediaFN($page));
}
440
/**
 * Returns a full page id
 *
 * Nothing is returned directly; the results are written to the reference
 * parameters. An empty $page refers to the global $ID. A "#hash" suffix is
 * split off, cleaned separately and re-appended at the end.
 *
 * IDs ending in a namespace separator are treated as namespace links and are
 * mapped to, in order of preference: the start page inside the namespace, a
 * page named like the namespace inside it, a page named like the namespace
 * next to it, and finally the (non existing) start page inside it.
 *
 * For all other IDs, if the page does not exist and $conf['autoplural'] is
 * set, the plural/singular variant (trailing "s" added or removed) is tried.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the page id is relative to
 * @param  string $page    in: page id as given, out: resolved and cleaned page id
 * @param  bool   $exists  out: whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    $lastChar        = substr($page,-1);
    $isNamespaceLink = in_array($lastChar, array(':', ';')) ||
                       ($conf['useslash'] && $lastChar == '/');

    if($isNamespaceLink){
        $startPage = $page.$conf['start'];
        if(page_exists($startPage)){
            // start page inside namespace
            $page   = $startPage;
            $exists = true;
        }else{
            $namedLikeNS = $page.noNS(cleanID($page));
            if(page_exists($namedLikeNS)){
                // page named like the NS inside the NS
                $page   = $namedLikeNS;
                $exists = true;
            }elseif(page_exists($page)){
                // page like namespace exists, id stays as it is
                $exists = true;
            }else{
                // fall back to default
                $page = $startPage;
            }
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = (substr($page,-1) == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
512
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after the
 * first character of the md5 sum.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets: the curly brace offset syntax is gone in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
531
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case insensitive regular expression that is
 * matched against the ID prefixed with a colon. Nothing is hidden when the
 * option is empty or while the current action ($ACT) is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id to check
 * @return bool   true if the page is hidden
 */
function isHiddenPage($id){
    global $conf;
    global $ACT;

    if(empty($conf['hidepages']) || $ACT == 'admin'){
        return false;
    }

    $pattern = '/'.$conf['hidepages'].'/ui';
    return (bool) preg_match($pattern,':'.$id);
}
548
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id to check
 * @return bool   true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
557
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is passed through hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id  page or namespace id
 * @return string the formatted id
 */
function prettyprint_id($id) {
    // empty id or a lone colon stands for the root namespace
    if (!$id || $id === ':') {
        return '*';
    }

    $isNamespace = (substr($id, -1, 1) === ':');
    if ($isNamespace) {
        $id = $id.'*';
    }
    return hsc($id);
}
576
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding:
 *  - 'utf-8' returns the name untouched
 *  - 'safe'  uses SafeFN::encode()
 *  - anything else uses urlencode(), keeping slashes readable
 *
 * When the second parameter is true the string will
 * be encoded only if it contains characters other than
 * ASCII letters, digits and / _ - . % -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  the file name to encode
 * @param  bool   $safe  skip encoding for names that need none
 * @return string the encoded file name
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8'){
        return $file;
    }

    $needsNoEncoding = $safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file);
    if($needsNoEncoding){
        return $file;
    }

    if($mode == 'safe'){
        return SafeFN::encode($file);
    }

    // url encoding, but directory separators stay as they are
    return str_replace('%2F','/',urlencode($file));
}
606
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding:
 *  - 'utf-8' returns the name untouched
 *  - 'safe'  uses SafeFN::decode()
 *  - anything else uses urldecode()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file  the encoded file name
 * @return string the decoded file name
 */
function utf8_decodeFN($file){
    global $conf;

    switch($conf['fnencode']){
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
625
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Starting with the namespace of $ID, each namespace up to and including the
 * root namespace is checked for a page with the given name.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none
 */
function page_findnearest($page){
    global $ID;

    $ns = $ID;
    do {
        $ns = getNS($ns);
        // a false namespace yields ":page", which ltrim turns into a root page id
        $pageid = ltrim("$ns:$page",':');
        if(page_exists($pageid)){
            return $pageid;
        }
        // compare strictly: a namespace named "0" must not end the walk early
    } while($ns !== false);

    return false;
}
650