xref: /dokuwiki/inc/pageutils.php (revision 2400ddcb0585bd4bba5d4abeb1be8e2f4ebc56d6)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable (via $INPUT) or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the ID ends in a namespace separator it is resolved to a page inside
 * that namespace, and a redirect is sent if the current action is 'show'.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $param  name of the request parameter carrying the ID
 * @param  bool   $clean  run the result through cleanID()?
 * @return string the ID
 */
function getID($param='id',$clean=true){
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // not every server/SAPI populates all of these entries, so they are
        // never read unguarded (avoids undefined index notices)
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script = '';

        //get the script URL
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $scriptfile = isset($_SERVER['SCRIPT_FILENAME']) ? $_SERVER['SCRIPT_FILENAME'] : '';
            $script = $conf['basedir'].$relpath.utf8_basename($scriptfile);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
87
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results of normal (non-ASCII-forced) calls are kept in the global
 * $cache_cleanid. Forced-ASCII calls bypass that cache in both directions
 * because they can produce a different result for the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key = (string) $raw_id;

    // check if it's already in the memory cache (only valid for non-ASCII mode)
    if (!$ascii && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat === null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse separator runs and colon runs, then strip
    //separator-like characters from the ends and around each colon
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    // never let a forced-ASCII result shadow the regular one
    if (!$ascii) $cache[$key] = $id;
    return($id);
}
143
/**
 * Return the namespace part of a wiki ID
 *
 * The namespace is everything before the last colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false the namespace, or false if the ID contains no colon
 */
function getNS($id){
    $id    = (string) $id;
    $colon = strrpos($id, ':');
    return ($colon === false) ? false : substr($id, 0, $colon);
}
156
/**
 * Returns the ID without the namespace
 *
 * That is everything after the last colon. An ID without a colon is
 * returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string the part after the last colon, or $id itself
 */
function noNS($id) {
    $colon = strrpos($id, ':');
    if ($colon === false) {
        return $id;
    }
    return substr($id, $colon + 1);
}
170
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the given ID.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string|false whatever noNS() yields for the namespace of $id
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
179
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * If the page name is empty or equals $conf['start'], the name of the
 * enclosing namespace is returned instead; if there is none either,
 * $conf['start'] is returned.
 *
 * Emptiness is checked strictly so that a page or namespace literally named
 * "0" is not mistaken for a missing one.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $name = noNS($id);
    if ($name === false || $name === null || $name === '' || $name == $conf['start']) {
        $name = curNS($id);
        if ($name === false || $name === null || $name === '') {
            return $conf['start'];
        }
    }
    return $name;
}
197
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array it is used (and updated) as a registry of the IDs
 * handed out so far, mapping each ID to the last numeric suffix used for it.
 * Generated IDs are registered as well, so a later headline that literally
 * matches an earlier generated ID (e.g. "foo1") still gets a unique result.
 *
 * @param string  $title   The headline title
 * @param array|bool   $check   Existing IDs (title => number)
 * @return string the title
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (array_key_exists($title, $check)) {
            $base = $title;
            do {
                // keep counting up until the suffixed id is unused as well
                $title = $base . ++$check[$base];
            } while (array_key_exists($title, $check));
        }
        // remember the id actually returned
        $check[$title] = 0;
    }

    return $title;
}
226
227
/**
 * Wiki page existence check
 *
 * Takes the same parameters as wikiFN() and reports whether the file
 * it resolves to exists.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     page id
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  passed through to wikiFN()
 * @return bool
 */
function page_exists($id,$rev='',$clean=true) {
    $found = @file_exists(wikiFN($id, $rev, $clean));
    return $found;
}
238
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Results are memorized in the global $cache_wikifn, keyed by raw id and
 * revision.
 *
 * NOTE(review): the cache key does not include $clean, so the same raw id
 * must not be requested both cleaned and uncleaned unless it is already clean.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;
    global $cache_wikifn;
    $cache = & $cache_wikifn;

    // served from memory if we resolved this id/revision before
    if (isset($cache[$raw_id][$rev])) {
        return $cache[$raw_id][$rev];
    }

    $id   = $clean ? cleanID($raw_id) : $raw_id;
    $path = utf8_encodeFN(str_replace(':','/',$id));

    if(empty($rev)){
        // current version lives in the data directory
        $fn = $conf['datadir'].'/'.$path.'.txt';
    }else{
        // old revisions live in the attic, optionally compressed
        $fn = $conf['olddir'].'/'.$path.'.'.$rev.'.txt';
        if($conf['compression']){
            // we want to read both compressions, so probe for existing files first
            $suffix = null;
            foreach (array('gz', 'bz2') as $candidate) {
                if (@file_exists($fn . '.' . $candidate)) {
                    $suffix = $candidate;
                    break;
                }
            }
            // file doesn't exist yet, so we take the configured extension
            if ($suffix === null) $suffix = $conf['compression'];
            $fn .= '.' . $suffix;
        }
    }

    if (!isset($cache[$raw_id])) {
        $cache[$raw_id] = array();
    }
    $cache[$raw_id][$rev] = $fn;
    return $fn;
}
287
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned page id.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id page id
 * @return string full path of the lock file
 */
function wikiLockFN($id) {
    global $conf;
    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
297
298
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   page id (is cleaned here)
 * @param  string $ext  file extension, appended verbatim
 * @return string full path
 */
function metaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
311
/**
 * Returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   media id (is cleaned here)
 * @param  string $ext  file extension, appended verbatim
 * @return string full path
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
324
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id page id
 * @return array matching file paths (keys as returned by preg_grep)
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $found    = glob($basename.'.*', GLOB_MARK);
    if (!$found) {
        return array();
    }
    // only keep "<basename>.<ext>"; this filters files like foo.bar.meta when $id == 'foo'
    $pattern = '/^'.preg_quote($basename, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $found);
}
337
/**
 * Returns the full path to the mediafile specified by ID
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * With a revision given, the path points into the old media directory and
 * the (integer) revision is inserted in front of the file extension.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string     $id   media id (is cleaned here)
 * @param  string|int $rev  revision, empty for the current file
 * @return string full path
 */
function mediaFN($id, $rev=''){
    global $conf;
    $path = str_replace(':','/',cleanID($id));

    if(empty($rev)){
        return $conf['mediadir'].'/'.utf8_encodeFN($path);
    }

    $mime = mimetype($path);
    $ext  = $mime[0];
    // cut off ".<ext>" from the end to get the bare name
    $name = substr($path, 0, -(strlen($ext) + 1));
    return $conf['mediaolddir'].'/'.utf8_encodeFN($name.'.'.((int) $rev).'.'.$ext);
}
359
/**
 * Returns the full filepath to a localized file
 *
 * Looks in the local configuration's lang directory first, then in the
 * distributed language directory; if neither has the file, the path of
 * the english version is returned.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $name = $id.'.'.$ext;

    $candidates = array(
        DOKU_CONF.'lang/'.$conf['lang'].'/'.$name,
        DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name,
    );
    foreach ($candidates as $file) {
        if (@file_exists($file)) {
            return $file;
        }
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
381
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  bool         $clean  run the result through cleanID()?
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    // (square-bracket offset: the curly-brace form is a parse error in PHP 8)
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
432
/**
 * Resolves a media id to a full media id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $ns      namespace the id is relative to
 * @param string &$page   media id, replaced by the resolved id
 * @param bool   &$exists set to whether the resolved media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page = resolve_id($ns, $page);
    $path = mediaFN($page);
    $exists = @file_exists($path);
}
443
/**
 * Resolves a page id to a full page id
 *
 * Nothing is returned; the results are written to the reference parameters.
 * An empty id resolves to the current page ($ID). Ids ending in a namespace
 * separator are resolved to a page inside (or named like) that namespace.
 * With $conf['autoplural'] set, a missing page is also tried in its
 * plural/singular form. A "#hash" part is kept and cleaned separately.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $ns      namespace the id is relative to
 * @param string &$page   page id, replaced by the resolved, cleaned id
 * @param bool   &$exists set to whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    $lastChar    = substr($page,-1);
    $isNamespace = in_array($lastChar, array(':', ';')) ||
                   ($conf['useslash'] && $lastChar == '/');

    if($isNamespace){
        if(page_exists($page.$conf['start'])){
            // start page inside namespace
            $page   = $page.$conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)))){
            // page named like the NS inside the NS
            $page   = $page.noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page)){
            // page like namespace exists, id stays as it is
            $exists = true;
        }else{
            // fall back to default
            $page = $page.$conf['start'];
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = (substr($page,-1) == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
515
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after the
 * first character of the md5 hash. The needed directory is created by this
 * function (via io_makeFileDir())!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square-bracket offset: the curly-brace form is a parse error in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
534
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, with
 * _isHiddenPage() registered as its default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id page id
 * @return bool the 'hidden' flag of the event data after the event ran
 */
function isHiddenPage($id){
    $event = array('id' => $id, 'hidden' => false);
    trigger_event('PAGEUTILS_ID_HIDEPAGE', $event, '_isHiddenPage');
    return $event['hidden'];
}
548
/**
 * Callback that checks if a page is hidden
 *
 * Sets $data['hidden'] to true when the page id (with a leading colon)
 * matches the $conf['hidepages'] pattern. Does nothing if the page is
 * already flagged, no pattern is configured or the current action is 'admin'.
 *
 * @param array $data event data    see isHiddenPage()
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    $nothingToDo = $data['hidden'] || empty($conf['hidepages']) || $ACT == 'admin';
    if ($nothingToDo) {
        return;
    }

    $pattern = '/'.$conf['hidepages'].'/ui';
    $subject = ':'.$data['id'];
    if (preg_match($pattern, $subject)) {
        $data['hidden'] = true;
    }
}
566
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id page id
 * @return bool true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
575
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped using hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id page or namespace id
 * @return string
 */
function prettyprint_id($id) {
    // empty id or a lone colon means the root namespace
    if ($id === ':' || !$id) {
        return '*';
    }
    $suffix = (substr($id, -1, 1) === ':') ? '*' : '';
    return hsc($id.$suffix);
}
594
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL-encodes the name
 * while keeping slashes.
 *
 * When the second parameter is true the string will
 * be encoded only if characters outside a-z, A-Z, 0-9, "/", "_", "-",
 * "." and "%" are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  filename to encode
 * @param  bool   $safe  skip encoding when only harmless characters are present
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8'){
        return $file;
    }

    // nothing to encode (or already encoded)
    $harmless = $safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file);
    if($harmless){
        return $file;
    }

    if($mode == 'safe'){
        return SafeFN::encode($file);
    }

    // url encoding, but directory separators stay readable
    return str_replace('%2F','/',urlencode($file));
}
624
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' names are
 * returned as they are, 'safe' names are decoded by SafeFN, everything
 * else is URL-decoded.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file encoded filename
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;

    switch($conf['fnencode']){
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
643
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Walks from the namespace of $ID upwards and returns the first
 * existing page with the given name.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none
 */
function page_findnearest($page){
    if (!$page) return false;
    global $ID;

    $ns = $ID;
    while (true) {
        // step one namespace level up
        $ns = getNS($ns);
        $candidate = ltrim($ns.':'.$page, ':');
        if (page_exists($candidate)) {
            return $candidate;
        }
        // the last namespace checked was empty, nothing left above it
        if (!$ns) {
            break;
        }
    }

    return false;
}
669