xref: /dokuwiki/inc/pageutils.php (revision 90bee6003f3d7160fb936d01511a35eeb5bc708c)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable (via $INPUT) or extracts
 * the ID from the full request URI when userewrite is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * An ID ending in a namespace separator is expanded to a page inside
 * that namespace; when the current action is 'show' a redirect to the
 * expanded ID is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $param  the request parameter holding the ID
 * @param  bool   $clean  run the result through cleanID()
 * @return string the ID
 */
function getID($param='id',$clean=true){
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // not every SAPI/webserver sets all of these $_SERVER keys, so they
        // are checked with empty()/isset() to avoid undefined index notices
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script = '';

        //get the script URL
        if(!empty($conf['basedir'])){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.utf8_basename($_SERVER['SCRIPT_FILENAME']);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
87
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) accented characters are
 * romanized and/or converted to unaccented ones.
 *
 * Results are kept in the global $cache_cleanid memory cache. Because the
 * cache is keyed by the raw ID only, it is bypassed completely when $ascii
 * is requested - otherwise a forced-ASCII call could be answered with (or
 * could poison the cache for) the non-ASCII result of the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string the cleaned ID
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache (default mode only)
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat === null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim((string)$raw_id);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse repeated separators and strip separator/punctuation
    //characters from the start and end of the ID and of each namespace part
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    // forced-ASCII results must not be stored under the shared raw ID key
    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}
142
/**
 * Return the namespace part of a wiki ID
 *
 * Everything before the last colon is the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string|false the namespace, false if the ID contains no colon
 */
function getNS($id){
    $id  = (string) $id;
    $cut = strrpos($id, ':');
    return ($cut === false) ? false : substr($id, 0, $cut);
}
155
/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned; an ID without a colon
 * is returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id
 * @return string
 */
function noNS($id) {
    $colon = strrpos($id, ':');
    if ($colon === false) {
        return $id;
    }
    return substr($id, $colon+1);
}
169
/**
 * Returns the current namespace
 *
 * This is the last part of the namespace of the given ID, i.e. the
 * result of noNS() applied to the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string|false
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
178
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page name equals $conf['start'] (or is empty) the name of the
 * current namespace is used instead; if that is empty as well,
 * $conf['start'] is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $name = noNS($id);
    $isStart = ($name == $conf['start'] || $name == false);
    if (!$isStart) {
        return $name;
    }

    // start page (or empty name): fall back to the namespace name
    $ns = curNS($id);
    return ($ns == false) ? $conf['start'] : $ns;
}
196
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing is left,
 * 'section' followed by the digits of the headline is used.
 *
 * @param string     $title The headline title
 * @param array|bool $check Existing IDs (title => number); when an array is
 *                          given it is updated and used to make the result
 *                          unique by appending a counter
 * @return string the link id
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $clean    = str_replace(array(':','.'),'',cleanID($title));
    $stripped = ltrim($clean,'0123456789_-');

    if(empty($stripped)){
        //keep numbers from headline
        $anchor = 'section'.preg_replace('/[^0-9]+/','',$clean);
    }else{
        $anchor = $stripped;
    }

    // no uniqueness tracking requested
    if(!is_array($check)){
        return $anchor;
    }

    // make sure titles are unique
    if(array_key_exists($anchor,$check)){
        $check[$anchor]++;
        return $anchor.$check[$anchor];
    }

    $check[$anchor] = 0;
    return $anchor;
}
225
226
/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * When $date_at is set and a revision is given, $rev is treated as a point
 * in time and replaced by the revision the page changelog reports via
 * getLastRevisionAt() (if any) before the file is looked up.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id      page id
 * @param  string $rev     page revision, empty string for current
 * @param  bool   $clean   flag indicating that $id should be cleaned (see wikiFN)
 * @param  bool   $date_at interpret $rev as "date at" instead of an exact revision
 * @return bool   true if the page file exists
 */
function page_exists($id,$rev='',$clean=true, $date_at=false) {
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false)
            $rev = $pagelog_rev;
    }
    return @file_exists(wikiFN($id,$rev,$clean));
}
243
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Results are remembered per raw ID and revision in the global
 * $cache_wikifn array.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   the full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    // answer from the memory cache when possible
    if (isset($cache[$raw_id], $cache[$raw_id][$rev])) {
        return $cache[$raw_id][$rev];
    }

    $path = $clean ? cleanID($raw_id) : $raw_id;
    $path = utf8_encodeFN(str_replace(':','/',$path));

    if(empty($rev)){
        // current version lives in the data directory
        $fn = $conf['datadir'].'/'.$path.'.txt';
    }else{
        // old revisions live in the attic
        $fn = $conf['olddir'].'/'.$path.'.'.$rev.'.txt';
        if($conf['compression']){
            //file might not exist yet, then the configured extension is used
            $suffix = '.' . $conf['compression'];
            //test for extensions here, we want to read both compressions
            foreach(array('.gz','.bz2') as $candidate){
                if(@file_exists($fn . $candidate)){
                    $suffix = $candidate;
                    break;
                }
            }
            $fn .= $suffix;
        }
    }

    if (!isset($cache[$raw_id])) {
        $cache[$raw_id] = array();
    }
    $cache[$raw_id][$rev] = $fn;
    return $fn;
}
291
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned ID and is
 * located in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id page id
 * @return string
 */
function wikiLockFN($id) {
    global $conf;
    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
301
302
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, namespaces become directories below $conf['metadir']
 * and $ext is appended as given.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id  page id
 * @param  string $ext file extension (including the leading dot, if any)
 * @return string
 */
function metaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
315
/**
 * Returns the full path to the media's meta file specified by ID and extension
 *
 * The ID is cleaned, namespaces become directories below
 * $conf['mediametadir'] and $ext is appended as given.
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id  media id
 * @param  string $ext file extension (including the leading dot, if any)
 * @return string
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
328
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * Only files with exactly one extension after the meta base name are
 * returned, e.g. foo.bar.meta is not reported for the ID 'foo'.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id page id
 * @return array
 */
function metaFiles($id){
    $base  = metaFN($id, '');
    $found = glob($base.'.*', GLOB_MARK);
    if(!$found){
        return array();
    }

    // filter files like foo.bar.meta when $id == 'foo'
    $pattern = '/^'.preg_quote($base, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $found);
}
341
/**
 * Returns the full path to the mediafile specified by ID
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Without a revision the file is located in $conf['mediadir']; with a
 * revision the (integer) revision is inserted before the file extension
 * and the file is located in $conf['mediaolddir'].
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string     $id  media id
 * @param  string|int $rev media revision, empty for current
 * @return string
 */
function mediaFN($id, $rev=''){
    global $conf;
    $path = str_replace(':','/',cleanID($id));

    if(empty($rev)){
        return $conf['mediadir'].'/'.utf8_encodeFN($path);
    }

    // split off the extension reported by mimetype() to insert the revision
    $info      = mimetype($path);
    $extension = $info[0];
    $stem      = substr($path,0, -1*strlen($extension)-1);
    return $conf['mediaolddir'].'/'.utf8_encodeFN($stem .'.'.( (int) $rev ).'.'.$extension);
}
363
/**
 * Returns the full filepath to a localized file
 *
 * Looks for the file in the configured language first below DOKU_CONF,
 * then below DOKU_INC. If neither exists the path of the english version
 * is returned (without checking for its existence).
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $name = $id.'.'.$ext;

    // local override
    $file = DOKU_CONF.'lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($file)){
        return $file;
    }

    // bundled translation
    $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($file)){
        return $file;
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
384
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are resolved relative to $ns ('.' is the
 * namespace itself, '..' its parent). IDs without any colon are put into
 * $ns unless $ns is false.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns    namespace the ID is relative to
 * @param  string       $id    the (possibly relative) ID
 * @param  bool         $clean run the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    // (square brackets: the curly brace offset syntax is gone in PHP 8)
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                // go one level up; keep the '..' when there is nothing left to pop
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
435
/**
 * Returns a full media id
 *
 * The resolved (and cleaned) media id is written back to $page and
 * $exists is set according to whether the media file exists.
 *
 * When $date_at is set and a revision is given, $rev is treated as a point
 * in time and replaced by the revision the media changelog reports via
 * getLastRevisionAt() (if any). $rev is passed by value, so the revision
 * found is only used for the existence check.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string     $ns      namespace the media id is relative to
 * @param  string     $page    (reference) media id, replaced by the resolved id
 * @param  bool       $exists  (reference) set to whether the media file exists
 * @param  string|int $rev     media revision, empty string for current
 * @param  bool       $date_at interpret $rev as "date at" instead of an exact revision
 */
function resolve_mediaid($ns,&$page,&$exists,$rev='',$date_at=false){
    // the changelog lookup needs the full id, so resolve first
    $page   = resolve_id($ns,$page);
    if($rev !== '' &&  $date_at){
        $medialog = new MediaChangeLog($page);
        $medialog_rev = $medialog->getLastRevisionAt($rev);
        if($medialog_rev !== false) {
            $rev = $medialog_rev;
        }
    }
    $file   = mediaFN($page,$rev);
    $exists = @file_exists($file);
}
453
/**
 * Returns a full page id
 *
 * The resolved and cleaned page id (plus cleaned '#hash', if any) is
 * written back to $page; $exists reports whether a matching page was found.
 *
 * Namespace links (ending in ':' or ';', or '/' with useslash) are pointed
 * to the start page in the namespace, a page named like the namespace
 * inside it, or a page named like the namespace - in that order - falling
 * back to the start page. For other links the plural/singular form is
 * tried when $conf['autoplural'] is set and the page does not exist.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string     $ns      namespace the page id is relative to
 * @param  string     $page    (reference) page id, replaced by the resolved id
 * @param  bool       $exists  (reference) set to whether the page exists
 * @param  string|int $rev     page revision, empty string for current
 * @param  bool       $date_at interpret $rev as "date at" instead of an exact revision
 */
function resolve_pageid($ns,&$page,&$exists,$rev='',$date_at=false ){
    global $conf;
    global $ID;

    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // translate a "date at" into a concrete revision
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($page);
        $found   = $pagelog->getLastRevisionAt($rev);
        if($found !== false){ //something found
            $rev = $found;
        }
    }

    // get filename (calls clean itself)
    $file = wikiFN($page,$rev);

    // if ends with colon or slash we have a namespace link
    $last = substr($page,-1);
    $isNamespaceLink = in_array($last, array(':', ';')) ||
                       ($conf['useslash'] && $last == '/');

    if($isNamespaceLink){
        if(page_exists($page.$conf['start'],$rev,true,$date_at)){
            // start page inside namespace
            $page  .= $conf['start'];
            $exists = true;
        }else{
            $sameName = $page.noNS(cleanID($page));
            if(page_exists($sameName,$rev,true,$date_at)){
                // page named like the NS inside the NS
                $page   = $sameName;
                $exists = true;
            }elseif(page_exists($page,$rev,true,$date_at)){
                // page like namespace exists
                $exists = true;
            }else{
                // fall back to default
                $page .= $conf['start'];
            }
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = ($last == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try,$rev,true,$date_at)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
531
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after
 * the first character of the md5 hash.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets: the curly brace offset syntax is gone in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
550
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The check is run through the PAGEUTILS_ID_HIDEPAGE event with
 * _isHiddenPage() as default action; the 'hidden' entry of the event
 * data is the result.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id page id
 * @return bool
 */
function isHiddenPage($id){
    $data = array();
    $data['id']     = $id;
    $data['hidden'] = false;

    trigger_event('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');

    return $data['hidden'];
}
564
/**
 * Default action for the PAGEUTILS_ID_HIDEPAGE event
 *
 * Marks the page as hidden when its ID (prefixed with a colon) matches the
 * regular expression in $conf['hidepages']. Nothing is changed when the
 * page is already marked hidden, no pattern is configured or the current
 * action is 'admin'.
 *
 * @param array $data event data with the keys 'id' and 'hidden' (modified in place)
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    $skip = $data['hidden'] || empty($conf['hidepages']) || $ACT == 'admin';
    if($skip){
        return;
    }

    $pattern = '/'.$conf['hidepages'].'/ui';
    $subject = ':'.$data['id'];
    if(preg_match($pattern, $subject)){
        $data['hidden'] = true;
    }
}
577
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id page id
 * @return bool   true when isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
586
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped with hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id
 * @return string
 */
function prettyprint_id($id) {
    // empty id or a single colon denote the root namespace
    $isRoot = (!$id || $id === ':');
    if ($isRoot) {
        return '*';
    }

    $label = (substr($id, -1, 1) === ':') ? $id.'*' : $id;
    return hsc($label);
}
605
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL encodes the name
 * (keeping slashes).
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file the filename
 * @param  bool   $safe skip encoding for names made of plain ASCII filename chars
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8'){
        return $file;
    }

    // nothing to encode (or already encoded)
    $isPlain = $safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file);
    if($isPlain){
        return $file;
    }

    if($mode == 'safe'){
        return SafeFN::encode($file);
    }

    // URL encoding, but directory separators have to stay intact
    return str_replace('%2F','/',urlencode($file));
}
635
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL decodes the name.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file the encoded filename
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8'){
        return $file;
    }
    if($mode == 'safe'){
        return SafeFN::decode($file);
    }
    return urldecode($file);
}
654
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Starting with the namespace of $ID, each namespace up to and including
 * the root namespace is checked for the given page.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none
 */
function page_findnearest($page){
    global $ID;
    if (!$page) return false;

    $ns = $ID;
    while(true){
        // walk one namespace level up
        $ns = getNS($ns);
        $candidate = ltrim($ns.':'.$page, ':');
        if(page_exists($candidate)){
            return $candidate;
        }
        // the root namespace was the last one to check
        if(!$ns) break;
    }

    return false;
}
680