xref: /dokuwiki/inc/pageutils.php (revision 8a94404455e7db660088b91f82bf92137bad4195)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Reads the ID from the request variable named by $param. If that is not
 * set and $conf['userewrite'] is 2, the ID is extracted from the request
 * URI (or PATH_INFO) by stripping the script URL and the query string.
 *
 * An ID ending in a namespace separator is resolved to a page: the start
 * page inside that namespace, a page named like the namespace inside it,
 * a page named like the namespace itself, or (as fallback) the start page.
 * When $ACT is 'show' a redirect to the resolved page is sent.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $param  name of the request variable that holds the ID
 * @param  bool   $clean  run the ID through cleanID() before returning it
 * @return string the requested ID
 */
function getID($param='id',$clean=true){
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    // compare against '' explicitly: empty() would also reject the valid ID "0"
    if($id === '' && $conf['userewrite'] == 2){
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.utf8_basename($INPUT->server->str('SCRIPT_FILENAME'));

        }elseif($INPUT->server->str('PATH_INFO')){
            $request = $INPUT->server->str('PATH_INFO');
        }elseif($INPUT->server->str('SCRIPT_NAME')){
            $script = $INPUT->server->str('SCRIPT_NAME');
        }elseif($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')){
            $script = preg_replace ('/^'.preg_quote($INPUT->server->str('DOCUMENT_ROOT'),'/').'/','',
                    $INPUT->server->str('SCRIPT_FILENAME'));
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    // only a really empty ID falls back to the start page; "0" is a valid page name
    if($id === '' && $param=='id') $id = $conf['start'];

    return $id;
}
88
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. The ID is trimmed and
 * lowercased, ';' and '/' are mapped to namespace or separator characters,
 * accented characters are optionally converted to unaccented ones, and
 * repeated or dangling separators are collapsed.
 *
 * Results of the default cleaning are kept in the global $cache_cleanid.
 * Calls with $ascii=true neither read nor write that cache because they
 * can produce a different result for the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key = (string)$raw_id;

    // check if it's already in the memory cache (default cleaning only,
    // the cache key does not encode the $ascii flag)
    if (!$ascii && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse separator runs and drop separators around colons
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    if (!$ascii) $cache[$key] = $id;
    return $id;
}
144
/**
 * Return the namespace part of a wiki ID
 *
 * The namespace is everything in front of the last colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string|false the namespace, false if the ID contains no colon
 */
function getNS($id){
    $id  = (string)$id;
    $cut = strrpos($id,':');
    return ($cut === false) ? false : substr($id,0,$cut);
}
157
/**
 * Returns the ID without the namespace
 *
 * That is the part behind the last colon. An ID without any colon is
 * returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  the wiki ID
 * @return string the ID without its namespace
 */
function noNS($id) {
    $cut = strrpos($id, ':');
    if ($cut === false) {
        return $id;
    }
    return substr($id, $cut+1);
}
171
/**
 * Returns the current namespace
 *
 * This is the innermost namespace of the ID: the namespace part is taken
 * with getNS() and its own parent namespaces are stripped with noNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  the wiki ID
 * @return string|false whatever noNS() yields for the result of getNS()
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
180
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page name equals $conf['start'] (or is empty) the name of the
 * current namespace is used instead. If there is no such namespace either,
 * $conf['start'] is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  the wiki ID
 * @return string the page name, namespace name or start page name
 */
function noNSorNS($id) {
    global $conf;

    $name = noNS($id);
    if ($name != $conf['start'] && $name != false) {
        // a regular page name, use it as is
        return $name;
    }

    // start page or empty name: use the enclosing namespace instead
    $name = curNS($id);
    return ($name == false) ? $conf['start'] : $name;
}
198
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing remains,
 * 'section' followed by the digits of the title is used.
 *
 * If $check is an array it is used to keep the generated IDs unique: the
 * first use of an ID is recorded with a counter of 0, later uses increment
 * the counter and get it appended.
 *
 * @param string      $title   The headline title
 * @param array|bool  $check   Existing IDs (title => number), updated in place
 * @return string the link id
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title   = str_replace(array(':','.'),'',cleanID($title));
    $trimmed = ltrim($title,'0123456789_-');
    $title   = empty($trimmed)
        ? 'section'.preg_replace('/[^0-9]+/','',$title) //keep numbers from headline
        : $trimmed;

    if(!is_array($check)) return $title;

    // make sure titles are unique
    if (array_key_exists ($title,$check)) {
        $title .= ++ $check[$title];
    } else {
        $check[$title] = 0;
    }

    return $title;
}
227
228
/**
 * Wiki page existence check
 *
 * Checks whether the file returned by wikiFN() exists.
 * Parameters are the same as for wikiFN().
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     id of the wikipage
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  flag indicating that $id should be cleaned
 * @return bool true if the page file exists
 */
function page_exists($id,$rev='',$clean=true) {
    $file = wikiFN($id,$rev,$clean);
    return @file_exists($file);
}
239
/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Current pages live below $conf['datadir'], old revisions below
 * $conf['olddir']. With $conf['compression'] enabled, an existing '.gz' or
 * '.bz2' revision file is preferred; otherwise the configured extension
 * is appended.
 *
 * Results are kept in the global $cache_wikifn, keyed by the processed
 * (cleaned) ID and the revision, so calls with and without $clean can
 * never be served each other's path.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);

    // look up by the processed id: the raw id alone does not tell whether
    // the cached path was built from a cleaned or an uncleaned id
    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (@file_exists($fn . '.gz')){
                $fn .= '.gz';
            }else if(@file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) { $cache[$id] = array(); }
    $cache[$id][$rev] = $fn;
    return $fn;
}
288
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned ID and lives
 * in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  the page ID
 * @return string full path of the lock file
 */
function wikiLockFN($id) {
    global $conf;
    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
298
299
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is placed below $conf['metadir'].
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   the page ID
 * @param  string $ext  file extension, appended as given
 * @return string full path of the meta file
 */
function metaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
312
/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is placed below $conf['mediametadir'].
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   the media ID
 * @param  string $ext  file extension, appended as given
 * @return string full path of the media meta file
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
325
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * All files matching "<meta basename>.*" are collected and then reduced
 * to those with exactly one extension directly behind the basename.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id  the page ID
 * @return array full paths of the meta files (keys as returned by glob)
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $files    = glob($basename.'.*', GLOB_MARK);
    if (!$files) {
        // nothing found (or glob failed)
        return array();
    }

    // filter files like foo.bar.meta when $id == 'foo'; directories are
    // dropped as well because GLOB_MARK appends a slash to them
    $pattern = '/^'.preg_quote($basename, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $files);
}
338
/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Without a revision the file is located below $conf['mediadir']. With a
 * revision it is located below $conf['mediaolddir'] and the integer
 * revision is inserted between the file name and its extension.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string     $id   the media ID
 * @param  string|int $rev  media revision, empty for the current file
 * @return string full path of the media file
 */
function mediaFN($id, $rev=''){
    global $conf;
    $id = str_replace(':','/',cleanID($id));

    if(empty($rev)){
        return $conf['mediadir'].'/'.utf8_encodeFN($id);
    }

    // NOTE(review): element 0 of mimetype()'s result is used as the file
    // extension here - confirm against mimetype()
    $ext  = mimetype($id);
    $name = substr($id,0, -1*strlen($ext[0])-1);
    return $conf['mediaolddir'].'/'.utf8_encodeFN($name .'.'.( (int) $rev ).'.'.$ext[0]);
}
360
/**
 * Returns the full filepath to a localized file
 *
 * The file is looked up for the configured language, first below
 * DOKU_CONF.'lang/', then below DOKU_INC.'inc/lang/'. If neither exists
 * the path of the english file is returned (without checking that it
 * exists).
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $name = $id.'.'.$ext;

    $candidates = array(
        DOKU_CONF.'lang/'.$conf['lang'].'/'.$name,
        DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name
    );
    foreach($candidates as $candidate){
        if(@file_exists($candidate)) return $candidate;
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
382
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are taken relative to $ns: '.' segments are
 * dropped and '..' segments remove the preceding segment. IDs without any
 * colon get $ns prepended unless $ns is false. All other IDs are left
 * as they are.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  bool         $clean  run the result through cleanID()
 * @return string the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    // (square brackets: the {} string offset syntax is gone in PHP 8)
    if($id && $id[0] === '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon (empty first segment)
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon (empty last segment)
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
433
/**
 * Resolves a media id to a full media id
 *
 * Nothing is returned; the results are written to the reference
 * parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns      namespace the media ID is relative to
 * @param  string       &$page   media ID, replaced by the resolved ID
 * @param  bool         &$exists set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $exists = @file_exists(mediaFN($page));
}
444
/**
 * Resolves a page id to a full page id
 *
 * Nothing is returned; the results are written to the reference
 * parameters. An empty $page refers to the current page ($ID). A hash
 * part behind '#' is kept and cleaned separately. Namespace links (ending
 * in ':' or ';', or '/' with $conf['useslash']) are resolved to a page in
 * or named like that namespace. For other links the plural/singular form
 * is tried when the page does not exist and $conf['autoplural'] is set.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string|false $ns      namespace the page ID is relative to
 * @param  string       &$page   page ID, replaced by the resolved, cleaned ID
 * @param  bool         &$exists set to whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    $last   = substr($page,-1);
    $nsLink = ($last == ':' || $last == ';' || ($conf['useslash'] && $last == '/'));

    if($nsLink){
        if(page_exists($page.$conf['start'])){
            // start page inside namespace
            $page  .= $conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)))){
            // page named like the NS inside the NS
            $page  .= noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page)){
            // page like namespace exists, the id stays as it is
            $exists = true;
        }else{
            // fall back to default
            $page .= $conf['start'];
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = (substr($page,-1) == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
516
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed below $conf['cachedir'] in a subdirectory named
 * after the first character of the md5 hash.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets: the {} string offset syntax is gone in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
535
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, with
 * _isHiddenPage() as its default action.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  the page ID to check
 * @return bool the 'hidden' flag of the event data after the event ran
 */
function isHiddenPage($id){
    $event = array('id' => $id, 'hidden' => false);
    trigger_event('PAGEUTILS_ID_HIDEPAGE', $event, '_isHiddenPage');
    return $event['hidden'];
}
549
/**
 * callback checks if page is hidden
 *
 * Sets $data['hidden'] to true when the ID, prefixed with a colon,
 * matches the regular expression in $conf['hidepages']. Nothing is done
 * when the page is already flagged as hidden, when no pattern is
 * configured or when $ACT is 'admin'.
 *
 * @param array $data event data    see isHiddenPage()
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    $skip = $data['hidden'] || empty($conf['hidepages']) || $ACT == 'admin';
    if ($skip) return;

    $pattern = '/'.$conf['hidepages'].'/ui';
    if(preg_match($pattern,':'.$data['id'])){
        $data['hidden'] = true;
    }
}
567
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  the page ID to check
 * @return bool true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
576
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id  the page or namespace ID
 * @return string the formatted ID
 */
function prettyprint_id($id) {
    // empty id or a lone colon means the root namespace
    if (!$id || $id === ':') {
        return '*';
    }
    $isNamespace = (substr($id, -1, 1) === ':');
    return hsc($isNamespace ? $id.'*' : $id);
}
595
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL encodes the name
 * while keeping slashes.
 *
 * When the second parameter is true the string will be encoded only if
 * it contains characters other than letters, digits and / _ - . % -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  the filename to encode
 * @param  bool   $safe  skip encoding for names made of safe characters only
 * @return string the encoded filename
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8') return $file;

    // nothing to encode (or already encoded)
    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)) return $file;

    if($mode == 'safe') return SafeFN::encode($file);

    // URL encode but keep the directory separators
    return str_replace('%2F','/',urlencode($file));
}
625
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL decodes the name.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file  the encoded filename
 * @return string the decoded filename
 */
function utf8_decodeFN($file){
    global $conf;

    switch($conf['fnencode']){
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
644
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace
 *
 * Starting with the namespace of $ID, each namespace up to the root is
 * checked for a page with the given name.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 * @param  string $page the pagename you're looking for
 * @return string|false the full page id of the found page, false if none
 */
function page_findnearest($page){
    if (!$page) return false;
    global $ID;

    $ns = $ID;
    while (true) {
        // step one namespace up, the root namespace yields a plain page name
        $ns = getNS($ns);
        $candidate = ltrim("$ns:$page",':');
        if(page_exists($candidate)){
            return $candidate;
        }
        // stop after the root namespace has been checked
        if (!$ns) break;
    }

    return false;
}
670