xref: /dokuwiki/inc/pageutils.php (revision c62a5f5846e3477044ebc93e96f98b878d3e7a59)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts the ID from
 * the request URI when $conf['userewrite'] is set to 2.
 *
 * If the resulting ID ends in a namespace separator (':' or, with
 * $conf['useslash'], '/') it is resolved to a page inside that namespace
 * and a redirect to that page is issued through send_redirect().
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  Name of the request parameter holding the ID
 * @param  boolean $clean  Run the result through cleanID()
 * @return mixed           The ID; may be empty/null for $param != 'id'
 */
function getID($param='id',$clean=true){
    global $conf;

    $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // not every SAPI/webserver provides all of these $_SERVER keys,
        // so read them defensively instead of triggering undefined index notices
        $request  = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $filename = isset($_SERVER['SCRIPT_FILENAME']) ? $_SERVER['SCRIPT_FILENAME'] : '';
        $script   = '';

        //get the script URL
        if(!empty($conf['basedir'])){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'].$relpath.basename($filename);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            // PATH_INFO is used as the request itself, $script stays empty
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && $filename){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $filename);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    $last = substr((string)$id,-1);
    if($last == ':' || ($conf['useslash'] && $last == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists: drop the trailing separator
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
85
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) accented characters are
 * romanized and/or converted to unaccented ones.
 *
 * Results for the default flags are memoized in the global
 * $cache_cleanid array, keyed by the raw ID. Calls with $ascii or $media
 * set bypass that cache in both directions, because the cache key does
 * not include the flags and the cleaned result depends on them.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     Allow leading or trailing _ for media files
 * @return string             The cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key = (string)$raw_id;
    // only the default variant may be served from / stored in the cache
    $cacheable = (!$ascii && !$media);

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    $id = strtr($id,';',':');
    if($conf['useslash']){
        $id = strtr($id,'/',':');
    }else{
        $id = strtr($id,'/',$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse separator runs and colons, trim the edges
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = ($media ? trim($id,':.-') : trim($id,':._-'));
    $id = preg_replace('#:[:\._\-]+#',':',$id);

    if ($cacheable) $cache[$key] = $id;
    return($id);
}
141
/**
 * Return the namespace part of a wiki ID
 *
 * Everything before the last colon is considered the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The wiki ID
 * @return string|false The namespace, or false when the ID has no colon
 */
function getNS($id){
    $id  = (string)$id;
    $cut = strrpos($id, ':');
    return ($cut === false) ? false : substr($id, 0, $cut);
}
154
/**
 * Returns the ID without the namespace
 *
 * Everything after the last colon is returned; an ID without any colon
 * is handed back unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The wiki ID
 * @return string      The part after the last colon
 */
function noNS($id) {
    $cut = strrpos($id, ':');
    if ($cut === false) {
        return $id;
    }
    return substr($id, $cut + 1);
}
168
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the ID, i.e.
 * noNS() applied to the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  The wiki ID
 * @return mixed       Innermost namespace name; whatever noNS() yields
 *                     when the ID has no namespace
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
177
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page name is missing or equals $conf['start'], the name of the
 * innermost namespace is returned instead. If there is no namespace either,
 * $conf['start'] is returned.
 *
 * Comparisons are strict so that pages or namespaces literally named '0'
 * are not mistaken for "missing".
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  The wiki ID
 * @return string      Page name, namespace name or the configured start page
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    $noPage = ($p === false || $p === null || $p === '');
    if ($noPage || (string)$p === (string)$conf['start']) {
        $p = curNS($id);
        if ($p === false || $p === null || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}
195
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing is left,
 * 'section' followed by the digits of the headline is used.
 *
 * When $check is an array it is used (and updated) to guarantee unique IDs:
 * a repeated title gets an increasing number appended. Generated IDs are
 * registered in $check as well, so a later headline that literally matches
 * a generated ID (e.g. "foo1" after two "foo" headlines) can not collide.
 *
 * @param string  $title   The headline title
 * @param array   $check   Existing IDs (title => number), passed by reference
 * @return string          The section ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists ($title,$check)) {
            $check[$title] = 0;
        } else {
            $base = $title;
            // keep counting until the numbered ID is unused, too
            do {
                $title = $base . ++$check[$base];
            } while (array_key_exists ($title,$check));
            // remember the generated ID so it is never handed out again
            $check[$title] = 0;
        }
    }

    return $title;
}
223
224
/**
 * Wiki page existence check
 *
 * Looks up the data file via wikiFN() and tests whether it exists.
 * Parameters are the same as for wikiFN().
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     id of wikipage
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  whether $id still needs to be cleaned
 * @return bool           true if the page file exists
 */
function page_exists($id,$rev='',$clean=true) {
    $path = wikiFN($id, $rev, $clean);
    return @file_exists($path);
}
235
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Current pages live below $conf['datadir'], old revisions below
 * $conf['olddir']. With $conf['compression'] enabled, an existing .gz or
 * .bz2 attic file is preferred; otherwise the configured extension is used.
 *
 * Results are memoized in the global $cache_wikifn array, indexed by raw ID
 * and revision.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string            full path of the page file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;
    global $cache_wikifn;
    $cache = & $cache_wikifn;

    // serve repeated lookups from the memory cache
    if (isset($cache[$raw_id][$rev])) {
        return $cache[$raw_id][$rev];
    }

    // turn the (optionally cleaned) ID into an encoded relative path
    $path    = str_replace(':', '/', $clean ? cleanID($raw_id) : $raw_id);
    $encoded = utf8_encodeFN($path);

    if (empty($rev)) {
        $fn = $conf['datadir'].'/'.$encoded.'.txt';
    } else {
        $fn = $conf['olddir'].'/'.$encoded.'.'.$rev.'.txt';
        if ($conf['compression']) {
            // we want to read both compressions; when the file doesn't
            // exist yet, the configured extension is taken
            $suffix = '.' . $conf['compression'];
            foreach (array('.gz', '.bz2') as $known) {
                if (@file_exists($fn . $known)) {
                    $suffix = $known;
                    break;
                }
            }
            $fn .= $suffix;
        }
    }

    if (!isset($cache[$raw_id])) {
        $cache[$raw_id] = array();
    }
    $cache[$raw_id][$rev] = $fn;

    return $fn;
}
283
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file lives in $conf['lockdir'] and is named after the md5 hash
 * of the cleaned page ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  The page ID
 * @return string      full path of the lock file
 */
function wikiLockFN($id) {
    global $conf;

    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
293
294
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is placed below $conf['metadir'].
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   The page ID
 * @param  string $ext  Extension appended verbatim (include the dot)
 * @return string       full path of the meta file
 */
function metaFN($id,$ext){
    global $conf;

    $path = str_replace(':', '/', cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
307
/**
 * Returns the full path to the media's meta file specified by ID and extension
 *
 * Works like metaFN() but uses $conf['mediametadir'] as base directory.
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   The media ID
 * @param  string $ext  Extension appended verbatim (include the dot)
 * @return string       full path of the media meta file
 */
function mediaMetaFN($id,$ext){
    global $conf;

    $path = str_replace(':', '/', cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
320
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * All files named "<meta basename>.<ext>" are collected. Entries whose
 * extension part contains another dot or a slash are dropped, so files
 * like foo.bar.meta are not returned for $id == 'foo'.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id  The page ID
 * @return array       matching file paths (keys as returned by preg_grep)
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    $found    = glob($basename.'.*', GLOB_MARK);
    if (!$found) {
        return array();
    }

    // exactly one extension segment after the basename, no directories
    $pattern = '/^'.preg_quote($basename, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $found);
}
333
/**
 * Returns the full path to the mediafile specified by ID
 *
 * The filename is passed through utf8_encodeFN() to protect Unicode chars.
 * Without a revision the file is located below $conf['mediadir']. With a
 * revision the integer revision is inserted before the file extension and
 * the file is located below $conf['mediaolddir'].
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   The media ID
 * @param  mixed  $rev  Revision, empty for the current file
 * @return string       full path of the media file
 */
function mediaFN($id, $rev=''){
    global $conf;

    $path = str_replace(':', '/', cleanID($id));

    if (empty($rev)) {
        return $conf['mediadir'].'/'.utf8_encodeFN($path);
    }

    // NOTE(review): element 0 of mimetype()'s result is used as the file
    // extension here - confirm against mimetype()
    $mime = mimetype($path);
    $ext  = $mime[0];
    // cut the extension and the dot in front of it off the path
    $stem = substr($path, 0, -1 * strlen($ext) - 1);

    return $conf['mediaolddir'].'/'.utf8_encodeFN($stem.'.'.((int) $rev).'.'.$ext);
}
355
/**
 * Returns the full filepath to a localized textfile
 *
 * Lookup order: the language directory below DOKU_CONF, then the bundled
 * language directory below DOKU_INC for $conf['lang']. If neither file
 * exists, the path of the bundled English file is returned (without
 * checking that it exists).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  Name of the text file without the .txt extension
 * @return string      full path of the text file
 */
function localeFN($id){
    global $conf;

    $name       = $id.'.txt';
    $candidates = array(
        DOKU_CONF.'/lang/'.$conf['lang'].'/'.$name,
        DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name
    );

    foreach ($candidates as $candidate) {
        if (@file_exists($candidate)) {
            return $candidate;
        }
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
374
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly, use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are treated as relative to $ns: '.' segments are
 * dropped and '..' segments move one namespace up. IDs without any colon
 * get $ns prepended (unless $ns is false). All other IDs are left alone.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     The namespace the ID is relative to
 * @param  string       $id     The (possibly relative) ID
 * @param  bool         $clean  Run the result through cleanID()
 * @return string               The resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff (isset() guards against an empty id; square brackets
    // are used because the curly brace offset syntax is gone in PHP 8)
    if(isset($id[0]) && $id[0] === '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon (empty first segment)
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing (or only an empty segment) left to go up from
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon (empty last segment)
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
425
/**
 * Resolves a media ID to a full media id
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      The namespace the ID is relative to
 * @param  string $page    (reference) media ID, replaced by the resolved ID
 * @param  bool   $exists  (reference) set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $exists = @file_exists(mediaFN($page));
}
436
/**
 * Resolves a page ID to a full page id
 *
 * Nothing is returned; the results are written to the reference parameters.
 * An empty $page refers to the global $ID. A "#hash" part is split off,
 * cleaned separately and re-appended to the resolved ID. Namespace links
 * (trailing ':' or ';', or '/' with $conf['useslash']) are mapped to a page
 * inside the namespace. For other links the alternative plural/singular
 * form is tried when $conf['autoplural'] is set and the page is missing.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      The namespace the ID is relative to
 * @param  string $page    (reference) page ID, replaced by the resolved ID
 * @param  bool   $exists  (reference) set to whether the page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    $last        = substr($page,-1);
    $isNamespace = in_array($last, array(':', ';')) ||
                   ($conf['useslash'] && $last == '/');

    if($isNamespace){
        if(page_exists($page.$conf['start'])){
            // start page inside namespace
            $page  .= $conf['start'];
            $exists = true;
        }else{
            $sameName = $page.noNS(cleanID($page));
            if(page_exists($sameName)){
                // page named like the NS inside the NS
                $page   = $sameName;
                $exists = true;
            }elseif(page_exists($page)){
                // page like namespace exists, keep the ID as it is
                $exists = true;
            }else{
                // fall back to default
                $page .= $conf['start'];
            }
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = ($last == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
508
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed below $conf['cachedir'] in a subdirectory named after
 * the first character of the md5 hash of $data.
 *
 * The needed directory is created by this function (via io_makeFileDir())!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square bracket offset: the curly brace form is a parse error in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
527
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case insensitive UTF-8 regular
 * expression and matched against the ID prefixed with a colon. Nothing is
 * hidden when the option is empty or when the global $ACT is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  The page ID
 * @return bool        true if the page is hidden
 */
function isHiddenPage($id){
    global $conf;
    global $ACT;

    if (empty($conf['hidepages']) || $ACT == 'admin') {
        return false;
    }

    return (bool) preg_match('/'.$conf['hidepages'].'/ui', ':'.$id);
}
544
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  The page ID
 * @return bool        true if the page is not hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
553
/**
 * Format an id for output to a user
 *
 * Namespaces (IDs with a trailing colon) are shown with a trailing ":*".
 * An empty ID or a lone colon stands for the root namespace and is shown
 * as "*". All other output is passed through hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id  The ID to format
 * @return string      The formatted ID
 */

function prettyprint_id($id) {
    // root namespace
    if ($id === ':' || !$id) {
        return '*';
    }

    $shown = (substr($id, -1, 1) === ':') ? $id.'*' : $id;
    return hsc($shown);
}
572
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine the encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::encode(), everything else URL
 * encodes the name while keeping slashes.
 *
 * When the second parameter is true the string will only be encoded if it
 * contains characters other than ASCII letters, digits and "/_-.%" -
 * this makes it safe to run it multiple times on the same string
 * (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  The filename to encode
 * @param  bool   $safe  Skip names that need no encoding
 * @return string        The encoded filename
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if ($mode == 'utf-8') {
        return $file;
    }

    // nothing to do for plain names (or names that were encoded before)
    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)) {
        return $file;
    }

    if ($mode == 'safe') {
        return SafeFN::encode($file);
    }

    // URL encoding, but directory separators have to stay intact
    return str_replace('%2F','/',urlencode($file));
}
602
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine the encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::decode(), everything else is URL
 * decoded.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file  The filename to decode
 * @return string        The decoded filename
 */
function utf8_decodeFN($file){
    global $conf;

    switch ($conf['fnencode']) {
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
621
622