xref: /dokuwiki/inc/pageutils.php (revision 1c07b9e622d139fa815c955c89569f96342475fb)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Reads $_REQUEST[$param]. If that is empty and $conf['userewrite'] is 2,
 * the ID is extracted from the request URI (or PATH_INFO) instead.
 *
 * An ID ending in a namespace separator (':' or, with $conf['useslash'],
 * '/') is resolved to a page inside that namespace and send_redirect() is
 * called with the URL of that page.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  name of the request parameter holding the ID
 * @param  boolean $clean  pass the result through cleanID()
 * @return string|null     the ID; null is only possible when nothing was
 *                         found, $clean is false and $param is not 'id'
 */
function getID($param='id',$clean=true){
    global $conf;

    // only plain strings are usable as an ID; array input such as
    // ?id[]=x is treated like a missing parameter
    $id = null;
    if(isset($_REQUEST[$param]) && is_string($_REQUEST[$param])){
        $id = $_REQUEST[$param];
    }

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script  = '';

        //get the script URL
        if($conf['basedir']){
            // anything but the page id is served from lib/exe/
            $relpath  = ($param != 'id') ? 'lib/exe/' : '';
            $filename = isset($_SERVER['SCRIPT_FILENAME']) ? $_SERVER['SCRIPT_FILENAME'] : '';
            $script   = $conf['basedir'].$relpath.basename($filename);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode((string)$id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    $last = substr((string)$id,-1);
    if($last == ':' || ($conf['useslash'] && $last == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
85
/**
 * Remove unwanted chars from ID
 *
 * Lowercases the ID, maps the alternative namespace separators ';' and
 * (depending on $conf['useslash']) '/' and strips special characters.
 * Depending on $conf['deaccent'] or $ascii the ID is additionally
 * romanized and/or deaccented.
 *
 * Results of the default mode ($ascii and $media both false) are kept in
 * the global $cache_cleanid. The other modes can produce a different
 * result for the same raw ID, so they neither read nor write that cache.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     Allow leading or trailing _ for media files
 * @return string             The cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key       = (string)$raw_id;
    $cacheable = (!$ascii && !$media);

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace seperator
    $id = strtr($id,';',':');
    if($conf['useslash']){
        $id = strtr($id,'/',':');
    }else{
        $id = strtr($id,'/',$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up
    $id = preg_replace($sepcharpat,$sepchar,$id);   // collapse runs of the separator char
    $id = preg_replace('#:+#',':',$id);             // collapse runs of colons
    $id = ($media ? trim($id,':.-') : trim($id,':._-'));
    $id = preg_replace('#:[:\._\-]+#',':',$id);     // no punctuation right after a colon

    if ($cacheable) $cache[$key] = $id;
    return($id);
}
141
/**
 * Extract the namespace part of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id   wiki ID (cast to string before use)
 * @return string|false everything before the last colon, or false when
 *                      the ID contains no colon
 */
function getNS($id){
    $id    = (string) $id;
    $colon = strrpos($id,':');
    if($colon === false) return false;

    return substr($id,0,$colon);
}
154
/**
 * Strip the namespace from a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  wiki ID
 * @return string      the part after the last colon; the ID itself is
 *                     returned unchanged when it contains no colon
 */
function noNS($id) {
    $colon = strrpos($id, ':');
    if ($colon === false) return $id;

    return substr($id, $colon + 1);
}
168
/**
 * Returns the innermost namespace of an ID
 *
 * This is the last segment of what getNS() returns, e.g. 'b' for 'a:b:page'.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  wiki ID
 * @return string|false whatever noNS() yields for the namespace of $id
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
177
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page name is missing or equals $conf['start'], the name of the
 * innermost namespace is used instead; if there is no namespace either,
 * $conf['start'] is returned.
 *
 * Comparisons are strict so that a page or namespace called '0' is treated
 * as a real name and not as "missing".
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  wiki ID
 * @return string      page name, namespace name or $conf['start']
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === null || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === null || $p === '') {
            return $conf['start'];
        }
    }
    return $p;
}
195
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing is left,
 * 'section' followed by the digits of the title is used.
 *
 * When $check is an array it is used to keep IDs unique: every ID handed
 * out is recorded as a key, and a repeated title gets the next free
 * numeric suffix. Suffixed IDs are recorded too, so that a later headline
 * which cleans to the same string (e.g. "foo1" after two "foo") does not
 * produce a duplicate.
 *
 * @param string  $title   The headline title
 * @param array   $check   Existing IDs (title => number), updated in place
 * @return string          The link ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists($title,$check)) {
            $check[$title] = 0;
        } else {
            $base = $title;
            // advance the counter of the base title until the suffixed
            // ID is one that has not been handed out before
            do {
                $title = $base . ++$check[$base];
            } while (array_key_exists($title,$check));
            $check[$title] = 0;
        }
    }

    return $title;
}
223
224
/**
 * Wiki page existence check
 *
 * Tests whether the file returned by wikiFN() exists; the parameters are
 * passed on to wikiFN() unchanged.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     id of wikipage
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  flag indicating that $id should be cleaned
 * @return bool
 */
function page_exists($id,$rev='',$clean=true) {
    $file = wikiFN($id,$rev,$clean);
    return @file_exists($file);
}
235
/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN(). Current pages live below
 * $conf['datadir'], old revisions below $conf['olddir'].
 *
 * Results are kept in the global $cache_wikifn. The cache is keyed on the
 * ID that is actually used to build the path (i.e. after optional
 * cleaning), so calls with and without $clean only share an entry when
 * they refer to the same ID.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string            full path of the page file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id  = $clean ? cleanID($raw_id) : $raw_id;
    $key = (string)$id;

    if (isset($cache[$key]) && isset($cache[$key][$rev])) {
        return $cache[$key][$rev];
    }

    $path = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($path).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($path).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (@file_exists($fn . '.gz')){
                $fn .= '.gz';
            }else if(@file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$key])) { $cache[$key] = array(); }
    $cache[$key][$rev] = $fn;
    return $fn;
}
283
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID with a '.lock'
 * extension, located in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  id of wikipage
 * @return string      path of the lock file
 */
function wikiLockFN($id) {
    global $conf;

    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
293
294
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is passed through utf8_encodeFN() before $ext is appended.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   id of wikipage
 * @param  string $ext  extension appended as is (include the leading dot)
 * @return string       path below $conf['metadir']
 */
function metaFN($id,$ext){
    global $conf;

    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
309
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * The base path is built with metaFN(), so the ID is cleaned and encoded
 * the same way as for a single meta file. Only files that consist of that
 * base path plus exactly one extension are returned; files such as
 * foo.bar.meta are not reported for the ID 'foo'.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @param  string $id  id of wikipage
 * @return array       list of file paths, empty when nothing matches or
 *                     glob() fails
 */
function metaFiles($id){
    $basename = metaFN($id,'');
    $files    = glob($basename.'.*');
    if(!$files) return array();

    // keep "<basename>.<ext>" only; a second dot or a slash after the
    // basename means the file is not a meta file of this ID
    $own = preg_grep('/^'.preg_quote($basename,'/').'\.[^.\/]*$/',$files);
    return $own ? array_values($own) : array();
}
323
/**
 * returns the full path to the mediafile specified by ID
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is passed through utf8_encodeFN().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  media id
 * @return string      path below $conf['mediadir']
 */
function mediaFN($id){
    global $conf;

    $path = str_replace(':','/',cleanID($id));
    return $conf['mediadir'].'/'.utf8_encodeFN($path);
}
338
/**
 * Returns the full filepath to a localized textfile
 *
 * The file is looked up in the directory of $conf['lang']; if it does not
 * exist there, the path of the english version is returned instead (without
 * checking that one for existence).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  name of the textfile without the .txt extension
 * @return string      path below DOKU_INC.'inc/lang/'
 */
function localeFN($id){
    global $conf;

    $langdir   = DOKU_INC.'inc/lang/';
    $localized = $langdir.$conf['lang'].'/'.$id.'.txt';
    if(@file_exists($localized)) return $localized;

    //fall back to english
    return $langdir.'en/'.$id.'.txt';
}
354
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * An ID starting with a dot is taken relative to $ns: '.' segments are
 * dropped and '..' segments remove the preceding segment. An ID without
 * any colon is put into $ns unless $ns is false. Everything else is
 * returned as is (apart from optional cleaning).
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  bool         $clean  pass the result through cleanID()
 * @return string               the resolved ID
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff (an empty or non-string id never does)
    if(is_string($id) && $id !== '' && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // an empty first/last segment is kept so a leading/trailing colon survives
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
405
/**
 * Resolves a media id to its full form
 *
 * Nothing is returned; the results are written to the reference parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the id is relative to
 * @param  string &$page   media id, replaced by the resolved and cleaned id
 * @param  bool   &$exists set to whether the file from mediaFN() exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $page   = resolve_id($ns,$page);
    $exists = @file_exists(mediaFN($page));
}
416
/**
 * Resolves a page id to its full form
 *
 * Nothing is returned; the results are written to the reference parameters.
 * An id ending in a namespace separator is pointed at a page inside that
 * namespace. For other ids that do not exist, the plural/singular form is
 * tried when $conf['autoplural'] is set. A '#hash' part is cleaned
 * separately and re-appended at the end.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the id is relative to
 * @param  string &$page   page id, replaced by the resolved and cleaned id
 * @param  bool   &$exists set to whether the resolved page was found
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    $exists = false;

    // split off the hashlink; strpos() is 0 for a leading '#', so such an
    // id is not split
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // cleaning happens at the very end

    // wikiFN() cleans the id itself
    $file = wikiFN($page);

    $last     = substr($page,-1);
    $isNsLink = in_array($last, array(':', ';')) ||
                ($conf['useslash'] && $last == '/');

    if($isNsLink){
        $startPage = $page.$conf['start'];
        if(page_exists($startPage)){
            // start page inside namespace
            $page   = $startPage;
            $exists = true;
        }else{
            $sameName = $page.noNS(cleanID($page));
            if(page_exists($sameName)){
                // page named like the NS inside the NS
                $page   = $sameName;
                $exists = true;
            }elseif(page_exists($page)){
                // page like namespace exists, id stays as it is
                $exists = true;
            }else{
                // fall back to default
                $page = $startPage;
            }
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = (substr($page,-1) == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
482
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory of $conf['cachedir'] named after
 * the first character of the md5 hash. io_makeFileDir() is called on the
 * resulting path, so the needed directory is created by this function.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets: the curly brace offset syntax is gone in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
501
/**
 * Checks a pageid against $conf['hidepages']
 *
 * $conf['hidepages'] is used as a case insensitive UTF-8 regular
 * expression and matched against the id prefixed with a colon. Nothing is
 * hidden when the option is empty or when $ACT is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id
 * @return bool        true if the page is hidden
 */
function isHiddenPage($id){
    global $conf;
    global $ACT;

    if(empty($conf['hidepages']) || $ACT == 'admin') return false;

    $pattern = '/'.$conf['hidepages'].'/ui';
    return (bool) preg_match($pattern,':'.$id);
}
518
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id
 * @return bool        true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
527
/**
 * Format an id for output to a user
 *
 * Namespaces (ids with a trailing colon) are shown with a trailing ":*".
 * An empty id or a single colon stands for the root namespace and is shown
 * as "*". Every other result is passed through hsc() before it is returned.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id  page or namespace id
 * @return string      the formatted id
 */
function prettyprint_id($id) {
    if (!$id || $id === ':') return '*';

    $shown = (substr($id, -1) === ':') ? $id.'*' : $id;
    return hsc($shown);
}
546
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * The 'fnencode' option selects the encoding: 'utf-8' leaves the name
 * untouched, 'safe' uses SafeFN::encode(), anything else URL encodes the
 * name while keeping slashes.
 *
 * When the second parameter is true (default) a name consisting only of
 * ASCII letters, digits and the characters / _ - . % is returned
 * unchanged, which makes it safe to run the function multiple times on
 * the same string.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  file name
 * @param  bool   $safe  skip names that need no encoding
 * @return string        the encoded file name
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;

    $mode = $conf['fnencode'];
    if($mode == 'utf-8') return $file;

    $needsNoEncoding = $safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file);
    if($needsNoEncoding) return $file;

    if($mode == 'safe') return SafeFN::encode($file);

    // URL encode but keep the directory separators readable
    return str_replace('%2F','/',urlencode($file));
}
577/**
578 * Decode a filename back to UTF-8
579 *
580 * Uses the 'fnencode' option to determine encoding
581 *
582 * @author Andreas Gohr <andi@splitbrain.org>
583 * @see    urldecode
584 */
585function utf8_decodeFN($file){
586    global $conf;
587    if($conf['fnencode'] == 'utf-8') return $file;
588
589    if($conf['fnencode'] == 'safe'){
590        return SafeFN::decode($file);
591    }
592
593    return urldecode($file);
594}
595
596