xref: /dokuwiki/inc/pageutils.php (revision 2398a2b54196667d6659d3d0489212b271c43703)
1<?php
2/**
3 * Utilities for handling pagenames
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 * @todo       Combine similar functions like {wiki,media,meta}FN()
8 */
9
/**
 * Fetch an ID from the request
 *
 * Uses either the standard $_REQUEST variable or extracts the ID from
 * the full request URI when $conf['userewrite'] is set to 2.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * If the ID ends with a namespace separator, a matching page is looked
 * up and send_redirect() is called with the URL of the result.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $param  Name of the request variable holding the ID
 * @param  boolean $clean  Run the result through cleanID()
 * @return mixed   The ID found in the request (null if none and not cleaned)
 */
function getID($param='id',$clean=true){
    global $conf;

    $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        // server variables are not guaranteed to exist, so none of them
        // is read without checking first
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
        $script = '';

        //get the script URL
        if(!empty($conf['basedir'])){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $scriptfile = isset($_SERVER['SCRIPT_FILENAME']) ? $_SERVER['SCRIPT_FILENAME'] : '';
            $script = $conf['basedir'].$relpath.basename($scriptfile);

        }elseif(!empty($_SERVER['PATH_INFO'])){
            $request = $_SERVER['PATH_INFO'];
        }elseif(!empty($_SERVER['SCRIPT_NAME'])){
            $script = $_SERVER['SCRIPT_NAME'];
        }elseif(!empty($_SERVER['DOCUMENT_ROOT']) && !empty($_SERVER['SCRIPT_FILENAME'])){
            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
                    $_SERVER['SCRIPT_FILENAME']);
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode((string) $id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    $last = substr((string) $id,-1);
    if($last == ':' || ($conf['useslash'] && $last == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        send_redirect(wl($id,'',true));
    }

    if($clean) $id = cleanID($id);
    if(empty($id) && $param=='id') $id = $conf['start'];

    return $id;
}
85
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) the ID is additionally passed
 * through utf8_romanize() and/or utf8_deaccent().
 *
 * Results are kept in the global $cache_cleanid memory cache. Only calls
 * with the default flags are cached, because $ascii and $media change the
 * result for the same raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @param  boolean $media     Allow leading or trailing _ for media files
 * @return string  The cleaned ID
 */
function cleanID($raw_id,$ascii=false,$media=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    $key       = (string)$raw_id;
    $cacheable = !$ascii && !$media;

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[$key])) {
        return $cache[$key];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat == null) // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';

    $id = trim($key);
    $id = utf8_strtolower($id);

    //alternative namespace separator
    $id = strtr($id,';',':');
    if($conf['useslash']){
        $id = strtr($id,'/',':');
    }else{
        $id = strtr($id,'/',$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
    if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);

    //remove specials
    $id = utf8_stripspecials($id,$sepchar,'\*');

    if($ascii) $id = utf8_strip($id);

    //clean up: collapse separator and colon runs, then trim punctuation
    //at both ends of the ID and around every colon
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = ($media ? trim($id,':.-') : trim($id,':._-'));
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    if ($cacheable) $cache[$key] = $id;
    return($id);
}
142
/**
 * Return the namespace part of a wiki ID
 *
 * The namespace is everything before the last colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The wiki ID
 * @return string|false  The namespace, or false if the ID contains no colon
 */
function getNS($id){
    $id  = (string)$id;
    $sep = strrpos($id,':');
    return ($sep === false) ? false : substr($id,0,$sep);
}
155
/**
 * Returns the ID without the namespace
 *
 * This is everything after the last colon; an ID without a colon is
 * returned unchanged.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $id  The wiki ID
 * @return string
 */
function noNS($id) {
    $sep = strrpos($id, ':');
    if ($sep === false) {
        return $id;
    }
    return substr($id, $sep+1);
}
169
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace part of the ID, obtained by
 * applying noNS() to the result of getNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  The wiki ID
 * @return string|false
 */
function curNS($id) {
    $namespace = getNS($id);
    return noNS($namespace);
}
178
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * If the page name is empty or equals $conf['start'], the name of the
 * enclosing namespace is returned instead; if there is none either,
 * $conf['start'] is returned.
 *
 * Comparisons are strict so that a page or namespace named "0" is not
 * mistaken for an empty name.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 * @param  string $id  The wiki ID
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    // (string) turns false/null into '', but leaves "0" intact
    if ((string)$p === '' || (string)$p === (string)$conf['start']) {
        $p = curNS($id);
        if ((string)$p === '') {
            return $conf['start'];
        }
    }
    return $p;
}
196
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned with cleanID(), colons and dots are removed and
 * leading digits, underscores and dashes are stripped. If nothing is left,
 * 'section' followed by the digits of the headline is used.
 *
 * When $check is an array, the returned ID is guaranteed not to be a key
 * of it yet and is registered in it. Repeated titles get a numeric suffix.
 *
 * @param string  $title   The headline title
 * @param array   $check   Existing IDs (title => number), updated in place
 * @return string          The section ID
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (array_key_exists($title,$check)) {
            $base = $title;
            // keep counting: the suffixed candidate may itself be taken
            // already by a headline that literally reads e.g. "foo1"
            do {
                $title = $base.(++$check[$base]);
            } while (array_key_exists($title,$check));
        }
        // register the final ID, so later headlines cannot reuse it
        $check[$title] = 0;
    }

    return $title;
}
224
225
/**
 * Wiki page existence check
 *
 * Checks whether the file returned by wikiFN() exists.
 * Parameters are the same as for wikiFN().
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string $id     page id
 * @param  string $rev    page revision, empty string for current
 * @param  bool   $clean  flag indicating that $id should be cleaned
 * @return bool
 */
function page_exists($id, $rev = '', $clean = true) {
    // the @ deliberately spans the wikiFN() call as well
    $found = @file_exists(wikiFN($id, $rev, $clean));
    return $found;
}
236
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * Current pages are located below $conf['datadir'], old revisions below
 * $conf['olddir']. The filename is passed through utf8_encodeFN().
 * Results are remembered per raw ID and revision in the global
 * $cache_wikifn.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string   full path of the file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    // answer from the memory cache when possible
    if (isset($cache[$raw_id])) {
        if (isset($cache[$raw_id][$rev])) {
            return $cache[$raw_id][$rev];
        }
    }

    $path    = str_replace(':','/',$clean ? cleanID($raw_id) : $raw_id);
    $encoded = utf8_encodeFN($path);

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.$encoded.'.txt';
    }else{
        $fn = $conf['olddir'].'/'.$encoded.'.'.$rev.'.txt';
        if($conf['compression']){
            // an existing file wins, whatever its compression is; if there
            // is none yet the configured extension is used
            $suffix = '.' . $conf['compression'];
            if (@file_exists($fn . '.gz')){
                $suffix = '.gz';
            }elseif(@file_exists($fn . '.bz2')){
                $suffix = '.bz2';
            }
            $fn .= $suffix;
        }
    }

    if (!isset($cache[$raw_id])) {
        $cache[$raw_id] = array();
    }
    $cache[$raw_id][$rev] = $fn;
    return $fn;
}
284
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID with a '.lock'
 * extension, located in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 * @param  string $id  page id
 * @return string
 */
function wikiLockFN($id) {
    global $conf;
    $hash = md5(cleanID($id));
    return $conf['lockdir'].'/'.$hash.'.lock';
}
294
295
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is placed below $conf['metadir'].
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @param  string $id   page id
 * @param  string $ext  file extension, appended as given
 * @return string
 */
function metaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['metadir'].'/'.utf8_encodeFN($path).$ext;
}
308
/**
 * Returns the full path to the media's meta file specified by ID and extension
 *
 * The ID is cleaned, its colons become directory separators and the
 * result is placed below $conf['mediametadir'].
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   media id
 * @param  string $ext  file extension, appended as given
 * @return string
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $path = str_replace(':','/',cleanID($id));
    return $conf['mediametadir'].'/'.utf8_encodeFN($path).$ext;
}
321
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * Globs for everything starting with the meta base name plus a dot and
 * keeps only names with exactly one extension and no trailing slash.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 * @param  string $id  page id
 * @return array
 */
function metaFiles($id){
    $prefix  = metaFN($id, '');
    $matches = glob($prefix.'.*', GLOB_MARK);
    if (!$matches) {
        return array();
    }
    // filter files like foo.bar.meta when $id == 'foo'
    $pattern = '/^'.preg_quote($prefix, '/').'\.[^.\/]*$/u';
    return preg_grep($pattern, $matches);
}
334
/**
 * Returns the full path to the mediafile specified by ID
 *
 * Without a revision the file is located below $conf['mediadir']. With a
 * revision it is located below $conf['mediaolddir'] and the integer
 * revision is inserted between the name and the extension.
 *
 * The filename is passed through utf8_encodeFN().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 * @param  string $id   media id
 * @param  string $rev  revision, empty string for current
 * @return string
 */
function mediaFN($id, $rev=''){
    global $conf;
    $path = str_replace(':','/',cleanID($id));

    if(empty($rev)){
        return $conf['mediadir'].'/'.utf8_encodeFN($path);
    }

    // NOTE(review): index 0 of the mimetype() result is used as the file
    // extension here - confirm against mimetype()
    $mime      = mimetype($path);
    $extension = $mime[0];
    $stem      = substr($path,0, -1*strlen($extension)-1);
    return $conf['mediaolddir'].'/'.utf8_encodeFN($stem .'.'.( (int) $rev ).'.'.$extension);
}
356
/**
 * Returns the full filepath to a localized file
 *
 * Lookup order: the configured language below DOKU_CONF, the configured
 * language below DOKU_INC, and finally the english file below DOKU_INC.
 * The english path is returned without checking that it exists.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $name = $id.'.'.$ext;

    $custom = DOKU_CONF.'/lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($custom)){
        return $custom;
    }

    $bundled = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$name;
    if(@file_exists($bundled)){
        return $bundled;
    }

    //fall back to english
    return DOKU_INC.'inc/lang/en/'.$name;
}
377
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * An ID starting with a dot is resolved relative to $ns, with '.' and
 * '..' segments handled like in file system paths. An ID without any
 * colon is placed in $ns, unless $ns is false.
 *
 * Partly based on a cleanPath function found at
 * http://www.php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 * @param  string|false $ns     namespace the ID is relative to
 * @param  string       $id     the (possibly relative) ID
 * @param  bool         $clean  run the result through cleanID()
 * @return string
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff (isset() also guards against an empty id)
    if(isset($id[0]) && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // keep a leading colon
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}
428
/**
 * Resolves a media id to a full media id
 *
 * Nothing is returned; the results are written to the reference
 * parameters.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the id is relative to
 * @param  string &$page   media id, replaced by the resolved id
 * @param  bool   &$exists set to whether the media file exists
 */
function resolve_mediaid($ns,&$page,&$exists){
    $resolved = resolve_id($ns,$page);
    $page     = $resolved;
    $path     = mediaFN($resolved);
    $exists   = @file_exists($path);
}
439
/**
 * Resolves a page id to a full page id
 *
 * Nothing is returned; the results are written to the reference
 * parameters. An empty id refers to the current page ($ID). A '#hash'
 * part is split off, cleaned and appended again at the end.
 *
 * Namespace links (ending in ':' or ';', or '/' with useslash) are
 * pointed at the first existing page out of: start page in the
 * namespace, page named like the namespace inside it, page named like
 * the namespace; otherwise at the start page in the namespace. For
 * other links the plural/singular form is tried if $conf['autoplural']
 * is set and the page itself does not exist.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $ns      namespace the id is relative to
 * @param  string &$page   page id, replaced by the resolved and cleaned id
 * @param  bool   &$exists set to whether the resolved page exists
 */
function resolve_pageid($ns,&$page,&$exists){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    $hash = '';
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    $file = wikiFN($page);

    // if ends with colon or slash we have a namespace link
    $last        = substr($page,-1);
    $isNamespace = in_array($last, array(':', ';')) ||
                   ($conf['useslash'] && $last == '/');

    if($isNamespace){
        $startPage = $page.$conf['start'];
        if(page_exists($startPage)){
            // start page inside namespace
            $page   = $startPage;
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)))){
            // page named like the NS inside the NS
            $page  .= noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page)){
            // page like namespace exists, the id is kept as it is
            $exists = true;
        }else{
            // fall back to default
            $page = $startPage;
        }
    }elseif(@file_exists($file)){
        $exists = true;
    }elseif($conf['autoplural']){
        //check alternative plural/nonplural form
        $try = ($last == 's') ? substr($page,0,-1) : $page.'s';
        if(page_exists($try)){
            $page   = $try;
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}
511
/**
 * Returns the name of a cachefile from given data
 *
 * The file is located in a subdirectory of $conf['cachedir'] that is
 * named after the first character of the md5 hash.
 *
 * The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    // square brackets: the {} string offset syntax is gone in PHP 8
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}
530
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The option is used as a case insensitive UTF-8 regular expression and
 * matched against the ID with a colon prepended. Nothing is hidden when
 * the option is empty or the current action is 'admin'.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id
 * @return bool
 */
function isHiddenPage($id){
    global $conf;
    global $ACT;

    if(empty($conf['hidepages']) || $ACT == 'admin'){
        return false;
    }

    return (bool) preg_match('/'.$conf['hidepages'].'/ui',':'.$id);
}
547
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 * @param  string $id  page id
 * @return bool   true if isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id){
    $hidden = isHiddenPage($id);
    return !$hidden;
}
556
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. All other output is passed through hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 * @param  string $id  page or namespace id
 * @return string
 */
function prettyprint_id($id) {
    // empty id or a lone colon: root namespace
    if (!$id || $id === ':') {
        return '*';
    }
    $label = (substr($id, -1, 1) === ':') ? $id.'*' : $id;
    return hsc($label);
}
575
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::encode(), anything else URL encodes
 * the name while keeping slashes.
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file  file name
 * @param  bool   $safe  if true, only encode when non ASCII characters are detected
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    $mode = $conf['fnencode'];

    if($mode == 'utf-8') return $file;

    // only harmless characters, nothing to encode
    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
        return $file;
    }

    if($mode == 'safe'){
        return SafeFN::encode($file);
    }

    // url encoding, but directory separators have to stay
    return str_replace('%2F','/',urlencode($file));
}
605
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN::decode(), anything else URL decodes
 * the name.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file  file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;

    switch($conf['fnencode']){
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
624
625