xref: /dokuwiki/inc/pageutils.php (revision a5752066bd5ea83ac9b5dbb512f5768819a3437b)
1<?php
2
3/**
4 * Utilities for handling pagenames
5 *
6 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 * @todo       Combine similar functions like {wiki,media,meta}FN()
9 */
10
11use dokuwiki\ChangeLog\MediaChangeLog;
12use dokuwiki\ChangeLog\PageChangeLog;
13use dokuwiki\File\MediaResolver;
14use dokuwiki\File\PageResolver;
15
/**
 * Fetch an ID from the request
 *
 * The ID is read from the request variable named by $param. When that is
 * empty and $conf['userewrite'] is 2, the ID is extracted from the request
 * path instead.
 *
 * An ID ending in a namespace separator is expanded to a page inside that
 * namespace; when $ACT is 'show' a redirect to the expanded ID is sent.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    // no explicit parameter: derive the page id from the request path
    if (empty($id) && $conf['userewrite'] == 2) {
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        // figure out which part of the request is the script URL
        if ($conf['basedir']) {
            // anything but the page id is served from lib/exe/
            $relpath = ($param != 'id') ? 'lib/exe/' : '';
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));
        } elseif ($INPUT->server->str('PATH_INFO')) {
            $request = $INPUT->server->str('PATH_INFO');
        } elseif ($INPUT->server->str('SCRIPT_NAME')) {
            $script = $INPUT->server->str('SCRIPT_NAME');
        } elseif ($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')) {
            $rootPattern = '/^' . preg_quote($INPUT->server->str('DOCUMENT_ROOT'), '/') . '/';
            $script = '/' . preg_replace($rootPattern, '', $INPUT->server->str('SCRIPT_FILENAME'));
        }

        // collapse repeated slashes in script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        // whatever follows the script URL, minus the query string, is the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        // decode and strip leading slashes
        $id = preg_replace('!^/+!', '', urldecode($id));
    }

    // Namespace autolinking from URL
    $lastChar = substr($id, -1);
    if ($lastChar == ':' || ($conf['useslash'] && $lastChar == '/')) {
        $startPage = $id . $conf['start'];
        if (page_exists($startPage)) {
            // start page inside namespace
            $id = $startPage;
        } else {
            $sameNamePage = $id . noNS(cleanID($id));
            if (page_exists($sameNamePage)) {
                // page named like the NS inside the NS
                $id = $sameNamePage;
            } elseif (page_exists($id)) {
                // page like namespace exists
                $id = substr($id, 0, -1);
            } else {
                // fall back to default
                $id = $startPage;
            }
        }

        if (isset($ACT) && $ACT === 'show') {
            // redirect to the expanded id, keeping all other GET parameters
            $urlParameters = $_GET;
            if (isset($urlParameters['id'])) {
                unset($urlParameters['id']);
            }
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }

    if ($clean) {
        $id = cleanID($id);
    }
    if ($id === '' && $param == 'id') {
        $id = $conf['start'];
    }

    return $id;
}
107
/**
 * Remove unwanted chars from ID
 *
 * Lowercases the ID, unifies namespace separators, optionally romanizes and
 * deaccents it, strips special characters and finally collapses and trims
 * separator characters. Results of non-ASCII runs are memoized in the global
 * $cache_cleanid array, keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    global $cache_cleanid;
    static $sepcharpat = null;

    $cache = & $cache_cleanid;
    $cacheKey = (string)$raw_id;

    // forced-ASCII results are never cached, so only look up the normal case
    if (!$ascii && isset($cache[$cacheKey])) {
        return $cache[$cacheKey];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build the pattern only once per request
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = \dokuwiki\Utf8\PhpString::strtolower(trim($cacheKey));

    // alternative namespace separators: ';' always, '/' only with useslash
    $replacement = $conf['useslash'] ? '::' : (':' . $sepchar);
    $id = strtr($id, ';/', $replacement);

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = \dokuwiki\Utf8\Clean::romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = \dokuwiki\Utf8\Clean::deaccent($id, -1);
    }

    // remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) {
        $id = \dokuwiki\Utf8\Clean::strip($id);
    }

    // collapse separator runs and drop separators around namespace borders
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[$cacheKey] = $id;
    }
    return $id;
}
165
/**
 * Return the namespace part of a wiki ID
 *
 * The namespace is everything before the last colon.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $id = (string)$id;
    $lastColon = strrpos($id, ':');

    return ($lastColon === false) ? false : substr($id, 0, $lastColon);
}
182
/**
 * Returns the ID without the namespace
 *
 * The result is everything after the last colon. The ID is cast to string
 * for the lookup (as getNS() does), so null or false can be passed without
 * triggering type errors or deprecation notices. An ID that contains no
 * colon is returned exactly as it was passed in.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string the page name; a non-string $id without colon is returned unchanged
 */
function noNS($id)
{
    $pos = strrpos((string)$id, ':');
    if ($pos === false) {
        // nothing to strip - hand back the original value untouched
        return $id;
    }
    return substr((string)$id, $pos + 1);
}
200
/**
 * Returns the current namespace
 *
 * This is the last segment of the ID's namespace: the namespace is determined
 * with getNS() and its own parent namespaces are then removed with noNS().
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function curNS($id)
{
    $namespace = getNS($id);
    return noNS($namespace);
}
213
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When the page name is empty or equals $conf['start'], the name of the
 * current namespace is used instead. If that is empty as well,
 * $conf['start'] is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $name = noNS($id);
    if ($name !== $conf['start'] && $name !== false && $name !== '') {
        // a regular page name, use it as is
        return $name;
    }

    // start page (or no name at all): fall back to the namespace name
    $name = curNS($id);
    return ($name === false || $name === '') ? $conf['start'] : $name;
}
235
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned, colons and dots are removed and leading digits,
 * underscores and dashes are stripped. If nothing remains, 'section' followed
 * by the digits of the title is used.
 *
 * When $check is an array, the returned ID is made unique within it by
 * appending a counter and is then added to the array.
 *
 * @param string  $title   The headline title
 * @param array|bool   $check   Existing IDs
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $plain = str_replace([':', '.'], '', cleanID($title));
    $stripped = ltrim($plain, '0123456789_-');

    // keep numbers from headline when nothing else is left
    $base = empty($stripped)
        ? 'section' . preg_replace('/[^0-9]+/', '', $plain)
        : $stripped;

    if (!is_array($check)) {
        return $base;
    }

    // append 1, 2, 3... until the id is not in use yet
    $candidate = $base;
    for ($suffix = 1; in_array($candidate, $check); $suffix++) {
        $candidate = $base . $suffix;
    }
    $check[] = $candidate;

    return $candidate;
}
267
/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * With $date_at set and a non-empty $rev, the page changelog is asked for the
 * last revision at $rev and, if one is found, that revision is checked.
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool $date_at
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($date_at && $rev !== '') {
        $changelog = new PageChangeLog($id);
        $revisionAt = $changelog->getLastRevisionAt($rev);
        if ($revisionAt !== false) {
            $rev = $revisionAt;
        }
    }

    $file = wikiFN($id, $rev, $clean);
    return file_exists($file);
}
291
/**
 * Media existence check
 *
 * With $date_at set and a non-empty $rev, the media changelog is asked for
 * the last revision at $rev and, if one is found, that revision is checked.
 *
 * @param string $id media id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool $date_at
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($date_at && $rev !== '') {
        $mediaLog = new MediaChangeLog($id);
        $revisionAt = $mediaLog->getLastRevisionAt($rev);
        if ($revisionAt !== false) {
            $rev = $revisionAt;
        }
    }

    $file = mediaFN($id, $rev, $clean);
    return file_exists($file);
}
312
/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is passed through utf8_encodeFN(). Current pages live below
 * $conf['datadir'], old revisions below $conf['olddir']. Computed paths are
 * memoized in the global $cache_wikifn array.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;
    global $cache_wikifn;

    $cache = & $cache_wikifn;

    $id = $clean ? cleanID($raw_id) : $raw_id;
    $id = str_replace(':', '/', $id);

    // served from the per-request cache?
    if (isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            // an existing file wins regardless of its compression, otherwise
            // the configured extension is used for the file yet to be written
            $suffix = '.' . $conf['compression'];
            foreach (array('.gz', '.bz2') as $known) {
                if (file_exists($fn . $known)) {
                    $suffix = $known;
                    break;
                }
            }
            $fn .= $suffix;
        }
    }

    if (!isset($cache[$id])) {
        $cache[$id] = array();
    }
    $cache[$id][$rev] = $fn;

    return $fn;
}
365
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The file name is the md5 hash of the cleaned ID with a '.lock' extension,
 * located in $conf['lockdir'].
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;

    $lockName = md5(cleanID($id)) . '.lock';
    return $conf['lockdir'] . '/' . $lockName;
}
379
380
/**
 * returns the full path to the meta file specified by ID and extension
 *
 * The ID is cleaned and its namespaces become directories below
 * $conf['metadir']. $ext is appended verbatim.
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;

    $path = str_replace(':', '/', cleanID($id));
    return $conf['metadir'] . '/' . utf8_encodeFN($path) . $ext;
}
398
/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * The ID is cleaned and its namespaces become directories below
 * $conf['mediametadir']. $ext is appended verbatim.
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;

    $path = str_replace(':', '/', cleanID($id));
    return $conf['mediametadir'] . '/' . utf8_encodeFN($path) . $ext;
}
416
/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * Only files consisting of the meta base name plus a single extension are
 * returned. The keys of the glob() result are preserved by the filtering.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $prefix = metaFN($id, '');
    $candidates = glob($prefix . '.*', GLOB_MARK);
    if (!$candidates) {
        return array();
    }

    // filter files like foo.bar.meta when $id == 'foo'; entries that contain
    // a slash after the base name are rejected as well
    $pattern = '/^' . preg_quote($prefix, '/') . '\.[^.\/]*$/u';
    return preg_grep($pattern, $candidates);
}
433
/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is passed through utf8_encodeFN(). Current files live below
 * $conf['mediadir']. For old revisions the integer revision is inserted
 * before the file extension and the file is located below
 * $conf['mediaolddir'].
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;

    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);

    if (empty($rev)) {
        return $conf['mediadir'] . '/' . utf8_encodeFN($id);
    }

    // NOTE(review): presumably index 0 of mimetype() is the file extension;
    // verify what it holds for unknown types, one character is cut regardless
    $mime = mimetype($id);
    $extension = $mime[0];
    // cut off the extension together with its leading dot
    $stem = substr($id, 0, -1 * strlen($extension) - 1);
    $versioned = $stem . '.' . ( (int) $rev ) . '.' . $extension;

    return $conf['mediaolddir'] . '/' . utf8_encodeFN($versioned);
}
462
/**
 * Returns the full filepath to a localized file
 *
 * Lookup order: the local override below DOKU_CONF, the bundled file for the
 * configured language, and finally the bundled english file. The english path
 * is returned without checking that it exists.
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;

    $name = $id . '.' . $ext;

    $override = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $name;
    if (file_exists($override)) {
        return $override;
    }

    $bundled = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $name;
    if (file_exists($bundled)) {
        return $bundled;
    }

    // fall back to english
    return DOKU_INC . 'inc/lang/en/' . $name;
}
486
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * IDs starting with a dot are interpreted relative to $ns: '.' segments are
 * dropped and '..' segments remove the preceding segment. IDs without any
 * colon are placed into $ns unless $ns is false.
 *
 * Segments are compared against the empty string explicitly, so a namespace
 * named "0" is a regular segment and is not mistaken for an empty one.
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(\dokuwiki\File\Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) $id = str_replace('/', ':', $id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if ($id && $id[0] == '.') {
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        // an empty first segment means a leading colon, which is kept
        if ($pathA[0] === '') $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir === '..') {
                if (end($result) === '..') {
                    // already above the top, keep climbing
                    $result[] = '..';
                } else {
                    // go up one level; if there was nothing (or only the
                    // leading-colon marker) to remove, remember the '..'
                    $parent = array_pop($result);
                    if ($parent === null || $parent === '') {
                        $result[] = '..';
                    }
                }
            } elseif ($dir !== '' && $dir !== '.') {
                $result[] = $dir;
            }
        }
        // an empty last segment means a trailing colon, which is kept
        if (end($pathA) === '') $result[] = '';
        $id = implode(':', $result);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) $id = cleanID($id);
    return $id;
}
543
/**
 * Returns a full media id
 *
 * Thin wrapper around MediaResolver: the media ID is resolved in the context
 * of a placeholder page inside $ns and then checked for existence.
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);

    // the resolver is given a page id as context, so use a dummy page in the namespace
    $context = "$ns:deprecated";
    $mediaResolver = new MediaResolver($context);

    $media = $mediaResolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}
561
/**
 * Returns a full page id
 *
 * Thin wrapper around PageResolver: the page ID is resolved relative to the
 * global $ID when that page lives in $ns, otherwise relative to a placeholder
 * page inside $ns. The resolved page is then checked for existence.
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;

    // usually the current page is in the given namespace; the dummy page is
    // only used when a different context namespace was given
    $context = (getNS($ID) == $ns) ? $ID : "$ns:deprecated";

    $pageResolver = new PageResolver($context);
    $page = $pageResolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}
587
/**
 * Returns the name of a cachefile from given data
 *
 * The file is named after the md5 hash of $data and placed in a subdirectory
 * of $conf['cachedir'] named after the first character of that hash.
 * io_makeFileDir() is called for the resulting file name.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;

    $hash = md5($data);
    $bucket = $hash[0];
    $file = $conf['cachedir'] . '/' . $bucket . '/' . $hash . $ext;

    io_makeFileDir($file);
    return $file;
}
607
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event with
 * _isHiddenPage() passed as its default action; the 'hidden' entry of the
 * event data is returned afterwards.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $eventData = [
        'id' => $id,
        'hidden' => false,
    ];
    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $eventData, '_isHiddenPage');

    return $eventData['hidden'];
}
625
/**
 * callback checks if page is hidden
 *
 * Sets $data['hidden'] to true when the ID, prefixed with a colon, matches
 * the regular expression in $conf['hidepages'] (case insensitive, unicode).
 * Nothing is changed when the page is already marked hidden, when no pattern
 * is configured or when $ACT is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    $nothingToDo = $data['hidden'] || empty($conf['hidepages']) || $ACT == 'admin';
    if ($nothingToDo) {
        return;
    }

    $pattern = '/' . $conf['hidepages'] . '/ui';
    if (preg_match($pattern, ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}
644
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool true when isHiddenPage() does not report the page as hidden
 */
function isVisiblePage($id)
{
    $hidden = isHiddenPage($id);
    return !$hidden;
}
657
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. All other output is passed through hsc().
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    $isRoot = !$id || $id === ':';
    if ($isRoot) {
        return '*';
    }

    // an id ending in a colon names a namespace
    if (substr($id, -1, 1) === ':') {
        $id = $id . '*';
    }

    return hsc($id);
}
679
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the name
 * untouched, 'safe' uses SafeFN, everything else URL-encodes the name while
 * keeping slashes.
 *
 * When the second parameter is true the string will only be encoded if it
 * contains characters other than ASCII letters, digits and "/_-.%" -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;

    $encoding = $conf['fnencode'];
    if ($encoding == 'utf-8') {
        return $file;
    }

    // nothing to encode (or encoded already)
    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($encoding == 'safe') {
        return SafeFN::encode($file);
    }

    // URL encoding, but directory separators have to stay intact
    return str_replace('%2F', '/', urlencode($file));
}
714
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the name
 * untouched, 'safe' uses SafeFN, everything else is URL-decoded.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;

    switch ($conf['fnencode']) {
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
737
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * The search starts in the namespace of $ID and walks up one namespace at a
 * time; the root namespace is tried last.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') {
        return false;
    }

    global $ID;

    $ns = $ID;
    while (true) {
        // step up one level; false means we arrived at the root namespace
        $ns = getNS($ns);
        $candidate = cleanID("$ns:$page");

        if (page_exists($candidate)) {
            if (!$useacl || auth_quickaclcheck($candidate) >= AUTH_READ) {
                return $candidate;
            }
        }

        if ($ns === false) {
            break;
        }
    }

    return false;
}
767