1<?php
2/**
3 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
4 * @author     Esther Brunner <wikidesign@gmail.com>
5 */
6
7use dokuwiki\Extension\Event;
8use dokuwiki\Utf8\PhpString;
9
10/**
11 * Helper part of the tag plugin, allows to query and print tags
12 */
13class helper_plugin_tag extends DokuWiki_Plugin {
14
15    /**
16     * @deprecated 2022-10-02 Use the helper_plugin_tag::getNamespace() function instead!
17     * @var string namespace tag links point to
18     */
19    public $namespace;
20    /**
21     * @var string sort key: 'cdate', 'mdate', 'pagename', 'id', 'ns', 'title'
22     */
23    protected $sort;
24    /**
25     * @var string sort order 'ascending' or 'descending'
26     */
27    protected $sortorder;
28    /**
29     * @var array
30     * @deprecated 2022-08-31 Not used/filled any more by tag plugin
31     */
32    var $topic_idx  = [];
33
34    /**
35     * Constructor gets default preferences and language strings
36     */
37    public function __construct() {
38        global $ID;
39
40        $this->namespace = $this->getConf('namespace');
41        if (!$this->namespace) {
42            $this->namespace = getNS($ID);
43        }
44        $this->sort = $this->getConf('sortkey');
45        $this->sortorder = $this->getConf('sortorder');
46    }
47
48    /**
49     * Returns some documentation of the methods provided by this helper part
50     *
51     * @return array Method description
52     */
53    public function getMethods() {
54        $result = [];
55
56        $result[] = [
57            'name'   => 'overrideSortFlags',
58            'desc'   => 'takes an array of sortflags and overrides predefined value',
59            'params' => [
60                'name' => 'string'
61            ]
62        ];
63        $result[] = [
64                'name'   => 'th',
65                'desc'   => 'returns the header for the tags column for pagelist',
66                'return' => ['header' => 'string'],
67        ];
68        $result[] = [
69                'name'   => 'td',
70                'desc'   => 'returns the tag links of a given page',
71                'params' => ['id' => 'string'],
72                'return' => ['links' => 'string'],
73        ];
74        $result[] = [
75                'name'   => 'tagLinks',
76                'desc'   => 'generates tag links for given words',
77                'params' => ['tags' => 'array'],
78                'return' => ['links' => 'string'],
79        ];
80        $result[] = [
81                'name'   => 'getTopic',
82                'desc'   => 'returns a list of pages tagged with the given keyword',
83                'params' => [
84                    'namespace (optional)' => 'string',
85                    'number (not used)' => 'integer',
86                    'tag (required)' => 'string'
87                ],
88                'return' => ['pages' => 'array'],
89        ];
90        $result[] = [
91                'name'   => 'tagRefine',
92                'desc'   => 'refines an array of pages with tags',
93                'params' => [
94                    'pages to refine' => 'array',
95                    'refinement tags' => 'string'
96                ],
97                'return' => ['pages' => 'array'],
98        ];
99        $result[] = [
100                'name'   => 'tagOccurrences',
101                'desc'   => 'returns a list of tags with their number of occurrences',
102                'params' => [
103                    'list of tags to get the occurrences for' => 'array',
104                    'namespaces to which the search shall be restricted' => 'array',
105                    'if all tags shall be returned (then the first parameter is ignored)' => 'boolean',
106                    'if the namespaces shall be searched recursively' => 'boolean'
107                ],
108                'return' => ['tags' => 'array'],
109        ];
110        return $result;
111    }
112
113    /**
114     * Takes an array of sortflags and overrides predefined value
115     *
116     * @param array $newflags recognizes:
117     *      'sortkey' => string,
118     *      'sortorder' => string
119     * @return void
120     */
121    public function overrideSortFlags($newflags = []) {
122        if(isset($newflags['sortkey'])) {
123            $this->sort = trim($newflags['sortkey']);
124        }
125        if(isset($newflags['sortorder'])) {
126            $this->sortorder = trim($newflags['sortorder']);
127        }
128    }
129
130    /**
131     * Returns the column header for the Pagelist Plugin
132     */
133    public function th() {
134        return $this->getLang('tags');
135    }
136
137    /**
138     * Returns the cell data for the Pagelist Plugin
139     *
140     * @param string $id page id
141     * @return string html content for cell of table
142     */
143    public function td($id) {
144        $subject = $this->getTagsFromPageMetadata($id);
145        return $this->tagLinks($subject);
146    }
147
148    /**
149     *
150     * @return string|false
151     */
152    public function getNamespace() {
153        return $this->namespace;
154    }
155
156    /**
157     * Returns the links for given tags
158     *
159     * @param array $tags an array of tags
160     * @return string HTML link tags
161     */
162    public function tagLinks($tags) {
163        if (empty($tags) || ($tags[0] == '')) {
164            return '';
165        }
166
167        $links = array();
168        foreach ($tags as $tag) {
169            $links[] = $this->tagLink($tag);
170        }
171        return implode(','.DOKU_LF.DOKU_TAB, $links);
172    }
173
174    /**
175     * Returns the link for one given tag
176     *
177     * @param string $tag the tag the link shall point to
178     * @param string $title the title of the link (optional)
179     * @param bool   $dynamic if the link class shall be changed if no pages with the specified tag exist
180     * @return string The HTML code of the link
181     */
182    public function tagLink($tag, $title = '', $dynamic = false) {
183        global $conf;
184        $svtag = $tag;
185        $tagTitle = str_replace('_', ' ', noNS($tag));
186        // Igor and later
187        if (class_exists('dokuwiki\File\PageResolver')) {
188            $resolver = new dokuwiki\File\PageResolver($this->namespace . ':something');
189            $tag = $resolver->resolveId($tag);
190            $exists = page_exists($tag);
191        } else {
192            // Compatibility with older releases
193            resolve_pageid($this->namespace, $tag, $exists);
194        }
195        if ($exists) {
196            $class = 'wikilink1';
197            $url   = wl($tag);
198            if ($conf['useheading']) {
199                // important: set render param to false to prevent recursion!
200                $heading = p_get_first_heading($tag, false);
201                if ($heading) {
202                    $tagTitle = $heading;
203                }
204            }
205        } else {
206            if ($dynamic) {
207                $pages = $this->getTopic('', 1, $svtag);
208                if (empty($pages)) {
209                    $class = 'wikilink2';
210                } else {
211                    $class = 'wikilink1';
212                }
213            } else {
214                $class = 'wikilink1';
215            }
216            $url   = wl($tag, ['do'=>'showtag', 'tag'=>$svtag]);
217        }
218        if (!$title) {
219            $title = $tagTitle;
220        }
221        $link = [
222            'href' => $url,
223            'class' => $class,
224            'tooltip' => hsc($tag),
225            'title' => hsc($title)
226        ];
227        Event::createAndTrigger('PLUGIN_TAG_LINK', $link);
228        return '<a href="'.$link['href'].'" class="'.$link['class'].'" title="'.$link['tooltip'].'" rel="tag">'
229                .$link['title']
230                .'</a>';
231    }
232
233    /**
234     * Returns a list of pages with a certain tag; very similar to ft_backlinks()
235     *
236     * @param string $ns A namespace to which all pages need to belong, "." for only the root namespace
237     * @param int    $num The maximum number of pages that shall be returned
238     * @param string $tagquery The tag string that shall be searched e.g. 'tag +tag -tag'
239     * @return array The list of pages
240     *
241     * @author  Esther Brunner <wikidesign@gmail.com>
242     */
243    public function getTopic($ns = '', $num = null, $tagquery = '') {
244        global $INPUT;
245        if (!$tagquery) {
246            $tagquery = $INPUT->str('tag');
247        }
248        $queryTags = $this->parseTagList($tagquery, true);
249        $result = [];
250
251        // find the pages using subject_w.idx
252        $pages = $this->getIndexedPagesMatchingTagQuery($queryTags);
253        if (!count($pages)) {
254            return $result;
255        }
256
257        foreach ($pages as $page) {
258            // exclude pages depending on ACL and namespace
259            if($this->isNotVisible($page, $ns)) continue;
260
261            $pageTags  = $this->getTagsFromPageMetadata($page);
262            // don't trust index
263            if (!$this->matchWithPageTags($pageTags, $queryTags)) continue;
264
265            // get metadata
266            $meta = p_get_metadata($page);
267
268            $perm = auth_quickaclcheck($page);
269
270            // skip drafts unless for users with create privilege
271            $isDraft = isset($meta['type']) && $meta['type'] == 'draft';
272            if ($isDraft && $perm < AUTH_CREATE) continue;
273
274            $title = $meta['title'] ?? '';
275            $date  = ($this->sort == 'mdate' ? $meta['date']['modified'] : $meta['date']['created'] );
276            $taglinks = $this->tagLinks($pageTags);
277
278            // determine the sort key
279            switch($this->sort) {
280                case 'id':
281                    $sortkey = $page;
282                    break;
283                case 'ns':
284                    $pos = strrpos($page, ':');
285                    if ($pos === false) {
286                        $sortkey = "\0".$page;
287                    } else {
288                        $sortkey = substr_replace($page, "\0\0", $pos, 1);
289                    }
290                    $sortkey = str_replace(':', "\0", $sortkey);
291                    break;
292                case 'pagename':
293                    $sortkey = noNS($page);
294                    break;
295                case 'title':
296                    $sortkey = PhpString::strtolower($title);
297                    if (empty($sortkey)) {
298                        $sortkey = str_replace('_', ' ', noNS($page));
299                    }
300                    break;
301                default:
302                    $sortkey = $date;
303            }
304            // make sure that the key is unique
305            $sortkey = $this->uniqueKey($sortkey, $result);
306
307            $result[$sortkey] = [
308                    'id'     => $page,
309                    'title'  => $title,
310                    'date'   => $date,
311                    'user'   => $meta['creator'],
312                    'desc'   => $meta['description']['abstract'],
313                    'cat'    => $pageTags[0],
314                    'tags'   => $taglinks,
315                    'perm'   => $perm,
316                    'exists' => true,
317                    'draft'  => $isDraft
318            ];
319
320            if ($num && count($result) >= $num) {
321                break;
322            }
323        }
324
325        // finally sort by sort key
326        if ($this->sortorder == 'ascending') {
327            ksort($result);
328        } else {
329            krsort($result);
330        }
331
332        return $result;
333    }
334
335    /**
336     * Refine found pages with tags (+tag: AND, -tag: (AND) NOT)
337     *
338     * @param array $pages The pages that shall be filtered, each page needs to be an array with a key "id"
339     * @param string $tagquery The list of tags in the form "tag +tag2 -tag3". The tags will be cleaned.
340     * @return array The filtered list of pages
341     */
342    public function tagRefine($pages, $tagquery) {
343        if (!is_array($pages)) {
344            // wrong data type
345            return $pages;
346        }
347        $queryTags = $this->parseTagList($tagquery, true);
348        $allMatchedPages = $this->getIndexedPagesMatchingTagQuery($queryTags);
349
350        foreach ($pages as $key => $page) {
351            if (!in_array($page['id'], $allMatchedPages)) {
352                unset($pages[$key]);
353            }
354        }
355
356        return $pages;
357   }
358
359   /**
360    * Get count of occurrences for a list of tags
361    *
362    * @param array $tags array of tags
363    * @param array $namespaces array of namespaces where to count the tags
364    * @param boolean $allTags boolean if all available tags should be counted
365    * @param boolean $isRecursive boolean if counting of pages in subnamespaces is allowed
366    * @return array with:
367    *   $tag => int count
368    */
369    public function tagOccurrences($tags, $namespaces = null, $allTags = false, $isRecursive = null) {
370        // map with trim here in order to remove newlines from tags
371        if($allTags) {
372            $tags = array_map('trim', idx_getIndex('subject', '_w'));
373        }
374        $tags = $this->cleanTagList($tags);
375        $tagOccurrences = []; //occurrences
376        // $namespaces not specified
377        if(!$namespaces || $namespaces[0] == '' || !is_array($namespaces)) {
378            $namespaces = null;
379        }
380
381        $indexer = idx_get_indexer();
382        $indexedPagesWithTags = $indexer->lookupKey('subject', $tags, array($this, 'tagCompare'));
383
384        $isRootAllowed = !($namespaces === null) && in_array('.', $namespaces);
385        if ($isRecursive === null) {
386            $isRecursive = $this->getConf('list_tags_of_subns');
387        }
388
389        foreach ($tags as $tag) {
390            if (!isset($indexedPagesWithTags[$tag])) continue;
391
392            // just to be sure remove duplicate pages from the list of pages
393            $pages = array_unique($indexedPagesWithTags[$tag]);
394
395            // don't count hidden pages or pages the user can't access
396            // for performance reasons this doesn't take drafts into account
397            $pages = array_filter($pages, [$this, 'isVisible']);
398
399            if (empty($pages)) continue;
400
401            if ($namespaces == null || ($isRootAllowed && $isRecursive)) {
402                // count all pages
403                $tagOccurrences[$tag] = count($pages);
404            } else if (!$isRecursive) {
405                // filter by exact namespace
406                $tagOccurrences[$tag] = 0;
407                foreach ($pages as $page) {
408                    $ns = getNS($page);
409                    if (($ns === false && $isRootAllowed) || in_array($ns, $namespaces)) {
410                        $tagOccurrences[$tag]++;
411                    }
412                }
413            } else { // recursive, no root
414                $tagOccurrences[$tag] = 0;
415                foreach ($pages as $page) {
416                    foreach ($namespaces as $ns) {
417                        if(strpos($page, $ns.':') === 0 ) {
418                            $tagOccurrences[$tag]++ ;
419                            break;
420                        }
421                    }
422                }
423            }
424            // don't return tags without pages
425            if ($tagOccurrences[$tag] == 0) {
426                unset($tagOccurrences[$tag]);
427            }
428        }
429        return $tagOccurrences;
430    }
431
432    /**
433     * Get tags from the 'subject' metadata field
434     *
435     * @param string $id the page id
436     * @return array
437     */
438    protected function getTagsFromPageMetadata($id){
439        $tags = p_get_metadata($id, 'subject');
440        if (!is_array($tags)) {
441            $tags = explode(' ', $tags);
442        }
443        return array_unique($tags);
444    }
445
446    /**
447     * Returns pages from index matching the tag query
448     *
449     * @param array $queryTags the tags to filter e.g. ['tag'(OR), '+tag'(AND), '-tag'(NOT)]
450     * @return array the matching page ids
451     */
452    public function getIndexedPagesMatchingTagQuery($queryTags) {
453        $result = []; // array of page ids
454
455        $cleanTags = [];
456        foreach ($queryTags as $i => $tag) {
457            if ($tag[0] == '+' || $tag[0] == '-') {
458                $cleanTags[$i] = substr($tag, 1);
459            } else {
460                $cleanTags[$i] = $tag;
461            }
462        }
463
464        $indexer = idx_get_indexer();
465        $pages = $indexer->lookupKey('subject', $cleanTags, [$this, 'tagCompare']);
466        // use all pages as basis if the first tag isn't an "or"-tag or if there are no tags given
467        if (empty($queryTags) || $cleanTags[0] != $queryTags[0]) {
468            $result = $indexer->getPages();
469        }
470
471        foreach ($queryTags as $i => $queryTag) {
472            $tag = $cleanTags[$i];
473            if (!is_array($pages[$tag])) {
474                $pages[$tag] = [];
475            }
476
477            if ($queryTag[0] == '+') {       // AND: add only if in both arrays
478                $result = array_intersect($result, $pages[$tag]);
479            } elseif ($queryTag[0] == '-') { // NOT: remove array from docs
480                $result = array_diff($result, $pages[$tag]);
481            } else {                         // OR: add array to docs
482                $result = array_unique(array_merge($result, $pages[$tag]));
483            }
484        }
485
486        return $result;
487    }
488
489
490
491    /**
492     * Splits a string into an array of tags
493     *
494     * @param string $tags tag string, if containing spaces use quotes e.g. "tag with spaces", will be replaced by underscores
495     * @param bool $clean replace placeholders and clean id
496     * @return string[]
497     */
498    public function parseTagList($tags, $clean = false) {
499
500        // support for "quoted phrase tags", replaces spaces by underscores
501        if (preg_match_all('#".*?"#', $tags, $matches)) {
502            foreach ($matches[0] as $match) {
503                $replace = str_replace(' ', '_', substr($match, 1, -1));
504                $tags = str_replace($match, $replace, $tags);
505            }
506        }
507
508        $tags = preg_split('/ /', $tags, -1, PREG_SPLIT_NO_EMPTY);
509
510        if ($clean) {
511            return $this->cleanTagList($tags);
512        } else {
513            return $tags;
514        }
515    }
516
517    /**
518     * Clean a list (array) of tags using _cleanTag
519     *
520     * @param string[] $tags
521     * @return string[]
522     */
523    public function cleanTagList($tags) {
524        return array_unique(array_map([$this, 'cleanTag'], $tags));
525    }
526
527    /**
528     * callback: Cleans a tag using cleanID while preserving a possible prefix of + or -, and replace placeholders
529     *
530     * @param string $tag
531     * @return string
532     */
533    protected function cleanTag($tag) {
534        $prefix = substr($tag, 0, 1);
535        $tag = $this->replacePlaceholders($tag);
536        if ($prefix === '-' || $prefix === '+') {
537            return $prefix.cleanID($tag);
538        } else {
539            return cleanID($tag);
540        }
541    }
542
543    /**
544     * Makes user or date dependent topic lists possible by replacing placeholders in tags
545     *
546     * @param string $tag
547     * @return string
548     */
549    protected function replacePlaceholders($tag) {
550        global $INFO, $INPUT;
551
552        $user = $INPUT->server->str('REMOTE_USER');
553
554        //only available for logged-in users
555        if(isset($INFO['userinfo']['grps'])) {
556            $group = $INFO['userinfo']['grps'][0];
557        }   else {
558            $group = '';
559        }
560
561        $replace = [
562                '@USER@'  => cleanID($user),
563                '@NAME@'  => cleanID($INFO['userinfo']['name'] ?? ''),
564                '@GROUP@' => cleanID($group), //FIXME or delete, is unreliable because just first entry of group array is used, regardless the order of groups..
565                '@YEAR@'  => date('Y'),
566                '@MONTH@' => date('m'),
567                '@DAY@'   => date('d'),
568        ];
569        return str_replace(array_keys($replace), array_values($replace), $tag);
570    }
571
572    /**
573     * Non-recursive function to check whether an array key is unique
574     *
575     * @param int|string $key
576     * @param array $result
577     * @return float|int|string
578     *
579     * @author    Ilya S. Lebedev <ilya@lebedev.net>
580     * @author    Esther Brunner <wikidesign@gmail.com>
581     */
582    protected function uniqueKey($key, $result) {
583
584        // increase numeric keys by one
585        if (is_numeric($key)) {
586            while (array_key_exists($key, $result)) {
587                $key++;
588            }
589            return $key;
590
591            // append a number to literal keys
592        } else {
593            $num     = 0;
594            $testkey = $key;
595            while (array_key_exists($testkey, $result)) {
596                $testkey = $key.$num;
597                $num++;
598            }
599            return $testkey;
600        }
601    }
602
603    /**
604     * Opposite of isNotVisible()
605     *
606     * @param string $id the page id
607     * @param string $ns
608     * @return bool if the page is shown
609     */
610    public function isVisible($id, $ns='') {
611        return !$this->isNotVisible($id, $ns);
612    }
613
614    /**
615     * Check visibility of the page
616     *
617     * @param string $id the page id
618     * @param string $ns the namespace authorized
619     * @return bool if the page is hidden
620     */
621    public function isNotVisible($id, $ns="") {
622        // discard hidden pages
623        if (isHiddenPage($id)) {
624            return true;
625        }
626        // discard if user can't read
627        if (auth_quickaclcheck($id) < AUTH_READ) {
628            return true;
629        }
630
631        // filter by namespace, root namespace is identified with a dot
632        if($ns == '.') {
633            // root namespace is specified, discard all pages who lay outside the root namespace
634            if(getNS($id) !== false) {
635                return true;
636            }
637        } else {
638            // hide if ns is not matching the page id (match gives strpos===0)
639            if ($ns && strpos(':'.getNS($id).':', ':'.$ns.':') !== 0) {
640                return true;
641            }
642        }
643        return !page_exists($id, '', false);
644    }
645
646    /**
647     * callback Helper function for the indexer in order to avoid interpreting wildcards
648     *
649     * @param string $tag1 tag being searched
650     * @param string $tag2 tag from index
651     * @return bool is equal?
652     */
653    public function tagCompare($tag1, $tag2) {
654        return $tag1 === $tag2;
655    }
656
657    /**
658     * Check if the page is a real candidate for the result of the getTopic by comparing its tags with the wanted tags
659     *
660     * @param string[] $pageTags cleaned tags from the metadata of the page
661     * @param string[] $queryTags tags we are looking ['tag', '+tag', '-tag']
662     * @return bool
663     */
664    protected function matchWithPageTags($pageTags, $queryTags) {
665        $result = false;
666        foreach($queryTags as $tag) {
667            if ($tag[0] == "+" and !in_array(substr($tag, 1), $pageTags)) {
668                $result = false;
669            }
670            if ($tag[0] == "-" and in_array(substr($tag, 1), $pageTags)) {
671                $result = false;
672            }
673            if (in_array($tag, $pageTags)) {
674                $result = true;
675            }
676        }
677        return $result;
678    }
679
680
681    /**
682     * @deprecated 2022-08-31 use parseTagList() instead !
683     *
684     * @param string $tags
685     * @param bool $clean
686     * @return string[]
687     */
688    public function _parseTagList($tags, $clean = false) {
689        return $this->parseTagList($tags, $clean);
690    }
691
692    /**
693     * Opposite of isNotVisible()
694     *
695     * @deprecated 2022-08-31 use isVisible() instead !
696     *
697     * @param string $id
698     * @param string $ns
699     * @return bool
700     */
701    public function _isVisible($id, $ns='') {
702        return $this->isVisible($id, $ns);
703    }
704
705    /**
706     * Clean a list (array) of tags using _cleanTag
707     *
708     * @deprecated 2022-08-31 use cleanTagList() instead !
709     *
710     * @param string[] $tags
711     * @return string[]
712     */
713    public function _cleanTagList($tags) {
714        return $this->cleanTagList($tags);
715    }
716
717    /**
718     * Returns pages from index matching the tag query
719     *
720     * @param array $queryTags the tags to filter e.g. ['tag'(OR), '+tag'(AND), '-tag'(NOT)]
721     * @return array the matching page ids
722     *
723     * @deprecated 2022-08-31 use getIndexedPagesMatchingTagQuery() instead !
724     */
725    function _tagIndexLookup($queryTags) {
726        return $this->getIndexedPagesMatchingTagQuery($queryTags);
727    }
728
729    /**
730     * Get the subject metadata cleaning the result
731     *
732     * @deprecated 2022-08-31 use getTagsFromPageMetadata() instead !
733     *
734     * @param string $id the page id
735     * @return array
736     */
737    public function _getSubjectMetadata($id){
738        return $this->getTagsFromPageMetadata($id);
739    }
740}
741