1 <?php
2 /**
3  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
4  * @author     Esther Brunner <wikidesign@gmail.com>
5  */
6 
7 use dokuwiki\Extension\Event;
8 use dokuwiki\Utf8\PhpString;
9 
10 /**
11  * Helper part of the tag plugin, allows to query and print tags
12  */
class helper_plugin_tag extends DokuWiki_Plugin {

    /**
     * @deprecated 2022-10-02 Use the helper_plugin_tag::getNamespace() function instead!
     * @var string namespace tag links point to
     */
    public $namespace;
    /**
     * @var string sort key: 'cdate', 'mdate', 'pagename', 'id', 'ns', 'title'
     */
    protected $sort;
    /**
     * @var string sort order 'ascending' or 'descending'
     */
    protected $sortorder;
    /**
     * @var array
     * @deprecated 2022-08-31 Not used/filled any more by tag plugin
     */
    public $topic_idx = [];

    /**
     * Constructor reads the configured tag namespace and the default sort preferences
     *
     * When no namespace is configured, the namespace of the current page ($ID) is used.
     */
    public function __construct() {
        global $ID;

        $this->namespace = $this->getConf('namespace');
        if (!$this->namespace) {
            $this->namespace = getNS($ID);
        }
        $this->sort = $this->getConf('sortkey');
        $this->sortorder = $this->getConf('sortorder');
    }

    /**
     * Returns some documentation of the methods provided by this helper part
     *
     * @return array Method description
     */
    public function getMethods() {
        $result = [];

        $result[] = [
            'name'   => 'overrideSortFlags',
            'desc'   => 'takes an array of sortflags and overrides predefined value',
            // the method takes an array with the keys 'sortkey' and 'sortorder'
            'params' => [
                'sortflags' => 'array'
            ]
        ];
        $result[] = [
            'name'   => 'th',
            'desc'   => 'returns the header for the tags column for pagelist',
            'return' => ['header' => 'string'],
        ];
        $result[] = [
            'name'   => 'td',
            'desc'   => 'returns the tag links of a given page',
            'params' => ['id' => 'string'],
            'return' => ['links' => 'string'],
        ];
        $result[] = [
            'name'   => 'tagLinks',
            'desc'   => 'generates tag links for given words',
            'params' => ['tags' => 'array'],
            'return' => ['links' => 'string'],
        ];
        $result[] = [
            'name'   => 'getTopic',
            'desc'   => 'returns a list of pages tagged with the given keyword',
            'params' => [
                'namespace (optional)' => 'string',
                // the number limits how many pages getTopic() collects
                'number (optional)' => 'integer',
                'tag (required)' => 'string'
            ],
            'return' => ['pages' => 'array'],
        ];
        $result[] = [
            'name'   => 'tagRefine',
            'desc'   => 'refines an array of pages with tags',
            'params' => [
                'pages to refine' => 'array',
                'refinement tags' => 'string'
            ],
            'return' => ['pages' => 'array'],
        ];
        $result[] = [
            'name'   => 'tagOccurrences',
            'desc'   => 'returns a list of tags with their number of occurrences',
            'params' => [
                'list of tags to get the occurrences for' => 'array',
                'namespaces to which the search shall be restricted' => 'array',
                'if all tags shall be returned (then the first parameter is ignored)' => 'boolean',
                'if the namespaces shall be searched recursively' => 'boolean'
            ],
            'return' => ['tags' => 'array'],
        ];
        return $result;
    }

    /**
     * Takes an array of sortflags and overrides predefined value
     *
     * @param array $newflags recognizes:
     *      'sortkey' => string,
     *      'sortorder' => string
     * @return void
     */
    public function overrideSortFlags($newflags = []) {
        if (isset($newflags['sortkey'])) {
            $this->sort = trim($newflags['sortkey']);
        }
        if (isset($newflags['sortorder'])) {
            $this->sortorder = trim($newflags['sortorder']);
        }
    }

    /**
     * Returns the column header for the Pagelist Plugin
     *
     * @return string localized header text
     */
    public function th() {
        return $this->getLang('tags');
    }

    /**
     * Returns the cell data for the Pagelist Plugin
     *
     * @param string $id page id
     * @return string html content for cell of table
     */
    public function td($id) {
        $subject = $this->getTagsFromPageMetadata($id);
        return $this->tagLinks($subject);
    }

    /**
     * Returns the namespace tag links point to
     *
     * @return string|false
     */
    public function getNamespace() {
        return $this->namespace;
    }

    /**
     * Returns the links for given tags
     *
     * An empty list, or a list whose entry at index 0 is missing or empty, yields an empty string.
     *
     * @param array $tags an array of tags
     * @return string HTML link tags
     */
    public function tagLinks($tags) {
        // "?? ''" avoids an undefined-offset warning while keeping the historic result
        if (empty($tags) || (($tags[0] ?? '') == '')) {
            return '';
        }

        $links = [];
        foreach ($tags as $tag) {
            $links[] = $this->tagLink($tag);
        }
        return implode(','.DOKU_LF.DOKU_TAB, $links);
    }

    /**
     * Returns the link for one given tag
     *
     * If a page exists for the tag (resolved relative to the tag namespace) the link points to
     * that page, otherwise it points to the "showtag" action for the tag.
     * The link data is passed through the PLUGIN_TAG_LINK event before the HTML is built.
     *
     * @param string $tag the tag the link shall point to
     * @param string $title the title of the link (optional)
     * @param bool   $dynamic if the link class shall be changed if no pages with the specified tag exist
     * @return string The HTML code of the link
     */
    public function tagLink($tag, $title = '', $dynamic = false) {
        global $conf;
        $svtag = $tag;
        $tagTitle = str_replace('_', ' ', noNS($tag));
        // Igor and later
        if (class_exists('dokuwiki\File\PageResolver')) {
            $resolver = new dokuwiki\File\PageResolver($this->namespace . ':something');
            $tag = $resolver->resolveId($tag);
            $exists = page_exists($tag);
        } else {
            // Compatibility with older releases
            resolve_pageid($this->namespace, $tag, $exists);
        }

        // every case except "dynamic link without any tagged page" is rendered as existing link
        $class = 'wikilink1';
        if ($exists) {
            $url = wl($tag);
            if ($conf['useheading']) {
                // important: set render param to false to prevent recursion!
                $heading = p_get_first_heading($tag, false);
                if ($heading) {
                    $tagTitle = $heading;
                }
            }
        } else {
            if ($dynamic) {
                // one hit is enough to know whether any page carries the tag
                $pages = $this->getTopic('', 1, $svtag);
                if (empty($pages)) {
                    $class = 'wikilink2';
                }
            }
            $url = wl($tag, ['do'=>'showtag', 'tag'=>$svtag]);
        }
        if (!$title) {
            $title = $tagTitle;
        }
        $link = [
            'href' => $url,
            'class' => $class,
            'tooltip' => hsc($tag),
            'title' => hsc($title)
        ];
        Event::createAndTrigger('PLUGIN_TAG_LINK', $link);
        return '<a href="'.$link['href'].'" class="'.$link['class'].'" title="'.$link['tooltip'].'" rel="tag">'
                .$link['title']
                .'</a>';
    }

    /**
     * Returns a list of pages with a certain tag; very similar to ft_backlinks()
     *
     * Candidates come from the index and are re-checked against the page metadata, visibility
     * and draft status. Collection stops as soon as $num pages were found; the collected pages
     * are sorted afterwards.
     *
     * @param string $ns A namespace to which all pages need to belong, "." for only the root namespace
     * @param int    $num The maximum number of pages that shall be returned
     * @param string $tagquery The tag string that shall be searched e.g. 'tag +tag -tag',
     *                         falls back to the request parameter 'tag' when empty
     * @return array The list of pages, keyed by their (unique) sort key
     *
     * @author  Esther Brunner <wikidesign@gmail.com>
     */
    public function getTopic($ns = '', $num = null, $tagquery = '') {
        global $INPUT;
        if (!$tagquery) {
            $tagquery = $INPUT->str('tag');
        }
        $queryTags = $this->parseTagList($tagquery, true);
        $result = [];

        // find the pages using subject_w.idx
        $pages = $this->getIndexedPagesMatchingTagQuery($queryTags);
        if (!count($pages)) {
            return $result;
        }

        // which metadata date is reported depends only on the configured sort key
        $dateField = ($this->sort == 'mdate') ? 'modified' : 'created';

        foreach ($pages as $page) {
            // exclude pages depending on ACL and namespace
            if ($this->isNotVisible($page, $ns)) continue;

            $pageTags = $this->getTagsFromPageMetadata($page);
            // don't trust index
            if (!$this->matchWithPageTags($pageTags, $queryTags)) continue;

            // get metadata
            $meta = p_get_metadata($page);

            $perm = auth_quickaclcheck($page);

            // skip drafts unless for users with create privilege
            $isDraft = isset($meta['type']) && $meta['type'] == 'draft';
            if ($isDraft && $perm < AUTH_CREATE) continue;

            // incomplete metadata yields null/'' instead of raising undefined-index warnings
            $title = $meta['title'] ?? '';
            $date  = $meta['date'][$dateField] ?? null;
            $taglinks = $this->tagLinks($pageTags);

            // make sure that the key is unique
            $sortkey = $this->uniqueKey($this->buildSortKey($page, $title, $date), $result);

            $result[$sortkey] = [
                'id'     => $page,
                'title'  => $title,
                'date'   => $date,
                'user'   => $meta['creator'] ?? null,
                'desc'   => $meta['description']['abstract'] ?? null,
                'cat'    => $pageTags[0] ?? null,
                'tags'   => $taglinks,
                'perm'   => $perm,
                'exists' => true,
                'draft'  => $isDraft
            ];

            if ($num && count($result) >= $num) {
                break;
            }
        }

        // finally sort by sort key
        if ($this->sortorder == 'ascending') {
            ksort($result);
        } else {
            krsort($result);
        }

        return $result;
    }

    /**
     * Determines the (not yet unique) sort key of a page according to the current sort setting
     *
     * @param string $page the page id
     * @param string $title the page title from the metadata, may be empty
     * @param mixed  $date the date used when sorting by date (the default)
     * @return mixed the sort key
     */
    protected function buildSortKey($page, $title, $date) {
        switch ($this->sort) {
            case 'id':
                return $page;
            case 'ns':
                // NUL bytes as separators: the last colon becomes two, every other colon one,
                // pages without namespace get a single leading one
                $pos = strrpos($page, ':');
                if ($pos === false) {
                    $sortkey = "\0".$page;
                } else {
                    $sortkey = substr_replace($page, "\0\0", $pos, 1);
                }
                return str_replace(':', "\0", $sortkey);
            case 'pagename':
                return noNS($page);
            case 'title':
                $sortkey = PhpString::strtolower($title);
                if (empty($sortkey)) {
                    // no usable title: fall back to the page name
                    $sortkey = str_replace('_', ' ', noNS($page));
                }
                return $sortkey;
            default:
                return $date;
        }
    }

    /**
     * Refine found pages with tags (+tag: AND, -tag: (AND) NOT)
     *
     * Only the index is consulted here; array keys of the kept pages are preserved.
     *
     * @param array $pages The pages that shall be filtered, each page needs to be an array with a key "id"
     * @param string $tagquery The list of tags in the form "tag +tag2 -tag3". The tags will be cleaned.
     * @return array The filtered list of pages
     */
    public function tagRefine($pages, $tagquery) {
        if (!is_array($pages)) {
            // wrong data type
            return $pages;
        }
        $queryTags = $this->parseTagList($tagquery, true);
        $allMatchedPages = $this->getIndexedPagesMatchingTagQuery($queryTags);

        // Lookup table instead of in_array(): constant time per page and no loose comparison,
        // which would treat numeric looking ids such as '2020' and '2020.0' as equal
        $matchedLookup = array_flip($allMatchedPages);

        foreach ($pages as $key => $page) {
            $id = $page['id'] ?? null;
            if (!is_scalar($id) || !isset($matchedLookup[(string) $id])) {
                unset($pages[$key]);
            }
        }

        return $pages;
    }

    /**
     * Get count of occurrences for a list of tags
     *
     * Hidden pages and pages the user cannot read are not counted. Tags without any counted page
     * are not part of the result.
     *
     * @param array $tags array of tags
     * @param array $namespaces array of namespaces where to count the tags, "." stands for the root namespace
     * @param boolean $allTags boolean if all available tags should be counted
     * @param boolean $isRecursive boolean if counting of pages in subnamespaces is allowed,
     *                             null uses the 'list_tags_of_subns' setting
     * @return array with:
     *   $tag => int count
     */
    public function tagOccurrences($tags, $namespaces = null, $allTags = false, $isRecursive = null) {
        // map with trim here in order to remove newlines from tags
        if ($allTags) {
            $tags = array_map('trim', idx_getIndex('subject', '_w'));
        }
        $tags = $this->cleanTagList($tags);
        $tagOccurrences = []; //occurrences

        // $namespaces not specified: anything but a non-empty array with a non-empty entry 0
        if (!is_array($namespaces) || !$namespaces || ($namespaces[0] ?? '') == '') {
            $namespaces = null;
        } else {
            // Namespaces are compared strictly below. Normalizing to strings keeps numeric
            // namespaces given as integers working.
            $namespaces = array_map('strval', $namespaces);
        }

        $indexer = idx_get_indexer();
        $indexedPagesWithTags = $indexer->lookupKey('subject', $tags, [$this, 'tagCompare']);

        $isRootAllowed = ($namespaces !== null) && in_array('.', $namespaces, true);
        if ($isRecursive === null) {
            $isRecursive = $this->getConf('list_tags_of_subns');
        }

        foreach ($tags as $tag) {
            if (!isset($indexedPagesWithTags[$tag])) continue;

            // just to be sure remove duplicate pages from the list of pages
            $pages = array_unique($indexedPagesWithTags[$tag]);

            // don't count hidden pages or pages the user can't access
            // for performance reasons this doesn't take drafts into account
            $pages = array_filter($pages, [$this, 'isVisible']);

            if (empty($pages)) continue;

            if ($namespaces === null || ($isRootAllowed && $isRecursive)) {
                // count all pages
                $count = count($pages);
            } elseif (!$isRecursive) {
                // filter by exact namespace
                $count = 0;
                foreach ($pages as $page) {
                    $ns = getNS($page);
                    // strict comparison: a root page ($ns === false) must not match a namespace
                    // like '0', and '1' must not match '01'
                    if (($ns === false && $isRootAllowed) || in_array($ns, $namespaces, true)) {
                        $count++;
                    }
                }
            } else { // recursive, no root
                $count = 0;
                foreach ($pages as $page) {
                    foreach ($namespaces as $ns) {
                        if (strpos($page, $ns.':') === 0) {
                            $count++;
                            break;
                        }
                    }
                }
            }

            // don't return tags without pages
            if ($count != 0) {
                $tagOccurrences[$tag] = $count;
            }
        }
        return $tagOccurrences;
    }

    /**
     * Get tags from the 'subject' metadata field
     *
     * Metadata that is not an array is split at spaces, so missing metadata results in [''].
     *
     * @param string $id the page id
     * @return array the tags without duplicates (keys of the first occurrences are kept)
     */
    protected function getTagsFromPageMetadata($id) {
        $tags = p_get_metadata($id, 'subject');
        if (!is_array($tags)) {
            // the cast avoids passing null to explode(), which is deprecated since PHP 8.1
            $tags = explode(' ', (string) $tags);
        }
        return array_unique($tags);
    }

    /**
     * Returns pages from index matching the tag query
     *
     * The tags are applied in the given order: a plain tag adds its pages (OR), '+tag' keeps
     * only pages having the tag (AND) and '-tag' removes the pages having the tag (NOT).
     *
     * @param array $queryTags the tags to filter e.g. ['tag'(OR), '+tag'(AND), '-tag'(NOT)]
     * @return array the matching page ids
     */
    public function getIndexedPagesMatchingTagQuery($queryTags) {
        $result = []; // array of page ids

        // split every query tag into its operator ('+', '-' or '' for OR) and the bare tag
        $operators = [];
        $cleanTags = [];
        foreach ($queryTags as $i => $tag) {
            // substr() instead of $tag[0]: no warning for an empty tag
            $prefix = substr((string) $tag, 0, 1);
            if ($prefix === '+' || $prefix === '-') {
                $operators[$i] = $prefix;
                $cleanTags[$i] = substr($tag, 1);
            } else {
                $operators[$i] = '';
                $cleanTags[$i] = $tag;
            }
        }

        $indexer = idx_get_indexer();
        $pages = $indexer->lookupKey('subject', $cleanTags, [$this, 'tagCompare']);

        // Use all pages as basis if the first tag isn't an "or"-tag or if there are no tags given.
        // The operator is checked explicitly: comparing the cleaned with the original tag by "!="
        // treats numeric tags such as '2024' and '+2024' as equal and would skip this step.
        $firstOperator = reset($operators);
        if ($firstOperator === false || $firstOperator !== '') {
            $result = $indexer->getPages();
        }

        foreach ($cleanTags as $i => $tag) {
            $tagPages = (isset($pages[$tag]) && is_array($pages[$tag])) ? $pages[$tag] : [];

            if ($operators[$i] === '+') {       // AND: add only if in both arrays
                $result = array_intersect($result, $tagPages);
            } elseif ($operators[$i] === '-') { // NOT: remove array from docs
                $result = array_diff($result, $tagPages);
            } else {                            // OR: add array to docs
                $result = array_unique(array_merge($result, $tagPages));
            }
        }

        return $result;
    }



    /**
     * Splits a string into an array of tags
     *
     * @param string $tags tag string, if containing spaces use quotes e.g. "tag with spaces", will be replaced by underscores
     * @param bool $clean replace placeholders and clean id
     * @return string[]
     */
    public function parseTagList($tags, $clean = false) {

        // support for "quoted phrase tags", replaces spaces by underscores
        if (preg_match_all('#".*?"#', $tags, $matches)) {
            foreach ($matches[0] as $match) {
                // drop the surrounding quotes, join the words by underscores
                $replace = str_replace(' ', '_', substr($match, 1, -1));
                $tags = str_replace($match, $replace, $tags);
            }
        }

        $tags = preg_split('/ /', $tags, -1, PREG_SPLIT_NO_EMPTY);

        if ($clean) {
            return $this->cleanTagList($tags);
        }
        return $tags;
    }

    /**
     * Clean a list (array) of tags using cleanTag and remove duplicates
     *
     * @param string[] $tags
     * @return string[] cleaned tags, keys of the first occurrences are kept
     */
    public function cleanTagList($tags) {
        return array_unique(array_map([$this, 'cleanTag'], $tags));
    }

    /**
     * callback: Cleans a tag using cleanID while preserving a possible prefix of + or -, and replace placeholders
     *
     * @param string $tag
     * @return string
     */
    protected function cleanTag($tag) {
        // remember the operator of the raw tag before it gets cleaned
        $prefix = substr($tag, 0, 1);
        $tag = $this->replacePlaceholders($tag);
        if ($prefix === '-' || $prefix === '+') {
            return $prefix.cleanID($tag);
        }
        return cleanID($tag);
    }

    /**
     * Makes user or date dependent topic lists possible by replacing placeholders in tags
     *
     * Replaces @USER@, @NAME@, @GROUP@, @YEAR@, @MONTH@ and @DAY@.
     *
     * @param string $tag
     * @return string
     */
    protected function replacePlaceholders($tag) {
        global $USERINFO, $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');

        // name and group are only available for logged-in users
        $name  = '';
        $group = '';
        if (is_array($USERINFO)) {
            if (isset($USERINFO['name'])) {
                $name = cleanID($USERINFO['name']);
            }
            // FIXME or delete, is unreliable because just first entry of group array is used, regardless the order of groups..
            $groups = $USERINFO['grps'] ?? null;
            if (is_array($groups) && isset($groups[0])) {
                $group = cleanID($groups[0]);
            }
        }

        $replace = [
            '@USER@'  => cleanID($user),
            '@NAME@'  => $name,
            '@GROUP@' => $group,
            '@YEAR@'  => date('Y'),
            '@MONTH@' => date('m'),
            '@DAY@'   => date('d'),
        ];
        return str_replace(array_keys($replace), array_values($replace), $tag);
    }

    /**
     * Non-recursive function to check whether an array key is unique
     *
     * Numeric keys are increased by one until they are unused, other keys get a number appended.
     *
     * @param int|string $key
     * @param array $result
     * @return float|int|string
     *
     * @author    Ilya S. Lebedev <ilya@lebedev.net>
     * @author    Esther Brunner <wikidesign@gmail.com>
     */
    protected function uniqueKey($key, $result) {

        // increase numeric keys by one
        if (is_numeric($key)) {
            while (array_key_exists($key, $result)) {
                $key++;
            }
            return $key;
        }

        // append a number to literal keys
        $num     = 0;
        $testkey = $key;
        while (array_key_exists($testkey, $result)) {
            $testkey = $key.$num;
            $num++;
        }
        return $testkey;
    }

    /**
     * Opposite of isNotVisible()
     *
     * @param string $id the page id
     * @param string $ns
     * @return bool if the page is shown
     */
    public function isVisible($id, $ns='') {
        return !$this->isNotVisible($id, $ns);
    }

    /**
     * Check visibility of the page
     *
     * A page is not visible if it is hidden, not readable for the user, outside of the given
     * namespace or if it does not exist.
     *
     * @param string $id the page id
     * @param string $ns the namespace authorized
     * @return bool if the page is hidden
     */
    public function isNotVisible($id, $ns="") {
        // discard hidden pages
        if (isHiddenPage($id)) {
            return true;
        }
        // discard if user can't read
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return true;
        }

        // filter by namespace, root namespace is identified with a dot
        if ($ns == '.') {
            // root namespace is specified, discard all pages who lay outside the root namespace
            if (getNS($id) !== false) {
                return true;
            }
        } elseif ($ns && strpos(':'.getNS($id).':', ':'.$ns.':') !== 0) {
            // hide if ns is not matching the page id (match gives strpos===0)
            return true;
        }
        return !page_exists($id, '', false);
    }

    /**
     * callback Helper function for the indexer in order to avoid interpreting wildcards
     *
     * @param string $tag1 tag being searched
     * @param string $tag2 tag from index
     * @return bool is equal?
     */
    public function tagCompare($tag1, $tag2) {
        return $tag1 === $tag2;
    }

    /**
     * Check if the page is a real candidate for the result of the getTopic by comparing its tags with the wanted tags
     *
     * The query tags are evaluated in order: a failing '+tag' or a matching '-tag' resets the
     * result to false, a query tag found among the page tags sets it to true.
     *
     * NOTE(review): the result starts as false and only the last check can set it to true, so a
     * query consisting solely of '+tag'/'-tag' entries matches only through the loose
     * comparison of numeric tags - confirm whether this is intended.
     *
     * @param string[] $pageTags cleaned tags from the metadata of the page
     * @param string[] $queryTags tags we are looking ['tag', '+tag', '-tag']
     * @return bool
     */
    protected function matchWithPageTags($pageTags, $queryTags) {
        $result = false;
        foreach ($queryTags as $tag) {
            // substr() instead of $tag[0]: no warning for an empty tag
            $prefix = substr((string) $tag, 0, 1);
            if ($prefix === '+' && !in_array(substr($tag, 1), $pageTags)) {
                $result = false;
            }
            if ($prefix === '-' && in_array(substr($tag, 1), $pageTags)) {
                $result = false;
            }
            if (in_array($tag, $pageTags)) {
                $result = true;
            }
        }
        return $result;
    }


    /**
     * @deprecated 2022-08-31 use parseTagList() instead !
     *
     * @param string $tags
     * @param bool $clean
     * @return string[]
     */
    public function _parseTagList($tags, $clean = false) {
        return $this->parseTagList($tags, $clean);
    }

    /**
     * Opposite of isNotVisible()
     *
     * @deprecated 2022-08-31 use isVisible() instead !
     *
     * @param string $id
     * @param string $ns
     * @return bool
     */
    public function _isVisible($id, $ns='') {
        return $this->isVisible($id, $ns);
    }

    /**
     * Clean a list (array) of tags using cleanTag
     *
     * @deprecated 2022-08-31 use cleanTagList() instead !
     *
     * @param string[] $tags
     * @return string[]
     */
    public function _cleanTagList($tags) {
        return $this->cleanTagList($tags);
    }

    /**
     * Returns pages from index matching the tag query
     *
     * @param array $queryTags the tags to filter e.g. ['tag'(OR), '+tag'(AND), '-tag'(NOT)]
     * @return array the matching page ids
     *
     * @deprecated 2022-08-31 use getIndexedPagesMatchingTagQuery() instead !
     */
    public function _tagIndexLookup($queryTags) {
        return $this->getIndexedPagesMatchingTagQuery($queryTags);
    }

    /**
     * Get the subject metadata cleaning the result
     *
     * @deprecated 2022-08-31 use getTagsFromPageMetadata() instead !
     *
     * @param string $id the page id
     * @return array
     */
    public function _getSubjectMetadata($id) {
        return $this->getTagsFromPageMetadata($id);
    }
}
754