1<?php
2
3use dokuwiki\Utf8\PhpString;
4
5/**
6 * DokuWiki Plugin tagfilter (Helper Component)
7 *
8 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
9 * @author  lisps
10 */
11class helper_plugin_tagfilter extends DokuWiki_Plugin
12{
    /**
     * Helper component of the tag plugin as returned by loadHelper('tag') in the constructor
     *
     * Several methods of this class test the property for emptiness before using it.
     *
     * @var helper_plugin_tag
     */
    protected $taghelper;
18
19    /**
20     * Constructor gets default preferences and language strings
21     */
22    public function __construct()
23    {
24        $this->taghelper = $this->loadHelper('tag');
25    }
26
27    public function getMethods()
28    {
29        $result = [];
30        $result[] = [
31            'name' => 'getTagsByRegExp',
32            'desc' => 'returns tags for given Regular Expression',
33            'params' => [
34                'tags (required)' => 'string',
35                'namespace (optional)' => 'string',],
36            'return' => ['tags' => 'array'],
37        ];
38        $result[] = [
39            'name' => 'getTagsByNamespace',
40            'desc' => 'returns tags for given namespace',
41            'params' => [
42                'namespace' => 'string',],
43            'return' => ['tags' => 'array'],
44        ];
45        $result[] = [
46            'name' => 'getTagsByPageID',
47            'desc' => 'returns tags for given pageID',
48            'params' => [
49                'pageID' => 'string',],
50            'return' => ['tags' => 'array'],
51        ];
52
53        return $result;
54    }
55
56    /**
57     * Search in Tagindex for tags that matches the tag pattern and are in requested namespace
58     *
59     * @param string $tagExpression regexp pattern of wanted tags e.g. "status:.*"
60     * @param string $ns list only pages from this namespace
61     * @param bool $aclSafe if true, add only tags that are on readable pages
62     * @return string[]|false with tag=>label pairs
63     *
64     */
65    public function getTagsByRegExp($tagExpression, $ns = '', $aclSafe = false)
66    {
67        if (!$this->taghelper) {
68            return false;
69        }
70
71        $tags = $this->getIndex('subject', '_w');
72
73        $matchedTag_label = [];
74        foreach ($tags as $tag) {
75            if ($this->matchesTagExpression($tagExpression, $tag) && $this->isTagInNamespace($tag, $ns, $aclSafe)) {
76                $matchedTag_label[$tag] = $this->getTagLabel($tag);
77            }
78        }
79        asort($matchedTag_label);  //TODO update next release to dokuwiki builtin sort
80        return $matchedTag_label;
81    }
82
83
84    /**
85     * Test if tag matches with requested pattern
86     *
87     * @param string $tagExpression regexp pattern of wanted tags e.g. "status:.*"
88     * @param string $tag
89     * @return bool
90     */
91    public function matchesTagExpression($tagExpression, $tag)
92    {
93        return (bool)@preg_match('/^' . $tagExpression . '$/i', $tag);
94    }
95
96    /**
97     * Returns latest part of tag as label
98     *
99     * @param string $tag
100     * @return string
101     */
102    public function getTagLabel($tag)
103    {
104        $label = strrchr($tag, ':');
105        $label = $label != '' ? $label : $tag;
106        return PhpString::ucwords(str_replace('_', ' ', trim($label, ':')));
107    }
108
109
110    /**
111     * Returns all tags used in given namespace
112     *
113     * @param string $ns list only tags used on pages from this namespace
114     * @param bool $aclSafe if true, checks if user has read permission for the pages containing the tags
115     * @return array|false|int[]|string[]
116     */
117    public function getTagsByNamespace($ns = '', $aclSafe = true)
118    {
119        if (!$this->taghelper) {
120            return false;
121        }
122
123        return array_keys($this->getTagsByRegExp('.*', $ns, $aclSafe));
124    }
125
126    /**
127     * Checks if current user can read the given pageid
128     *
129     * @param string $pageid
130     * @return bool
131     */
132    public function canRead($pageid)
133    {
134        return auth_quickaclcheck($pageid) >= AUTH_READ;
135    }
136
137    /**
138     * Returns all tags for the given pageid
139     *
140     * @param string $pageID
141     * @return array|mixed
142     */
143    public function getTagsByPageID($pageID)
144    {
145        $meta = p_get_metadata($pageID, 'subject');
146        if ($meta === null) {
147            $meta = [];
148        }
149        return $meta;
150    }
151
152    /**
153     * Returns true if tags are equal
154     *
155     * @param string $tag1 tag being searched
156     * @param string $tag2 tag from index
157     * @return bool whether equal tags
158     */
159    public function tagCompare($tag1, $tag2)
160    {
161        return $tag1 == $tag2;
162    }
163
164
165    /**
166     * Checks if tag is used in the namespace, eventually can consider read permission as well
167     *
168     * @param string $tag
169     * @param string $ns list pages from this namespace
170     * @param bool $aclSafe if true, uses tag from a page only if user has read permissions
171     * @return bool
172     */
173    protected function isTagInNamespace($tag, $ns, $aclSafe = true)
174    {
175        if ($ns == '') {
176            return true;
177        }
178        if (!$this->taghelper) {
179            return false;
180        }
181
182        $indexer = idx_get_indexer();
183        $pages = $indexer->lookupKey('subject', $tag, [$this, 'tagCompare']);
184
185        foreach ($pages as $page) {
186            if ($this->taghelper->isVisible($page, $ns)) {
187                if (!$aclSafe) {
188                    return true;
189                }
190                if (auth_quickaclcheck($page) >= AUTH_READ) {
191                    return true;
192                }
193            }
194        }
195        return false;
196    }
197
198    /**
199     * Returns entire index file as array
200     *
201     * from inc/indexer.php
202     *
203     * @param string $idx
204     * @param string $suffix
205     * @return array|false
206     */
207    protected function getIndex($idx, $suffix)
208    {
209        global $conf;
210        $fn = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
211        if (!@file_exists($fn)) {
212            return [];
213        }
214        return file($fn, FILE_IGNORE_NEW_LINES);
215    }
216
    /** @var string namespace of the running page search, set by startPageSearch() */
    protected $ps_ns = '';
    /** @var array ids of the pages currently selected by the page search, changed by the add*Tag() methods */
    protected $ps_pages_id = [];
    /** @var array page entries as returned by the tag helper's getTopic(), keyed by page id */
    protected $ps_pages = [];
223
224    /**
225     * @param string $tag space separated tags
226     * @param string $ns list only pages from this namespace
227     * @return array
228     */
229    public function getPagesByTag($tag, $ns = '')
230    {
231        $tags = explode(' ', $tag);
232        $this->startPageSearch($ns);
233        foreach ($tags as $t) {
234            if ($t[0] == '+') {
235                $this->addAndTag(substr($t, 1));
236            } elseif ($t[0] == '-') {
237                $this->addSubTag(substr($t, 1));
238            } else {
239                $this->addOrTag($t);
240            }
241        }
242        return $this->getPages();
243    }
244
245    /**
246     * @param string $ns
247     */
248    protected function startPageSearch($ns = '')
249    {
250        $this->ps_ns = $ns;
251        $this->ps_pages_id = [];
252        $this->ps_pages = [];
253    }
254
255    /**
256     * @param string $tagExpression regexp pattern of wanted tags e.g. "status:.*"
257     */
258    protected function addAndTag($tagExpression)
259    {
260        $tags = $this->getTagsByRegExp($tagExpression, $this->ps_ns);
261        $pages = [];
262        foreach ($tags as $t => $v) {
263            $Hpages = $this->taghelper->getTopic($this->ps_ns, null, $t);
264            foreach ($Hpages as $p) {
265                $pages[] = $p['id'];
266                if (!isset($this->ps_pages[$p['id']])) {
267                    $this->ps_pages[$p['id']] = $p;
268                }
269            }
270
271        }
272        $pages = array_unique($pages);
273        $this->ps_pages_id = array_intersect($this->ps_pages_id, $pages);
274    }
275
276    /**
277     * @param string $tagExpression regexp pattern of wanted tags e.g. "status:.*"
278     */
279    protected function addSubTag($tagExpression)
280    {
281        $tags = $this->getTagsByRegExp($tagExpression, $this->ps_ns);
282        $pages = array();
283        foreach ($tags as $t => $v) {
284            $Hpages = $this->taghelper->getTopic($this->ps_ns, '', $t);
285            foreach ($Hpages as $p) {
286                $pages[] = $p['id'];
287            }
288        }
289        $pages = array_unique($pages);
290        $this->ps_pages_id = array_diff($this->ps_pages_id, $pages);
291    }
292
293    /**
294     * @param string $tagExpression regexp pattern of wanted tags e.g. "status:.*"
295     * @return void
296     */
297    protected function addOrTag($tagExpression)
298    {
299        $tags = $this->getTagsByRegExp($tagExpression, $this->ps_ns);
300        $pages = array();
301        foreach ($tags as $t => $v) {
302            $Hpages = $this->taghelper->getTopic($this->ps_ns, '', $t);
303            foreach ($Hpages as $p) {
304                $pages[] = $p['id'];
305                if (!isset($this->ps_pages[$p['id']])) {
306                    $this->ps_pages[$p['id']] = $p;
307                }
308            }
309        }
310        $pages = array_unique($pages);
311        $this->ps_pages_id = array_merge($this->ps_pages_id, $pages);
312        $this->ps_pages_id = array_unique($this->ps_pages_id);
313    }
314
315    /**
316     * @return array
317     */
318    protected function getPages()
319    {
320        $ret = [];
321        foreach ($this->ps_pages_id as $id) {
322            $ret[] = $this->ps_pages[$id];
323        }
324        return $ret;
325    }
326
327    /**
328     * @param string $tag
329     * @return false|string
330     */
331    public function getImageLinkByTag($tag)
332    {
333        $id = $this->getConf('nsTagImage') . ':' . str_replace([' ', ':'], '_', $tag);
334        $src = $id . '.jpg';
335        if (!@file_exists(mediaFN($src))) {
336            $src = $id . '.png';
337            if (!@file_exists(mediaFN($src))) {
338                $src = $id . '.jpeg';
339                if (!@file_exists(mediaFN($src))) {
340                    $src = false;
341                }
342            }
343        }
344        if ($src !== false) {
345            return ml($src);
346        }
347        return false;
348    }
349
350    /**
351     * Generate html for in a cell of the column of the tags as images
352     *
353     * @param string $id pageid
354     * @param string $col tagexpression: regexp pattern of wanted tags e.g. "status:.*"
355     * @param string $ns namespace with images
356     * @return string html of tagimage(s) in cell
357     */
358    public function getTagImageColumn($id, $col, $ns)
359    {
360        if (!isset($this->tagsPerPage[$id])) {
361            $this->tagsPerPage[$id] = $this->getTagsByPageID($id);
362        }
363        $foundTags = [];
364        foreach ($this->tagsPerPage[$id] as $tag) {
365            if ($this->matchesTagExpression($col, $tag)) {
366                $foundTags[] = hsc($this->getTagLabel($tag));
367            }
368        }
369        $images = [];
370        foreach ($foundTags as $foundTag) {
371            $imageid = $ns . ':' . substr($foundTag, strrpos($foundTag, ':'));
372
373            $src = $imageid . '.jpg';
374            if (!@file_exists(mediaFN($src))) {
375                $src = $imageid . '.png';
376                if (!@file_exists(mediaFN($src))) {
377                    $src = $imageid . '.jpeg';
378                    if (!@file_exists(mediaFN($src))) {
379                        $src = $imageid . '.gif';
380                        if (!@file_exists(mediaFN($src))) {
381                            $src = false;
382                        }
383                    }
384                }
385            }
386            if ($src !== false) {
387                $images[] = '<img src="' . ml($src) . ' " class="media" style="height:max-width:200px;"/>';
388            }
389        }
390
391        return implode("<br>", $images);
392
393    }
394
395    /**
396     * return all pages defined by tag_list_r in a specific namespace
397     *
398     * @param string $ns the namespace to look in
399     * @param array $tag_list_r an array containing strings with tags seperated by ' '
400     *
401     */
402    public function getAllPages($ns, $tag_list_r)
403    {
404        $pages = array();
405        $pages[''] = '';
406
407        $tag_list = implode(' ', $tag_list_r);
408
409        $page_r = $this->getPagesByTags($ns, $tag_list);
410
411        foreach ($page_r as $page) {
412            $title = p_get_metadata($page, 'title', METADATA_DONT_RENDER);
413            $title = $title ?: $page;
414            $pages[$page] = strip_tags($title);  //FIXME hsc() doesent work with chosen
415        }
416
417        asort($pages);
418        return $pages;
419    }
420
421    /**
422     * Returns page title, otherwise pageid
423     *
424     * @param string $pageid
425     * @return string
426     */
427    public function getPageTitle($pageid)
428    {
429        $title = p_get_metadata($pageid, 'title', METADATA_DONT_RENDER);
430        $title = $title ?: $pageid;
431        return strip_tags($title);
432    }
433
434    /**
435     * Gets the pages defined by tag_list
436     *
437     * partially copied from tag->helper with less checks (on cache) and no meta lookups
438     * @param string $ns the namespace to look in
439     * @param string $tag_list the tags separated by ' '
440     *
441     * @return array array of page ids
442     */
443    public function getPagesByTags($ns, $tag_list)
444    {
445        $tags = $this->taghelper->parseTagList($tag_list, true);
446        $matchedPages = $this->taghelper->getIndexedPagesMatchingTagQuery($tags);
447
448        $filteredPages = [];
449        foreach ($matchedPages as $matchedPage) {
450            // filter by namespace, root namespace is identified with a dot // root namespace is specified, discard all pages who lay outside the root namespace
451            if (($ns == '.' && getNS($matchedPage) === false) || strpos(':' . getNS($matchedPage) . ':', ':' . $ns . ':') === 0 || $ns === '') {
452                if (auth_quickaclcheck($matchedPage) >= AUTH_READ) {
453                    $filteredPages[] = $matchedPage;
454                }
455            }
456        }
457        return $filteredPages;
458    }
459
460    /**
461     * @param $tag
462     * @return string
463     */
464    public function getTagCategory($tag)
465    {
466        $label = strstr($tag, ':', true);
467        $label = $label != '' ? $label : $tag;
468        return PhpString::ucwords(str_replace('_', ' ', trim($label, ':')));
469    }
470
471    /**
472     * Used by pagelist plugin for filling the cell of the table header
473     *
474     * @param string $column column name is a tagexpression
475     * @return string
476     */
477    public function th($column = '')
478    {
479        if (strpos($column, '*')) {
480            return $this->getTagCategory($column);
481        } else {
482            return $this->getTagLabel($column);
483        }
484    }
485
    /** @var array[] cache filled by td() and getTagImageColumn(): pageid => result of getTagsByPageID() */
    protected $tagsPerPage = [];
488
489    /**
490     * Used by pagelist plugin for filling the cells of the table
491     * and in listing by the tagfilter
492     *
493     * @param string $id page id of row
494     * @param string $column column name is a tagexpression: regexp pattern of wanted tags e.g. "status:.*". Supported since 2022 in pagelist plugin
495     * @return string
496     */
497    public function td($id, $column = null)
498    {
499        if($column === null) {
500            return '';
501        }
502        if (!isset($this->tagsPerPage[$id])) {
503            $this->tagsPerPage[$id] = $this->getTagsByPageID($id);
504        }
505        $foundTags = [];
506        foreach ($this->tagsPerPage[$id] as $tag) {
507            if ($this->matchesTagExpression($column, $tag)) {
508                $foundTags[] = hsc($this->getTagLabel($tag));
509            }
510        }
511        return implode("<br>", $foundTags);
512    }
513
514
515    /**
516     * Returns per tag the pages where these are used as array with: tag=>array with pages
517     * The tags matches the tag regexp pattern and only shown if it is used at pages in requested namespace, these pages
518     * are listed in an array per tag
519     *
520     * Does not check ACL
521     *
522     * @param string $tags tag expression e.g. "status:.*"
523     * @param string $ns list only pages from this namespace
524     * @return array [tag]=>array pages where tag is used
525     */
526    public function getPagesByMatchedTags($tags, $ns = '')
527    {
528        if (!$this->taghelper) return [];
529
530        $tags = $this->taghelper->parseTagList($tags, false); //array
531
532        $indexer = idx_get_indexer();
533        $indexTags = array_keys($indexer->histogram(1, 0, 3, 'subject'));
534
535        $matchedTags = [];
536        foreach ($indexTags as $tag) {
537            foreach ($tags as $tagExpr) {
538                if ($this->matchesTagExpression($tagExpr, $tag))
539                    $matchedTags[] = $tag;
540            }
541        }
542        $matchedTags = array_unique($matchedTags);
543
544        $matchedPages = [];
545        foreach ($matchedTags as $tag) {
546            $pages = $this->taghelper->getIndexedPagesMatchingTagQuery([$tag]);
547
548            // keep only if in requested ns
549            $matchedPages[$tag] = array_filter($pages, function ($pageid) use ($ns) {
550                return $ns === '' || strpos(':' . getNS($pageid) . ':', ':' . $ns . ':') === 0;
551            });
552        }
553
554        //clean empty tags, because not in requested namespace
555        $matchedPages = array_filter($matchedPages);
556        ksort($matchedPages);
557
558        return $matchedPages;
559    }
560}
561
562