1<?php
2/**
3 * Cloud Plugin: shows a cloud of the most frequently used words
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Esther Brunner <wikidesign@gmail.com>
7 */
8
9use dokuwiki\File\PageResolver;
10
/**
 * Syntax component of the Cloud plugin.
 *
 * Recognises ~~CLOUD~~, ~~TAGCLOUD~~ and ~~SEARCHCLOUD~~ markup (optionally
 * followed by "[flags]", ":number" and ">namespace|namespace") and renders a
 * weighted list of links for the xhtml output format.
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** @var string[] names of the flags accepted inside the [...] part of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null merged stopword list, or null when no stopwords were loaded */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /**
     * Syntax mode category of this plugin.
     *
     * NOTE: the spelling 'substition' is intentional; it is the category name
     * used by the DokuWiki parser and must not be "corrected".
     *
     * @return string
     */
    function getType() { return 'substition'; }

    /**
     * Paragraph handling: the cloud is output as a block element.
     *
     * @return string
     */
    function getPType() { return 'block'; }

    /**
     * Sort value used when the parser modes are registered.
     *
     * @return int
     */
    function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer.
     *
     * @param string $mode parser mode the pattern is connected to
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into render instructions.
     *
     * Markup layout: ~~[TAG|SEARCH]CLOUD[flag,flag]:number>ns1|ns2~~
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array array($type, $num, $namespaces, $flags) where $type is
     *               'tag', 'search' or 'word', $num is a positive integer,
     *               $namespaces is a list of strings and $flags maps every
     *               known flag name to a boolean
     */
    function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip the surrounding ~~

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // array_pad guarantees two entries even when the separator is missing
        list($head, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($keyword, $num) = array_pad(explode(':', $head, 2), 2, '');

        // Every known flag always has an entry, so render() can rely on the keys
        // even when the [...] section names only unknown flags.
        $flags = array(
            'showCount' => false,
        );
        if (preg_match('/\[.*]/', $keyword, $matches) === 1) {
            foreach (explode(',', trim($matches[0], '[]')) as $flag) {
                $flag = trim($flag);
                if (in_array($flag, $this->knownFlags, true)) {
                    // Flags are only marked as present for now;
                    // key=value pairs may be added later.
                    $flags[$flag] = true;
                }
            }
        }

        // Fall back to 50 entries for a missing, non-numeric or non-positive
        // number; a size below 1 cannot be used to cut the cloud.
        $num = trim($num);
        $num = is_numeric($num) ? (int) $num : 0;
        if ($num < 1) $num = 50;

        // $ns is always a string here (see array_pad above); an empty string
        // yields array('').
        $namespaces = explode('|', $ns);

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a <div class="cloud"> containing one link per word.
     *
     * Only the 'xhtml' format is supported. Each link gets one of the classes
     * cloud1..cloud5 depending on where its count lies between the smallest
     * and largest count; tag links additionally get a _tag1 (tag page exists)
     * or _tag2 (no tag page) suffix.
     *
     * @param string        $format   output format
     * @param Doku_Renderer $renderer the renderer to write to
     * @param array         $data     the array returned by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    function render($format, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($format != 'xhtml') return false;

        // pad so that instructions lacking the flags entry do not raise notices
        list($type, $num, $namespaces, $flags) = array_pad($data, 4, null);
        $showCount = is_array($flags) && !empty($flags['showCount']);

        $min = 0;
        $max = 0;
        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (!is_array($cloud)) return false;
            $this->_filterCloud($cloud, 'search_blacklist');
            // calculate min/max values
            if (!empty($cloud)) {
                $min = min($cloud);
                $max = max($cloud);
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }
        if (!is_array($cloud) || empty($cloud)) return false;

        // Upper bounds (exclusive) of the first four size classes; everything
        // at or above the last bound is cloud5. Computed once, not per word.
        $delta = ($max - $min) / 16;
        $limits = array(
            'cloud1' => $min + round($delta),
            'cloud2' => $min + round(2 * $delta),
            'cloud3' => $min + round(4 * $delta),
            'cloud4' => $min + round(8 * $delta),
        );

        // prevent caching so the cloud is rebuilt on every page view
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            // purely numeric words are turned into integer keys by PHP
            $word = (string) $word;

            $class = 'cloud5';
            foreach ($limits as $candidate => $limit) {
                if ($size < $limit) {
                    $class = $candidate;
                    break;
                }
            }

            $name = $word;
            if ($type == 'tag' && isset($tag)) {
                if (class_exists('dokuwiki\File\PageResolver')) {
                    // Compatibility with tag plugin < 2022-09-30
                    $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace;
                    $resolver = new PageResolver($ns . ':');
                    $page = $resolver->resolveId($word);
                    $exists = page_exists($page);
                } else {
                    // Compatibility with Hogfather and older
                    $page = $word;
                    resolve_pageid($tag->namespace, $page, $exists);
                }
                if ($exists) {
                    // a page for this tag exists: link to it directly
                    $link = wl($page);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($page, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                    $class .= '_tag1';
                } else {
                    // no tag page: link to the tag listing instead
                    $link = wl($word, array('do' => 'showtag', 'tag' => $word));
                    $class .= '_tag2';
                }
                $title = $word;
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if ($showCount) {
                $name .= '(' . $size . ')';
            }
            // The title is escaped as well: tag names are wiki content and
            // must not be able to break out of the attribute.
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                           . ' title="' . hsc((string) $title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;
        return true;
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     * Files that are missing or cannot be read are skipped.
     *
     * @return string[]|null the stopwords, or null when none were loaded
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt',
            DOKU_CONF . 'stopwords.txt', // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!is_readable($swfile)) continue;
            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            // file() returns false on failure, which must not reach array_merge
            if (is_array($lines)) {
                $stopwords = array_merge($stopwords, $lines);
            }
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all stopwords loaded by the constructor
     * - removes all words in the configured blacklist $balcklistName
     *
     * @param array  $cloud         word => count map, modified in place
     * @param string $balcklistName name of the config option holding the
     *                              comma separated blacklist
     */
    function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach (array_keys($cloud) as $key) {
            // cast: purely numeric words are integer keys
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if (!empty($this->stopwords)) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            // split on commas and drop every space from the entries
            $blacklist = str_replace(' ', '', explode(',', $blacklist));

            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array built from the fulltext index.
     *
     * @param int   $num maximum number of words to return
     * @param mixed $min receives the smallest count in the returned cloud
     * @param mixed $max receives the largest count in the returned cloud
     * @return array word => count map
     */
    function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }

        } else {                                            // old index
            $idx      = @file($conf['cachedir'] . '/index.idx');
            $word_idx = @file($conf['cachedir'] . '/word.idx');

            // file() returns false when an index file cannot be read
            if (is_array($idx) && is_array($word_idx)) {
                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array.
     *
     * The frequency is the number of ':'-separated entries on the index line
     * that belongs to the word (presumably one entry per page - confirm
     * against the indexer format). Words whose index line is missing or has
     * an empty first entry are skipped.
     *
     * @param array $cloud    word => count map, modified in place
     * @param array $idx      lines of the index, parallel to $word_idx
     * @param array $word_idx lines of the word list
     */
    function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        // collect the frequency of the words
        foreach ($word_idx as $i => $word) {
            // the two lists may differ in length; skip words without index line
            if (!isset($idx[$i])) continue;

            $entries = explode(':', $idx[$i]);
            if (!trim($entries[0])) continue;

            $cloud[trim($word)] = count($entries);
        }
    }

    /**
     * Returns the sorted tag cloud array.
     *
     * @param int               $num        maximum number of tags to return
     * @param mixed             $min        receives the smallest count in the returned cloud
     * @param mixed             $max        receives the largest count in the returned cloud
     * @param array|null        $namespaces namespaces passed on to the tag helper
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => count map
     */
    function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag) {
        $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud.
     *
     * Keeps the $num entries with the highest counts and returns them ordered
     * by key. $min and $max are only written when entries are returned.
     *
     * @param array $cloud word => count map
     * @param int   $num   maximum number of entries to keep
     * @param mixed $min   receives the smallest count among the kept entries
     * @param mixed $max   receives the largest count among the kept entries
     * @return array the kept entries; $cloud itself when it is empty, an
     *               empty array when $num is smaller than 1
     */
    function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        // A size below 1 cannot select anything (and made array_chunk fail).
        $num = (int) $num;
        if ($num < 1) return array();

        // highest counts first, then keep only the top $num entries
        arsort($cloud);
        $top = array_slice($cloud, 0, $num, true);
        $max = reset($top);
        $min = end($top);

        // output order is by key
        ksort($top);

        return $top;
    }
}
299// vim:ts=4:sw=4:et:
300