1 <?php
2 /**
3  * Cloud Plugin: shows a cloud of the most frequently used words
4  *
5  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6  * @author     Esther Brunner <wikidesign@gmail.com>
7  */
8 
9 use dokuwiki\File\PageResolver;
10 
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** Number of cloud entries used when the syntax gives no usable (positive) number */
    const DEFAULT_NUM = 50;

    /** @var string[] flag names accepted inside the [...] part of the syntax */
    protected $knownFlags = array('showCount');

    /** @var string[]|null stopwords removed from every cloud; null when none could be loaded */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    function getType() { return 'substition'; }
    function getPType() { return 'block'; }
    function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer.
     *
     * @param string $mode the parser mode the pattern is attached to
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched syntax into the data array consumed by render().
     *
     * The match is read as: an optional TAG/SEARCH prefix, an optional
     * [flag,flag] list, an optional ":number" and an optional ">ns1|ns2" part.
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array [type, number of entries, namespaces or null, flags]
     */
    function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip markup

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // Ensure we always have 2 entries in the exploded array
        list($num, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($junk, $num) = array_pad(explode(':', $num, 2), 2, '');

        // Every known flag keeps an explicit default so render() can rely on the key
        $flags = [
            'showCount' => false,
        ];
        if (preg_match('/\[.*]/', $junk, $matches) === 1) {
            $found = explode(',', trim($matches[0], '[]'));
            foreach ($found as $flag) {
                $flag = trim($flag); // tolerate "[ showCount ]"
                if (in_array($flag, $this->knownFlags, true)) {
                    // Actually we just set flags as present
                    // Later we might add values to flags like key=value pairs
                    $flags[$flag] = true;
                }
            }
        }

        // Only a positive number is usable as cloud size (it is used as a slice length later)
        $num = trim($num);
        $num = is_numeric($num) ? (int) (float) $num : self::DEFAULT_NUM;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // $ns is '' (never null) when the syntax has no ">" part
        $namespaces = ($ns !== '') ? explode('|', $ns) : null;

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a <div class="cloud"> containing one link per word.
     *
     * Only the 'xhtml' format is handled; other formats return false.
     *
     * @param string        $format   output format
     * @param Doku_Renderer $renderer the renderer whose ->doc is appended to
     * @param array         $data     the array returned by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    function render($format, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($format != 'xhtml') return false;

        // pad so that data without the flags entry still unpacks cleanly
        list($type, $num, $namespaces, $flags) = array_pad($data, 4, null);
        $showCount = is_array($flags) && !empty($flags['showCount']);

        $min = 0;
        $max = 0;
        $tag = null;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (is_array($cloud)) {
                $this->_filterCloud($cloud, 'search_blacklist');
                // calculate min/max values
                $min = PHP_INT_MAX;
                $max = 0;
                foreach ($cloud as $size) {
                    $min = min($size, $min);
                    $max = max($size, $max);
                }
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }

        // prevent caching to ensure the cloud is always fresh; done before the
        // empty check so that an (initially) empty cloud is not cached either
        $renderer->nocache();

        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // the tag page resolver does not depend on the word, so set it up once
        $isTagCloud = ($type == 'tag' && isset($tag));
        $resolver = null;
        if ($isTagCloud && class_exists('dokuwiki\File\PageResolver')) {
            // Compatibility with tag plugin < 2022-09-30
            $tagNs = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace;
            $resolver = new PageResolver($tagNs . ':');
        }

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            $word = (string) $word; // purely numeric words come back as integer array keys
            $class = $this->_getSizeClass($size, $min, $delta);
            $name = $word;

            if ($isTagCloud) {
                $exists = false;
                if ($resolver) {
                    $page = $resolver->resolveId($word);
                    $exists = page_exists($page);
                } else {
                    // Compatibility with Hogfather and older
                    $page = $word;
                    resolve_pageid($tag->namespace, $page, $exists);
                }
                if ($exists) {
                    $link = wl($page);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($page, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                    $class .= '_tag1';
                } else {
                    $link = wl($word, array('do' => 'showtag', 'tag' => $word));
                    $class .= '_tag2';
                }
                $title = $word;
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if ($showCount) {
                $name .= '(' . $size . ')';
            }

            // the title may be a raw tag/search word, so it is escaped for the attribute
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                           . ' title="' . hsc((string) $title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;
        return true;
    }

    /**
     * Maps a word's size to one of the CSS classes cloud1..cloud5.
     *
     * The thresholds lie at 1, 2, 4 and 8 times $delta above $min.
     *
     * @param int|float $size  size (count) of the word
     * @param int|float $min   smallest size in the cloud
     * @param int|float $delta one sixteenth of the size range
     * @return string the CSS class name
     */
    protected function _getSizeClass($size, $min, $delta) {
        if ($size < $min + round($delta)) return 'cloud1';
        if ($size < $min + round(2 * $delta)) return 'cloud2';
        if ($size < $min + round(4 * $delta)) return 'cloud3';
        if ($size < $min + round(8 * $delta)) return 'cloud4';
        return 'cloud5';
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the stopwords, or null when none were found
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt',
            DOKU_CONF . 'stopwords.txt', // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!@file_exists($swfile)) continue;

            $lines = @file($swfile, FILE_IGNORE_NEW_LINES);
            if (!is_array($lines)) continue; // unreadable file: skip instead of merging false

            foreach ($lines as $line) {
                $line = trim($line); // also drops the "\r" of CRLF files
                if ($line !== '') $stopwords[] = $line;
            }
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in configured blacklist $balcklistName from $cloud array
     *
     * @param array  $cloud         word => size array, modified in place
     * @param string $balcklistName name of the config option holding the comma separated blacklist
     */
    function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = (int) $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            if (iconv_strlen((string) $key, 'UTF-8') < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if ($this->stopwords != null) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces

            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int   $num maximum number of words
     * @param mixed $min receives the smallest size in the returned cloud
     * @param mixed $max receives the largest size in the returned cloud
     * @return array word => size
     */
    function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        } else {                                            // old index
            $idxFile  = $conf['cachedir'] . '/index.idx';
            $wordFile = $conf['cachedir'] . '/word.idx';

            // without an index there is simply nothing to show
            if (is_readable($idxFile) && is_readable($wordFile)) {
                $this->_addWordsToCloud($cloud, file($idxFile), file($wordFile));
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * The frequency is the number of ':'-separated entries on the index line
     * that has the same position as the word.
     *
     * @param array $cloud    word => size array, modified in place
     * @param array $idx      index lines
     * @param array $word_idx word lines, parallel to $idx
     */
    function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        if (!is_array($idx) || !is_array($word_idx)) return;

        // collect the frequency of the words
        foreach ($word_idx as $i => $line) {
            if (!isset($idx[$i])) continue; // no index line for this word

            $key = trim($line);
            if ($key === '') continue;

            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue;
            $cloud[$key] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * @param int               $num        maximum number of tags
     * @param mixed             $min        receives the smallest size in the returned cloud
     * @param mixed             $max        receives the largest size in the returned cloud
     * @param array|null        $namespaces namespaces passed on to the tag helper
     * @param helper_plugin_tag $tag        the tag helper
     * @return array tag => size
     */
    function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag) {
        $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num largest entries and returns them ordered by key.
     *
     * @param array $cloud word => size
     * @param int   $num   maximum number of entries; values below 1 fall back to the default
     * @param mixed $min   receives the smallest size of the kept entries
     * @param mixed $max   receives the largest size of the kept entries
     * @return array the kept entries, sorted by key
     */
    function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        $num = (int) $num;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // sort by frequency and keep the top entries ...
        arsort($cloud);
        $cloud = array_slice($cloud, 0, $num, true);
        $max = reset($cloud);
        $min = end($cloud);

        // ... then order them alphabetically
        ksort($cloud);

        return $cloud;
    }
}
299 // vim:ts=4:sw=4:et:
300