1<?php
2/**
3 * Cloud Plugin: shows a cloud of the most frequently used words
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Esther Brunner <wikidesign@gmail.com>
7 */
8
/**
 * Syntax component of the cloud plugin.
 *
 * Recognises markup of the form
 *
 *     ~~<PREFIX>CLOUD[flag,flag]:<num>><ns1>|<ns2>~~
 *
 * where a PREFIX of "TAG" selects a tag cloud, "SEARCH" a cloud of search
 * words and anything else a cloud built from the fulltext word index.
 * The flags, the number of entries and the namespace list are all optional.
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** Number of cloud entries used when the markup gives no usable number */
    const DEFAULT_NUM = 50;

    /** @var string[] flag names accepted inside the [...] part of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null words removed from every cloud; null when no stopword file was loaded */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /** @return string the syntax type ('substition' is the spelling used by this file) */
    public function getType() { return 'substition'; }

    /** @return string the paragraph type of the generated output */
    public function getPType() { return 'block'; }

    /** @return int the sort value of this syntax mode */
    public function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern for the given lexer mode.
     *
     * @param string $mode name of the mode the pattern is added to
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data array consumed by render().
     *
     * @param string       $match   the text matched by the pattern registered in connectTo()
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array array($type, $num, $namespaces, $flags) where $type is
     *               'tag', 'search' or 'word', $num is a positive int,
     *               $namespaces is a list of strings or null and $flags maps
     *               every default flag name to a bool
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip the surrounding ~~

        if (substr($match, 0, 3) === 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) === 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // Split off the namespace list (after '>') and the number (after ':').
        // array_pad ensures we always have 2 entries in the exploded arrays.
        list($num, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($junk, $num) = array_pad(explode(':', $num, 2), 2, '');

        // Start from the defaults and only switch known flags on, so that every
        // flag key read in render() exists even if the markup names none of them.
        $flags = array(
            'showCount' => false,
        );
        if (preg_match('/\[.*\]/', $junk, $matches) === 1) {
            foreach (explode(',', trim($matches[0], '[]')) as $flag) {
                $flag = trim($flag); // tolerate "[a, b]"
                if (in_array($flag, $this->knownFlags, true)) {
                    // Actually we just set flags as present
                    // Later we might add values to flags like key=value pairs
                    $flags[$flag] = true;
                }
            }
        }

        // Anything that is not a positive number falls back to the default;
        // a value below 1 cannot be used as a slice length in _sortCloud().
        $num = is_numeric($num) ? (int) $num : self::DEFAULT_NUM;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // $ns is '' (never null) when the markup has no '>' part
        $namespaces = ($ns === '') ? null : explode('|', $ns);

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a <div class="cloud"> holding one link per entry.
     *
     * Only the 'xhtml' mode produces output.
     *
     * @param string        $mode     the render mode
     * @param Doku_Renderer $renderer the renderer whose doc is appended to
     * @param array         $data     the array returned by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($mode != 'xhtml') return false;

        // Be defensive about the shape of $data: pad missing entries with null
        // and treat a missing/odd flags entry as "no flags set".
        list($type, $num, $namespaces, $flags) = array_pad((array) $data, 4, null);
        $showCount = is_array($flags) && !empty($flags['showCount']);

        // Prevent caching to ensure the cloud is always fresh. This is done
        // before any early return so an empty cloud is not cached either.
        $renderer->nocache();

        $min = 0;
        $max = 0;
        $tag = null;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            $this->_filterCloud($cloud, 'search_blacklist');
            if (is_array($cloud)) {
                // calculate min/max values
                $min = PHP_INT_MAX;
                $max = 0;
                foreach ($cloud as $size) {
                    $min = min($size, $min);
                    $max = max($size, $max);
                }
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }
        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            $class = $this->_getSizeClass($size, $min, $delta);
            $name  = $word;
            $title = $size;

            if ($type == 'tag' && isset($tag)) {
                $id = $word;
                $exists = false;
                resolve_pageID($tag->namespace, $id, $exists);
                if ($exists) {
                    $link = wl($id);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($id, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                } else {
                    $link = wl($id, array('do'=>'showtag', 'tag'=>$word));
                }
                $title = $word;
                $class .= ($exists ? '_tag1' : '_tag2');
            } elseif ($conf['userewrite'] == 2) {
                $link = wl($word, array('do'=>'search', 'id'=>$word));
            } else {
                $link = wl($word, 'do=search');
            }

            if ($showCount) {
                $name .= '('.$size.')';
            }

            // The title is the raw tag name for tag clouds, so it is escaped
            // like the link text before it goes into the attribute.
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                           . ' title="' . hsc((string) $title) . '">' . hsc((string) $name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;
        return true;
    }

    /**
     * Maps an entry's size to one of the CSS classes cloud1 .. cloud5.
     *
     * The class boundaries lie at $min plus 1, 2, 4 and 8 times $delta
     * (each rounded), so the steps grow towards the larger sizes.
     *
     * @param int|float $size  size (count) of the entry
     * @param int|float $min   smallest size in the cloud
     * @param int|float $delta one sixteenth of the size range of the cloud
     * @return string the CSS class name
     */
    protected function _getSizeClass($size, $min, $delta) {
        if ($size < $min + round($delta)) return 'cloud1';
        if ($size < $min + round(2 * $delta)) return 'cloud2';
        if ($size < $min + round(4 * $delta)) return 'cloud3';
        if ($size < $min + round(8 * $delta)) return 'cloud4';
        return 'cloud5';
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the merged stopwords, or null when none were loaded
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt', // language stopwords
            DOKU_CONF.'stopwords.txt',                           // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!@file_exists($swfile)) continue;

            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            if ($lines === false) continue; // existing but unreadable file

            // trim so that a "\r" left over from CRLF line ends cannot prevent a match
            $stopwords = array_merge($stopwords, array_map('trim', $lines));
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in configured blacklist $balcklistName from $cloud array
     *
     * @param array  $cloud         word => count map, modified in place; anything
     *                              that is not an array is left untouched
     * @param string $balcklistName name of the config option holding the
     *                              comma separated blacklist
     */
    public function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            if (iconv_strlen($key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if (!empty($this->stopwords)) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]); // unset() of a missing key is a no-op
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces

            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int       $num maximum number of words in the cloud
     * @param int|float $min set to the smallest count in the returned cloud
     * @param int|float $max set to the largest count in the returned cloud
     * @return array word => count map
     */
    public function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'].'/page.idx')) { // new word-length based index
            require_once(DOKU_INC.'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }

        } else {                                          // old index
            $idxFile  = $conf['cachedir'].'/index.idx';
            $wordFile = $conf['cachedir'].'/word.idx';

            // without both files there is nothing to count
            if (@file_exists($idxFile) && @file_exists($wordFile)) {
                $idx      = file($idxFile);
                $word_idx = file($wordFile);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * The frequency of a word is the number of ':' separated entries on the
     * line of $idx that has the same position as the word in $word_idx.
     *
     * @param array $cloud    word => count map, extended in place
     * @param array $idx      lines of the index file
     * @param array $word_idx lines of the word list file
     */
    public function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        // file() yields false for an unreadable file
        if (!is_array($idx) || !is_array($word_idx)) return;

        // collect the frequency of the words
        foreach ($word_idx as $i => $word) {
            if (!isset($idx[$i])) continue; // word without an index line

            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue; // empty index line

            $cloud[trim($word)] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * $namespaces carries no default value: a default in front of the required
     * $tag parameter could never be used and is deprecated as of PHP 8.0.
     *
     * @param int               $num        maximum number of tags in the cloud
     * @param int|float         $min        set to the smallest count in the returned cloud
     * @param int|float         $max        set to the largest count in the returned cloud
     * @param string[]|null     $namespaces namespaces handed to the tag helper, null for none
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => count map
     */
    public function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag &$tag) {
        $cloud = $tag->tagOccurrences(NULL, $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num entries with the highest counts and returns them
     * ordered by key.
     *
     * @param array     $cloud word => count map
     * @param int       $num   maximum number of entries to keep; values below 1
     *                         are replaced by DEFAULT_NUM
     * @param int|float $min   set to the smallest count of the kept entries
     * @param int|float $max   set to the largest count of the kept entries
     * @return array the reduced map; an empty or non-array $cloud is returned unchanged
     */
    public function _sortCloud($cloud, $num, &$min, &$max) {
        if (!is_array($cloud) || empty($cloud)) return $cloud;

        $num = (int) $num;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // pick the most frequent entries ...
        arsort($cloud);
        $cloud = array_slice($cloud, 0, $num, true);
        $max = reset($cloud);
        $min = end($cloud);

        // ... and present them ordered by key
        ksort($cloud);

        return $cloud;
    }
}
289// vim:ts=4:sw=4:et:
290