1<?php
2/**
3 * Cloud Plugin: shows a cloud of the most frequently used words
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Esther Brunner <wikidesign@gmail.com>
7 */
8
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** @var string[] flag names accepted inside the square brackets of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null merged stopword list, or null when no stopword was loaded */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /** @return string the syntax type reported for this plugin */
    function getType() { return 'substition'; }

    /** @return string the paragraph type reported for this plugin */
    function getPType() { return 'block'; }

    /** @return int the sort value reported for this plugin */
    function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer for the given mode.
     *
     * @param string $mode lexer mode the pattern is attached to
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data array consumed by render().
     *
     * The text between the tildes is read as
     * KEYWORD[flag,flag]:NUMBER>namespace|namespace where everything after the
     * keyword is optional. A keyword starting with TAG selects a tag cloud, one
     * starting with SEARCH a search cloud, anything else a word cloud.
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array array($type, $num, $namespaces, $flags)
     */
    function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip markup

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // array_pad() guarantees both list() targets exist when the
        // separator is absent, so no "undefined offset" notice is raised
        list($head, $ns)      = array_pad(explode('>', $match, 2), 2, null);
        list($keyword, $num)  = array_pad(explode(':', $head, 2), 2, null);

        $flags = array();
        $found = array();
        if (preg_match('/\[.*\]/', $keyword, $found) === 1) {
            foreach (explode(',', trim($found[0], '[]')) as $flag) {
                $flag = trim($flag); // tolerate "[ showCount ]"
                if (in_array($flag, $this->knownFlags, true)) {
                    // Actually we just set flags as present
                    // Later we might add values to flags like key=value pairs
                    $flags[$flag] = true;
                }
            }
        }

        // fall back to 50 entries for missing, non-numeric or non-positive sizes
        if (!is_numeric($num) || (int) $num < 1) {
            $num = 50;
        } else {
            $num = (int) $num;
        }

        $namespaces = is_null($ns) ? null : explode('|', $ns);

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a <div class="cloud"> containing one link per entry.
     *
     * Only the 'xhtml' mode is handled. Each link gets one of the classes
     * cloud1..cloud5 depending on where its count lies between the smallest
     * and the largest count of the cloud; tag links additionally get a
     * _tag1 (page exists) or _tag2 (page missing) suffix.
     *
     * @param string        $mode     output format being rendered
     * @param Doku_Renderer $renderer renderer whose ->doc is appended to
     * @param array         $data     array($type, $num, $namespaces, $flags) from handle()
     * @return bool true when a cloud was written, false otherwise
     */
    function render($mode, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($mode != 'xhtml') return false;

        list($type, $num, $namespaces, $flags) = $data;
        if (!is_array($flags)) $flags = array();

        $tag = null;
        $min = 0;
        $max = 0;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (!is_array($cloud)) return false;
            $this->_filterCloud($cloud, 'search_blacklist');
            // calculate min/max values
            $min = PHP_INT_MAX;
            $max = 0;
            foreach ($cloud as $size) {
                $min = min($size, $min);
                $max = max($size, $max);
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }

        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // prevent caching to ensure the included pages are always fresh
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            if ($size < $min + round($delta)) $class = 'cloud1';
            elseif ($size < $min + round(2 * $delta)) $class = 'cloud2';
            elseif ($size < $min + round(4 * $delta)) $class = 'cloud3';
            elseif ($size < $min + round(8 * $delta)) $class = 'cloud4';
            else $class = 'cloud5';

            $name = $word;
            if ($type == 'tag' && isset($tag)) {
                $id = $word;
                $exists = false;
                resolve_pageID($tag->namespace, $id, $exists);
                if ($exists) {
                    $link = wl($id);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($id, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                } else {
                    $link = wl($id, array('do' => 'showtag', 'tag' => $word));
                }
                $title = $word;
                $class .= ($exists ? '_tag1' : '_tag2');
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            // !empty() also covers a flags array without the 'showCount' key
            if (!empty($flags['showCount'])) {
                $name .= '(' . $size . ')';
            }

            // $title may be a raw tag word, so escape it like the link text
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                           . ' title="' . hsc($title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;

        return true;
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the merged stopwords, or null when none were found
     */
    protected function _getStopwords() {
        // $conf is a global; without this declaration $conf['lang'] is
        // undefined here and the language file is never found
        global $conf;

        $stopwords = array();
        $swfiles = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt', // language stopwords
            DOKU_CONF . 'stopwords.txt',                               // extra local stopwords
        );

        foreach ($swfiles as $swfile) {
            if (!@file_exists($swfile)) continue;
            $lines = @file($swfile, FILE_IGNORE_NEW_LINES);
            if ($lines === false) continue; // unreadable file: skip instead of merging false
            // trim() also drops a "\r" left over from CRLF line endings
            $stopwords = array_merge($stopwords, array_map('trim', $lines));
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in the configured blacklist $balcklistName
     *
     * @param array  $cloud         word => count array, modified in place
     * @param string $balcklistName name of the config option holding the
     *                              comma separated blacklist
     */
    function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            // array keys that look like integers are ints, hence the cast
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if ($this->stopwords != null) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces

            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int $num maximum number of words to return
     * @param int $min set to the smallest count in the returned cloud
     * @param int $max set to the largest count in the returned cloud
     * @return array word => count
     */
    function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }

        } else {                                             // old index
            // file() returns false when an index file is missing or unreadable;
            // _addWordsToCloud() ignores non-array input
            $idx      = @file($conf['cachedir'] . '/index.idx');
            $word_idx = @file($conf['cachedir'] . '/word.idx');

            $this->_addWordsToCloud($cloud, $idx, $word_idx);
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * The frequency stored for a word is the number of ':' separated entries
     * on the line of $idx that has the same position as the word in $word_idx.
     * Lines whose first entry is empty are skipped.
     *
     * @param array $cloud    word => count array, modified in place
     * @param array $idx      index lines, parallel to $word_idx
     * @param array $word_idx word lines
     */
    function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        if (!is_array($idx) || !is_array($word_idx)) return;

        $wcount = count($word_idx);

        // collect the frequency of the words
        for ($i = 0; $i < $wcount; $i++) {
            // the two indexes may differ in length
            if (!isset($word_idx[$i], $idx[$i])) continue;

            $key = trim($word_idx[$i]);
            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue;
            $cloud[$key] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * $namespaces no longer declares a default value: an optional parameter in
     * front of the required $tag could never be omitted anyway and is
     * deprecated since PHP 8.0. Existing five-argument calls are unaffected.
     *
     * @param int               $num        maximum number of tags to return
     * @param int               $min        set to the smallest count in the returned cloud
     * @param int               $max        set to the largest count in the returned cloud
     * @param array|null        $namespaces namespaces passed on to the tag helper
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => count
     */
    function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag &$tag) {
        $cloud = $tag->tagOccurrences(NULL, $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num entries with the highest counts and returns them ordered
     * by key. An empty cloud is returned unchanged and leaves $min and $max
     * untouched.
     *
     * @param array $cloud word => count
     * @param int   $num   maximum number of entries to keep; values below 1 are treated as 1
     * @param int   $min   set to the smallest count that was kept
     * @param int   $max   set to the largest count that was kept
     * @return array the kept entries, sorted by key
     */
    function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        // a slice length below 1 would yield nothing useful
        $num = max(1, (int) $num);

        // sort by frequency, then alphabetically
        arsort($cloud);
        // only the top entries are needed; preserve_keys keeps numeric words intact
        $cloud = array_slice($cloud, 0, $num, true);
        $max = reset($cloud);
        $min = end($cloud);
        ksort($cloud);

        return $cloud;
    }
}
284// vim:ts=4:sw=4:et:
285