<?php
/**
 * Cloud Plugin: shows a cloud of the most frequently used words
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Esther Brunner <wikidesign@gmail.com>
 */

/**
 * Syntax component that turns `~~CLOUD~~`, `~~TAGCLOUD~~` and `~~SEARCHCLOUD~~`
 * style markup into a block of links whose CSS class reflects how often each
 * word, tag or search term occurs.
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** @var string[] flag names accepted inside the optional `[...]` part of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null merged stopword list, or null when no stopwords file was found */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /** @return string syntax mode type (the spelling is a runtime string and is kept as-is) */
    public function getType() { return 'substition'; }

    /** @return string paragraph type */
    public function getPType() { return 'block'; }

    /** @return int sort position of this syntax mode */
    public function getSort() { return 98; }

    /**
     * Registers the special pattern that triggers this plugin.
     *
     * @param string $mode the lexer mode the pattern is added to
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data array consumed by render().
     *
     * Accepted shape: `~~[TAG|SEARCH]CLOUD[flag,flag]:number>ns1|ns2~~` where the
     * flags, the number and the namespace list are all optional.
     *
     * @param string       $match   the full matched markup including the `~~` delimiters
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array array($type, $num, $namespaces, $flags)
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip the leading and trailing "~~"

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // array_pad guarantees two entries even when the separator is missing
        list($num, $ns)   = array_pad(explode('>', $match, 2), 2, '');
        list($junk, $num) = array_pad(explode(':', $num, 2), 2, '');

        // Start from the defaults and only switch known flags on, so every
        // known flag key is always present in the result.
        $flags = array(
            'showCount' => false,
        );
        if (preg_match('/\[.*\]/', $junk, $matches) === 1) {
            $found = explode(',', trim($matches[0], '[]'));
            foreach ($found as $flag) {
                $flag = trim($flag);
                if (in_array($flag, $this->knownFlags, true)) {
                    // Flags are only marked as present for now;
                    // key=value style flags could be added later.
                    $flags[$flag] = true;
                }
            }
        }

        // Only a positive whole number is a usable cloud size; fall back to 50 otherwise.
        $num = (is_numeric($num) && (int) $num > 0) ? (int) $num : 50;

        // explode()/array_pad() always produce a string here, so "no namespace
        // given" has to be detected via the empty string, not via null.
        $namespaces = ($ns === '') ? null : explode('|', $ns);

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as XHTML.
     *
     * @param string        $mode     output format; only 'xhtml' is handled
     * @param Doku_Renderer $renderer the renderer whose ->doc receives the markup
     * @param array         $data     the array produced by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($mode != 'xhtml') return false;

        // Pad so that data with fewer than four entries does not raise notices.
        list($type, $num, $namespaces, $flags) = array_pad((array) $data, 4, null);

        $min = 0;
        $max = 0;
        $tag = null;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (!is_array($cloud)) return false;
            $this->_filterCloud($cloud, 'search_blacklist');
            // calculate min/max values
            $min = PHP_INT_MAX;
            $max = 0;
            foreach ($cloud as $size) {
                $min = min($size, $min);
                $max = max($size, $max);
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }

        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // prevent caching to ensure the included pages are always fresh
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            $class = $this->_getSizeClass($size, $min, $delta);
            $name  = $word;

            if ($type == 'tag' && isset($tag)) {
                $id     = $word;
                $exists = false;
                resolve_pageID($tag->namespace, $id, $exists);
                if ($exists) {
                    $link = wl($id);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($id, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                } else {
                    $link = wl($id, array('do' => 'showtag', 'tag' => $word));
                }
                $title  = $word;
                $class .= ($exists ? '_tag1' : '_tag2');
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if (!empty($flags['showCount'])) {
                $name .= '(' . $size . ')';
            }

            // $title may be a raw tag name, so it is escaped before being
            // placed inside the attribute, just like the link text.
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                . ' title="' . hsc((string) $title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;

        return true;
    }

    /**
     * Picks the CSS size class for one cloud entry.
     *
     * The thresholds sit at 1, 2, 4 and 8 times $delta above $min.
     *
     * @param int|float $size  occurrence count of the entry
     * @param int|float $min   smallest count in the cloud
     * @param int|float $delta one sixteenth of the count range
     * @return string one of 'cloud1' .. 'cloud5'
     */
    protected function _getSizeClass($size, $min, $delta) {
        if ($size < $min + round($delta))     return 'cloud1';
        if ($size < $min + round(2 * $delta)) return 'cloud2';
        if ($size < $min + round(4 * $delta)) return 'cloud3';
        if ($size < $min + round(8 * $delta)) return 'cloud4';
        return 'cloud5';
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the merged list, or null when nothing was loaded
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt',
            DOKU_CONF . 'stopwords.txt', // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!is_readable($swfile)) continue;

            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            if (!is_array($lines)) continue; // file() returns false on failure

            // trim so that e.g. a trailing "\r" does not prevent a match
            $stopwords = array_merge($stopwords, array_map('trim', $lines));
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in the configured blacklist $balcklistName
     *
     * @param array  $cloud         word => count map, modified in place
     * @param string $balcklistName name of the config option holding a comma separated blacklist
     */
    public function _filterCloud(&$cloud, $balcklistName) {
        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            // numeric-looking keys are integers in PHP arrays, hence the cast
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if ($this->stopwords != null) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces

            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int $num maximum number of words to return
     * @param int $min receives the smallest count of the returned words
     * @param int $max receives the largest count of the returned words
     * @return array word => count
     */
    public function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        } else { // old index
            $idxFile  = $conf['cachedir'] . '/index.idx';
            $wordFile = $conf['cachedir'] . '/word.idx';

            // Without this check a missing index would hand false to count().
            if (is_readable($idxFile) && is_readable($wordFile)) {
                $this->_addWordsToCloud($cloud, file($idxFile), file($wordFile));
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * @param array $cloud    word => count map, modified in place
     * @param array $idx      index lines; entry $i belongs to word $i and is a ':' separated list
     * @param array $word_idx the words, one per entry
     */
    public function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        if (!is_array($idx) || !is_array($word_idx)) return;

        // collect the frequency of the words
        foreach ($word_idx as $i => $word) {
            if (!isset($idx[$i])) continue; // the two index files are out of step

            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue;

            $cloud[trim($word)] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * $namespaces has no default: a default before the required $tag
     * parameter could never be used and is deprecated on current PHP versions.
     *
     * @param int               $num        maximum number of tags to return
     * @param int               $min        receives the smallest count of the returned tags
     * @param int               $max        receives the largest count of the returned tags
     * @param array|null        $namespaces namespaces handed to the tag helper, or null
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => count
     */
    public function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag &$tag) {
        $cloud = $tag->tagOccurrences(null, $namespaces, true, $this->getConf('list_tags_of_subns'));
        if (!is_array($cloud)) return array();

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num most frequent entries and returns them ordered by key.
     *
     * @param array $cloud word => count
     * @param int   $num   maximum number of entries to keep (values below 1 are treated as 1)
     * @param int   $min   receives the smallest count of the kept entries
     * @param int   $max   receives the largest count of the kept entries
     * @return array word => count
     */
    public function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        $num = max(1, (int) $num);

        // most frequent first, then keep only the top $num entries
        arsort($cloud);
        $cloud = array_slice($cloud, 0, $num, true);

        $max = reset($cloud);
        $min = end($cloud);

        // final order is by key
        ksort($cloud);

        return $cloud;
    }
}
// vim:ts=4:sw=4:et: