<?php
/**
 * Cloud Plugin: shows a cloud of the most frequently used words
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Esther Brunner <wikidesign@gmail.com>
 */

use dokuwiki\File\PageResolver;

/**
 * Syntax component handling the ~~CLOUD~~, ~~TAGCLOUD~~ and ~~SEARCHCLOUD~~ markup.
 *
 * Accepted markup (everything after the keyword is optional):
 *   ~~[TAG|SEARCH]CLOUD[flag,flag]:number>namespace|namespace~~
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {
    /** @var string[] names of the flags accepted inside the [...] part of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null stopwords removed from every cloud, null when no stopword file was found */
    protected $stopwords = null;

    /** Number of entries shown when the markup gives no usable number. */
    const DEFAULT_ENTRIES = 50;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /** @return string syntax mode type ('substition' is the spelling used by DokuWiki) */
    function getType() { return 'substition'; }

    /** @return string paragraph handling type */
    function getPType() { return 'block'; }

    /** @return int sort position among the syntax modes */
    function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer.
     *
     * @param string $mode parser mode the pattern is connected to
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data array consumed by render().
     *
     * @param string       $match   the matched markup including the surrounding ~~
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler handler instance (unused)
     * @return array [type, number of entries, namespaces or null, flags]
     */
    function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip markup

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // Ensure we always have 2 entries in the exploded array
        list($num, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($junk, $num) = array_pad(explode(':', $num, 2), 2, '');

        // Start from the defaults and only override them; resetting the array
        // would drop the 'showCount' key whenever only unknown flags are given.
        $flags = array(
            'showCount' => false,
        );
        if (preg_match('/\[.*]/', $junk, $matches) === 1) {
            foreach (explode(',', trim($matches[0], '[]')) as $flag) {
                $flag = trim($flag);
                if (in_array($flag, $this->knownFlags, true)) {
                    // Actually we just set flags as present
                    // Later we might add values to flags like key=value pairs
                    $flags[$flag] = true;
                }
            }
        }

        // A count below 1 cannot be used to slice the cloud, fall back to the default
        $num = is_numeric($num) ? (int) $num : 0;
        if ($num < 1) $num = self::DEFAULT_ENTRIES;

        // $ns is an empty string (never null) when no namespace part was given
        $namespaces = ($ns !== '') ? explode('|', $ns) : null;

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a <div class="cloud"> containing one link per entry.
     *
     * @param string        $format   output format, only 'xhtml' is handled
     * @param Doku_Renderer $renderer renderer whose ->doc receives the markup
     * @param array         $data     data array created by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    function render($format, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($format != 'xhtml') return false;

        // pad so that data arrays without the flags element still unpack cleanly
        list($type, $num, $namespaces, $flags) = array_pad((array) $data, 4, null);
        $showCount = is_array($flags) && !empty($flags['showCount']);

        $min = 0;
        $max = 0;
        $tag = null;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (is_array($cloud)) {
                $this->_filterCloud($cloud, 'search_blacklist');
                // calculate min/max values
                $min = PHP_INT_MAX;
                $max = 0;
                foreach ($cloud as $size) {
                    $min = min($size, $min);
                    $max = max($size, $max);
                }
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }
        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // prevent caching to ensure the included pages are always fresh
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            // purely numeric words come back as integer array keys
            $word = (string) $word;

            // size classes grow in doubling steps of the min..max range
            if ($size < $min + round($delta)) $class = 'cloud1';
            elseif ($size < $min + round(2 * $delta)) $class = 'cloud2';
            elseif ($size < $min + round(4 * $delta)) $class = 'cloud3';
            elseif ($size < $min + round(8 * $delta)) $class = 'cloud4';
            else $class = 'cloud5';

            $name = $word;
            if ($type == 'tag' && isset($tag)) {
                if (class_exists('dokuwiki\File\PageResolver')) {
                    // Compatibility with tag plugin < 2022-09-30
                    $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace;
                    $resolver = new PageResolver($ns . ':');
                    $page = $resolver->resolveId($word);
                    $exists = page_exists($page);
                } else {
                    // Compatibility with Hogfather and older
                    $page = $word;
                    resolve_pageid($tag->namespace, $page, $exists);
                }
                if ($exists) {
                    $link = wl($page);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($page, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                    $class .= '_tag1';
                } else {
                    $link = wl($word, array('do' => 'showtag', 'tag' => $word));
                    $class .= '_tag2';
                }
                $title = $word;
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if ($showCount) {
                $name .= '(' . $size . ')';
            }

            // the title is user-influenced text (the tag itself) and must be escaped
            // just like the link text
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                . ' title="' . hsc((string) $title) . '">' . hsc((string) $name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;
        return true;
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the stopwords, or null when none could be loaded
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt', // language defaults
            DOKU_CONF . 'stopwords.txt',                               // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!@file_exists($swfile)) continue;
            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            // file() returns false on failure, which must not reach array_merge()
            if (is_array($lines)) {
                $stopwords = array_merge($stopwords, $lines);
            }
        }

        if (count($stopwords) == 0) {
            return null;
        }

        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in the configured blacklist $balcklistName from the $cloud array
     *
     * @param array  $cloud         word => count map, modified in place
     * @param string $balcklistName name of the config option holding the comma separated blacklist
     */
    function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            // numeric words are integer keys, iconv_strlen() needs a string
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if ($this->stopwords != null) {
            foreach ($this->stopwords as $word) {
                if (isset($cloud[$word])) {
                    unset($cloud[$word]);
                }
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces

            foreach ($blacklist as $word) {
                if (isset($cloud[$word])) {
                    unset($cloud[$word]);
                }
            }
        }
    }

    /**
     * Returns the sorted word cloud array built from the fulltext index.
     *
     * @param int $num maximum number of words to return
     * @param int $min receives the lowest count among the returned words
     * @param int $max receives the highest count among the returned words
     * @return array word => count map
     */
    function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        } else { // old index
            $idxFile  = $conf['cachedir'] . '/index.idx';
            $wordFile = $conf['cachedir'] . '/word.idx';

            // only read the files when both are present, a missing index gives an empty cloud
            if (@file_exists($idxFile) && @file_exists($wordFile)) {
                $this->_addWordsToCloud($cloud, file($idxFile), file($wordFile));
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array.
     *
     * The frequency is the number of ':'-separated entries on the index line
     * belonging to the word.
     *
     * @param array $cloud    word => count map, extended in place
     * @param array $idx      lines of the 'i' index, parallel to $word_idx
     * @param array $word_idx lines of the 'w' index (one word per line)
     */
    function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        // file() hands back false when an index file could not be read
        if (!is_array($idx) || !is_array($word_idx)) return;

        $wcount = count($word_idx);

        // collect the frequency of the words
        for ($i = 0; $i < $wcount; $i++) {
            // the two index files may differ in length, skip words without an index line
            if (!isset($word_idx[$i]) || !isset($idx[$i])) continue;

            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue;

            $cloud[trim($word_idx[$i])] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array.
     *
     * @param int               $num        maximum number of tags to return
     * @param int               $min        receives the lowest count among the returned tags
     * @param int               $max        receives the highest count among the returned tags
     * @param string[]|null     $namespaces namespaces handed to the tag helper, null for none
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => count map
     */
    function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag) {
        $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud.
     *
     * Keeps the $num most frequent entries and returns them ordered by key.
     *
     * @param array $cloud word => count map
     * @param int   $num   maximum number of entries to keep
     * @param int   $min   receives the lowest count among the kept entries
     * @param int   $max   receives the highest count among the kept entries
     * @return array the kept entries; $min and $max are left untouched when it is empty
     */
    function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        // a length below 1 would make the slicing fail (ValueError in array_chunk on PHP 8)
        $num = (int) $num;
        if ($num < 1) return array();

        // sort by frequency, keep the top entries, then order them alphabetically
        arsort($cloud);
        $top = array_slice($cloud, 0, $num, true);
        $max = reset($top);
        $min = end($top);
        ksort($top);

        return $top;
    }
}
// vim:ts=4:sw=4:et: