*/

use dokuwiki\File\PageResolver;

/**
 * Syntax component that renders a cloud of words, tags or search terms.
 *
 * Recognised markup is anything matching ~~<prefix>CLOUD<options>~~ where
 *  - a keyword starting with TAG selects the tag cloud (needs the tag helper plugin),
 *  - a keyword starting with SEARCH selects the search-term cloud (needs the
 *    searchstats helper plugin),
 *  - anything else selects the word cloud built from the fulltext index.
 *
 * Options: "[flag,flag]" directly after the keyword, ":<number>" for the
 * maximum number of entries and ">ns1|ns2" for a namespace list.
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin
{
    /** Number of entries shown when the markup gives no usable count. */
    const DEFAULT_NUM = 50;

    /** @var string[] flags that may appear inside the [...] part of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null list of stopwords, or null when no stopword file was found */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct()
    {
        $this->stopwords = $this->_getStopwords();
    }

    /**
     * @return string syntax mode type (the spelling 'substition' is the value the original code returns)
     */
    public function getType()
    {
        return 'substition';
    }

    /**
     * @return string paragraph type
     */
    public function getPType()
    {
        return 'block';
    }

    /**
     * @return int sort order used when registering the mode
     */
    public function getSort()
    {
        return 98;
    }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer.
     *
     * @param string $mode lexer mode the pattern is added to
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data array consumed by render().
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler handler instance (unused)
     * @return array [type, number of entries, namespaces|null, flags]
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        $match = substr($match, 2, -2); // strip the surrounding ~~

        if (substr($match, 0, 3) === 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) === 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // array_pad() guarantees two entries even when the separator is absent
        list($head, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($keyword, $num) = array_pad(explode(':', $head, 2), 2, '');

        // Defaults stay in place so every known flag is always defined,
        // even when the markup only lists unknown flags.
        $flags = [
            'showCount' => false,
        ];
        if (preg_match('/\[.*]/', $keyword, $matches) === 1) {
            foreach (explode(',', trim($matches[0], '[]')) as $flag) {
                $flag = trim($flag);
                if (in_array($flag, $this->knownFlags, true)) {
                    // Flags are only marked as present for now;
                    // key=value style flags could be added later.
                    $flags[$flag] = true;
                }
            }
        }

        // Only a positive whole number is a usable entry count
        $num = is_numeric($num) ? (int) $num : 0;
        if ($num < 1) {
            $num = self::DEFAULT_NUM;
        }

        // $ns is '' (never null) when no namespace part was given
        $namespaces = ($ns === '') ? null : explode('|', $ns);

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as XHTML.
     *
     * @param string        $format   output format; only 'xhtml' is handled
     * @param Doku_Renderer $renderer renderer whose doc is appended to
     * @param array         $data     the array returned by handle()
     * @return bool true when a cloud was written, false otherwise
     */
    public function render($format, Doku_Renderer $renderer, $data)
    {
        global $conf;

        if ($format !== 'xhtml') {
            return false;
        }

        // guard against data arrays with fewer than four entries
        list($type, $num, $namespaces, $flags) = array_pad((array) $data, 4, null);

        $tag = null;
        $min = 0;
        $max = 0;

        if ($type == 'tag') {
            // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (!is_array($cloud)) {
                return false;
            }
            $this->_filterCloud($cloud, 'search_blacklist');

            // calculate min/max values
            $min = PHP_INT_MAX;
            $max = 0;
            foreach ($cloud as $size) {
                $min = min($size, $min);
                $max = max($size, $max);
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }

        if (!is_array($cloud) || empty($cloud)) {
            return false;
        }

        $delta = ($max - $min) / 16;

        // prevent caching to ensure the included pages are always fresh
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            $class = $this->_sizeClass($size, $min, $delta);
            $name = $word;

            if ($type == 'tag' && isset($tag)) {
                list($page, $exists) = $this->_resolveTagPage($tag, $word);

                if ($exists) {
                    // a page for the tag exists: link to it, optionally using its heading
                    $link = wl($page);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($page, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                    $class .= '_tag1';
                } else {
                    // no page for the tag: link to the tag listing instead
                    $link = wl($word, array('do' => 'showtag', 'tag' => $word));
                    $class .= '_tag2';
                }
                $title = $word;
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if (!empty($flags['showCount'])) {
                $name .= '(' . $size . ')';
            }

            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                . ' title="' . hsc($title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;

        return true;
    }

    /**
     * Maps an entry's size to one of the CSS classes cloud1 .. cloud5.
     *
     * The thresholds are $min plus 1, 2, 4 and 8 times $delta (each rounded).
     *
     * @param int|float $size  size of the entry
     * @param int|float $min   smallest size in the cloud
     * @param int|float $delta one sixteenth of the size range
     * @return string CSS class name
     */
    protected function _sizeClass($size, $min, $delta)
    {
        foreach (array(1 => 1, 2 => 2, 3 => 4, 4 => 8) as $level => $factor) {
            if ($size < $min + round($factor * $delta)) {
                return 'cloud' . $level;
            }
        }
        return 'cloud5';
    }

    /**
     * Resolves a tag to a page id inside the tag namespace.
     *
     * @param helper_plugin_tag $tag  the tag helper
     * @param string            $word the tag
     * @return array [page id, bool whether the page exists]
     */
    protected function _resolveTagPage($tag, $word)
    {
        if (class_exists('dokuwiki\File\PageResolver')) {
            // Compatibility with tag plugin < 2022-09-30
            $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace;
            $resolver = new PageResolver($ns . ':');
            $page = $resolver->resolveId($word);
            $exists = page_exists($page);
        } else {
            // Compatibility with Hogfather and older
            $page = $word;
            $exists = false;
            resolve_pageid($tag->namespace, $page, $exists);
        }
        return array($page, $exists);
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, then both files are used - the content is merged.
     *
     * @return string[]|null the stopwords, or null when none were loaded
     */
    protected function _getStopwords()
    {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt', // language stopwords
            DOKU_CONF . 'stopwords.txt',                               // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!file_exists($swfile)) {
                continue;
            }
            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            // file() returns false on failure; never feed that to array_merge()
            if ($lines !== false) {
                $stopwords = array_merge($stopwords, $lines);
            }
        }

        if (count($stopwords) == 0) {
            return null;
        }
        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all loaded stopwords
     * - removes all words in the configured blacklist $balcklistName
     *
     * @param array  $cloud         word => count map, modified in place
     * @param string $balcklistName name of the config option holding the comma separated blacklist
     */
    public function _filterCloud(&$cloud, $balcklistName)
    {
        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach ($cloud as $key => $count) {
            // numeric words become integer array keys, hence the cast
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if ($this->stopwords !== null) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces
            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int   $num maximum number of entries
     * @param mixed $min receives the smallest count in the result
     * @param mixed $max receives the largest count in the result
     * @return array word => count
     */
    public function _getWordCloud($num, &$min, &$max)
    {
        global $conf;

        $cloud = array();

        if (file_exists($conf['indexdir'] . '/page.idx')) {
            // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            foreach (idx_indexLengths(0) as $len) {
                $idx = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        } else {
            // old index
            $idxFile = $conf['cachedir'] . '/index.idx';
            $wordFile = $conf['cachedir'] . '/word.idx';

            if (is_readable($idxFile) && is_readable($wordFile)) {
                $idx = file($idxFile);
                $word_idx = file($wordFile);

                if ($idx !== false && $word_idx !== false) {
                    $this->_addWordsToCloud($cloud, $idx, $word_idx);
                }
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * The frequency is the number of ':' separated entries on the index line
     * that belongs to the word.
     *
     * @param array $cloud    word => count map, modified in place
     * @param array $idx      index lines, parallel to $word_idx
     * @param array $word_idx word list
     */
    public function _addWordsToCloud(&$cloud, $idx, $word_idx)
    {
        if (!is_array($idx) || !is_array($word_idx)) {
            return;
        }

        // collect the frequency of the words
        $wcount = count($word_idx);
        for ($i = 0; $i < $wcount; $i++) {
            // the two lists may differ in length; skip words without an index line
            if (!isset($idx[$i], $word_idx[$i])) {
                continue;
            }
            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) {
                continue;
            }
            $cloud[trim($word_idx[$i])] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * @param int               $num        maximum number of entries
     * @param mixed             $min        receives the smallest count in the result
     * @param mixed             $max        receives the largest count in the result
     * @param array|null        $namespaces namespaces passed on to the tag helper
     * @param helper_plugin_tag $tag        the tag helper
     * @return array tag => count
     */
    public function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag)
    {
        $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num most frequent entries and returns them ordered by key.
     *
     * @param array $cloud word => count
     * @param int   $num   maximum number of entries (values below 1 are treated as 1)
     * @param mixed $min   receives the smallest count among the kept entries
     * @param mixed $max   receives the largest count among the kept entries
     * @return array the kept entries, sorted by key
     */
    public function _sortCloud($cloud, $num, &$min, &$max)
    {
        if (empty($cloud)) {
            return $cloud;
        }

        // sort by frequency and keep the top entries
        arsort($cloud);
        $cloud = array_slice($cloud, 0, max(1, (int) $num), true);

        $max = reset($cloud);
        $min = end($cloud);

        // then order the kept entries alphabetically
        ksort($cloud);

        return $cloud;
    }
}
// vim:ts=4:sw=4:et: