*/
use dokuwiki\File\PageResolver;
/**
 * Cloud syntax plugin: renders a cloud of indexed words, tags or search terms.
 *
 * Markup handled by this class (see connectTo() and handle()):
 *   ~~CLOUD~~, ~~TAGCLOUD~~ or ~~SEARCHCLOUD~~, optionally extended with
 *   "[flag,...]" after the keyword, ":<number of entries>" and ">ns1|ns2".
 */
class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin {

    /** Number of cloud entries used when the markup gives no usable number. */
    const DEFAULT_NUM = 50;

    /** @var string[] flags accepted inside the square brackets of the markup */
    protected $knownFlags = array('showCount');

    /** @var string[]|null merged stopword list, or null when none could be loaded */
    protected $stopwords = null;

    /**
     * Constructor. Loads stopwords.
     */
    public function __construct() {
        $this->stopwords = $this->_getStopwords();
    }

    /**
     * @return string syntax mode type; 'substition' is the (misspelled) identifier DokuWiki uses
     */
    public function getType() { return 'substition'; }

    /**
     * @return string paragraph type of the generated output
     */
    public function getPType() { return 'block'; }

    /**
     * @return int sort value of this syntax mode
     */
    public function getSort() { return 98; }

    /**
     * Registers the ~~...CLOUD...~~ pattern with the lexer.
     *
     * @param string $mode parser mode the pattern is connected to
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud');
    }

    /**
     * Parses the matched markup into the data consumed by render().
     *
     * @param string       $match   the text matched by the pattern registered in connectTo()
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array array($type, $num, $namespaces, $flags) where $type is 'tag', 'search'
     *               or 'word', $num is a positive int, $namespaces is a string[] or null
     *               and $flags maps every flag name to a bool
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        $match = substr($match, 2, -2); // strip the surrounding "~~"

        if (substr($match, 0, 3) == 'TAG') {
            $type = 'tag';
        } elseif (substr($match, 0, 6) == 'SEARCH') {
            $type = 'search';
        } else {
            $type = 'word';
        }

        // "<KEYWORD>[flags]:<num>><ns1>|<ns2>" - pad so both parts always exist
        list($head, $ns) = array_pad(explode('>', $match, 2), 2, '');
        list($keyword, $num) = array_pad(explode(':', $head, 2), 2, '');

        // Defaults stay in place so render() can rely on every flag key being set
        $flags = array(
            'showCount' => false,
        );
        if (preg_match('/\[.*]/', $keyword, $matches) === 1) {
            foreach (explode(',', trim($matches[0], '[]')) as $flag) {
                $flag = trim($flag);
                if (in_array($flag, $this->knownFlags, true)) {
                    // Flags are only marked as present for now;
                    // key=value style flags could be added later
                    $flags[$flag] = true;
                }
            }
        }

        // Anything that is not a positive number falls back to the default size
        $num = is_numeric($num) ? (int) $num : self::DEFAULT_NUM;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // $ns is '' (never null) when no namespace part was given
        $namespaces = ($ns !== '') ? explode('|', $ns) : null;

        return array($type, $num, $namespaces, $flags);
    }

    /**
     * Renders the cloud as a block of links; only the 'xhtml' format is supported.
     *
     * @param string        $format   output format
     * @param Doku_Renderer $renderer the renderer whose doc is appended to
     * @param array         $data     data returned by handle()
     * @return bool true when output was written, false otherwise
     */
    public function render($format, Doku_Renderer $renderer, $data) {
        global $conf;

        if ($format != 'xhtml') return false;

        // Tolerate data with fewer than four entries (no flags element)
        list($type, $num, $namespaces, $flags) = array_pad((array) $data, 4, null);
        $showCount = is_array($flags) && !empty($flags['showCount']);

        $min = 0;
        $max = 0;
        $tag = null;

        if ($type == 'tag') { // we need the tag helper plugin
            /** @var helper_plugin_tag $tag */
            if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) {
                msg('The Tag Plugin must be installed to display tag clouds.', -1);
                return false;
            }
            $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag);
        } elseif ($type == 'search') {
            /** @var helper_plugin_searchstats $helper */
            $helper = plugin_load('helper', 'searchstats');
            if (!$helper) {
                msg('You have to install the searchstats plugin to use this feature.', -1);
                return false;
            }
            $cloud = $helper->getSearchWordArray($num);
            if (!is_array($cloud)) return false;
            $this->_filterCloud($cloud, 'search_blacklist');

            // calculate min/max values
            $min = PHP_INT_MAX;
            $max = 0;
            foreach ($cloud as $size) {
                $min = min($size, $min);
                $max = max($size, $max);
            }
        } else {
            $cloud = $this->_getWordCloud($num, $min, $max);
        }

        if (!is_array($cloud) || empty($cloud)) return false;
        $delta = ($max - $min) / 16;

        // prevent caching to ensure the included pages are always fresh
        $renderer->nocache();

        // and render the cloud
        $renderer->doc .= '<div class="cloud">' . DOKU_LF;
        foreach ($cloud as $word => $size) {
            $word = (string) $word; // purely numeric words arrive as integer keys
            $class = $this->_getSizeClass($size, $min, $delta);
            $name = $word;

            if ($type == 'tag' && isset($tag)) {
                if (class_exists('dokuwiki\File\PageResolver')) {
                    // Compatibility with tag plugin < 2022-09-30
                    $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace;
                    $resolver = new PageResolver($ns . ':');
                    $page = $resolver->resolveId($word);
                    $exists = page_exists($page);
                } else {
                    // Compatibility with Hogfather and older
                    $page = $word;
                    resolve_pageid($tag->namespace, $page, $exists);
                }
                if ($exists) {
                    // a page for this tag exists: link to it, optionally named by its heading
                    $link = wl($page);
                    if ($conf['useheading']) {
                        $name = p_get_first_heading($page, false);
                        if (empty($name)) {
                            $name = $word;
                        }
                    }
                    $class .= '_tag1';
                } else {
                    // no tag page: link to the tag listing instead
                    $link = wl($word, array('do' => 'showtag', 'tag' => $word));
                    $class .= '_tag2';
                }
                $title = $word;
            } else {
                if ($conf['userewrite'] == 2) {
                    $link = wl($word, array('do' => 'search', 'id' => $word));
                } else {
                    $link = wl($word, 'do=search');
                }
                $title = $size;
            }

            if ($showCount) {
                $name .= '(' . $size . ')';
            }

            // Emit the link; $link, $class and $title were computed above for exactly this
            $renderer->doc .= DOKU_TAB . '<a href="' . $link . '" class="' . $class . '"'
                . ' title="' . hsc((string) $title) . '">' . hsc($name) . '</a>' . DOKU_LF;
        }
        $renderer->doc .= '</div>' . DOKU_LF;
        return true;
    }

    /**
     * Maps an entry's size to one of the classes cloud1 (smallest) to cloud5 (largest).
     *
     * @param int|float $size  frequency of the entry
     * @param int|float $min   smallest frequency in the cloud
     * @param int|float $delta a sixteenth of the frequency range
     * @return string class name without any tag suffix
     */
    protected function _getSizeClass($size, $min, $delta) {
        if ($size < $min + round($delta)) return 'cloud1';
        if ($size < $min + round(2 * $delta)) return 'cloud2';
        if ($size < $min + round(4 * $delta)) return 'cloud3';
        if ($size < $min + round(8 * $delta)) return 'cloud4';
        return 'cloud5';
    }

    /**
     * Helper function for loading and returning the array with stopwords.
     *
     * Stopwords files are loaded from two locations:
     * - inc/lang/"actual language"/stopwords.txt
     * - conf/stopwords.txt
     *
     * If both files exist, both are used and their content is merged.
     *
     * @return string[]|null the stopwords, or null when none were found
     */
    protected function _getStopwords() {
        global $conf;

        $files = array(
            DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt', // language stopwords
            DOKU_CONF . 'stopwords.txt',                              // extra local stopwords
        );

        $stopwords = array();
        foreach ($files as $swfile) {
            if (!is_readable($swfile)) continue;
            $lines = file($swfile, FILE_IGNORE_NEW_LINES);
            // file() returns false on failure; never feed that into array_merge()
            if ($lines === false) continue;
            // trim so a stray "\r" or padding does not prevent a match
            $stopwords = array_merge($stopwords, array_map('trim', $lines));
        }

        if (count($stopwords) == 0) {
            return null;
        }
        return $stopwords;
    }

    /**
     * Applies filters on the cloud:
     * - removes all short words, see config option 'minimum_word_length'
     * - removes all stopwords
     * - removes all words in the configured blacklist $balcklistName
     *
     * @param array  $cloud         word => frequency map, modified in place
     * @param string $balcklistName name of the config option holding the comma separated blacklist
     */
    public function _filterCloud(&$cloud, $balcklistName) {
        if (!is_array($cloud)) return;

        // Remove too short words
        $min = $this->getConf('minimum_word_length');
        foreach (array_keys($cloud) as $key) {
            if (iconv_strlen((string) $key) < $min) {
                unset($cloud[$key]);
            }
        }

        // Remove stopwords
        if (!empty($this->stopwords)) {
            foreach ($this->stopwords as $word) {
                unset($cloud[$word]);
            }
        }

        // Remove words which are on the blacklist
        $blacklist = $this->getConf($balcklistName);
        if (!empty($blacklist)) {
            $blacklist = explode(',', $blacklist);
            $blacklist = str_replace(' ', '', $blacklist); // remove spaces
            foreach ($blacklist as $word) {
                unset($cloud[$word]);
            }
        }
    }

    /**
     * Returns the sorted word cloud array
     *
     * @param int   $num maximum number of words to return
     * @param mixed $min receives the lowest frequency in the returned cloud
     * @param mixed $max receives the highest frequency in the returned cloud
     * @return array word => frequency map
     */
    public function _getWordCloud($num, &$min, &$max) {
        global $conf;

        $cloud = array();

        if (@file_exists($conf['indexdir'] . '/page.idx')) { // new word-length based index
            require_once(DOKU_INC . 'inc/indexer.php');

            $lengths = idx_indexLengths(0);
            foreach ($lengths as $len) {
                $idx      = idx_getIndex('i', $len);
                $word_idx = idx_getIndex('w', $len);

                $this->_addWordsToCloud($cloud, $idx, $word_idx);
            }
        } else { // old index
            $idxFile  = $conf['cachedir'] . '/index.idx';
            $wordFile = $conf['cachedir'] . '/word.idx';

            if (is_readable($idxFile) && is_readable($wordFile)) {
                $idx      = file($idxFile);
                $word_idx = file($wordFile);

                // file() yields false on failure; only use complete index data
                if ($idx !== false && $word_idx !== false) {
                    $this->_addWordsToCloud($cloud, $idx, $word_idx);
                }
            }
        }

        $this->_filterCloud($cloud, 'word_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Adds all words in given index as $word => $freq to $cloud array
     *
     * @param array $cloud    word => frequency map, extended in place
     * @param array $idx      index lines; each line is split on ':' and its parts are counted
     * @param array $word_idx word lines, matched to $idx by position
     */
    public function _addWordsToCloud(&$cloud, $idx, $word_idx) {
        if (!is_array($idx) || !is_array($word_idx)) return;

        // collect the frequency of the words
        foreach ($word_idx as $i => $word) {
            if (!isset($idx[$i])) continue; // index files out of step: nothing to count

            $value = explode(':', $idx[$i]);
            if (!trim($value[0])) continue;

            $cloud[trim($word)] = count($value);
        }
    }

    /**
     * Returns the sorted tag cloud array
     *
     * @param int               $num        maximum number of tags to return
     * @param mixed             $min        receives the lowest frequency in the returned cloud
     * @param mixed             $max        receives the highest frequency in the returned cloud
     * @param string[]|null     $namespaces namespaces passed on to the tag helper
     * @param helper_plugin_tag $tag        the tag helper plugin
     * @return array tag => frequency map
     */
    public function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag) {
        $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns'));

        $this->_filterCloud($cloud, 'tag_blacklist');

        return $this->_sortCloud($cloud, $num, $min, $max);
    }

    /**
     * Sorts and slices the cloud
     *
     * Keeps the $num most frequent entries and returns them ordered by key.
     *
     * @param array $cloud word => frequency map
     * @param int   $num   maximum number of entries; values below 1 fall back to DEFAULT_NUM
     * @param mixed $min   receives the lowest frequency among the kept entries
     * @param mixed $max   receives the highest frequency among the kept entries
     * @return array the kept entries; $cloud itself when it is empty
     */
    public function _sortCloud($cloud, $num, &$min, &$max) {
        if (empty($cloud)) return $cloud;

        // A size below 1 cannot be used for slicing
        $num = (int) $num;
        if ($num < 1) $num = self::DEFAULT_NUM;

        // most frequent first, then keep the top $num (keys must be preserved:
        // they are the words, and numeric ones would otherwise be renumbered)
        arsort($cloud);
        $top = array_slice($cloud, 0, $num, true);

        $max = reset($top);
        $min = end($top);

        // display order is by key
        ksort($top);

        return $top;
    }
}
// vim:ts=4:sw=4:et: