*/

// must be run within DokuWiki
if (!defined('DOKU_INC')) {
    die();
}

if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}

require_once DOKU_PLUGIN . 'action.php';

/**
 * Action plugin that adjusts search handling for runs of characters matched
 * by the IDX_ASIAN pattern.
 *
 * Depending on which DokuWiki functions exist at registration time it either
 * rewrites the search query (turning each IDX_ASIAN run into a quoted phrase)
 * or replaces the snippet / page-highlight rendering with variants that leave
 * terms containing IDX_ASIAN characters unprocessed.
 */
class action_plugin_asiansearch extends DokuWiki_Action_Plugin {

    /**
     * Returns some info
     *
     * @return mixed the result of confToHash() for asiansearch/plugin.info.txt
     */
    function getInfo() {
        return confToHash(DOKU_PLUGIN . 'asiansearch/plugin.info.txt');
    }

    /**
     * Registers event handlers
     *
     * The DokuWiki release is detected by probing for functions with
     * function_exists(); the release names in the comments below are the
     * ones the original author associated with each probe.
     *
     * @param object $controller object exposing register_hook()
     */
    function register(&$controller) {
        if (!function_exists('datetime_h')) {
            // DokuWiki 2009-02-14 or earlier
            $controller->register_hook(
                'SEARCH_QUERY_FULLPAGE', 'BEFORE',
                $this, 'handleQuery'
            );
        } elseif (!function_exists('valid_input_set')) {
            // DokuWiki 2009-12-25 "Lemming" (do nothing)
        } elseif (!function_exists('act_sitemap')) {
            // DokuWiki 2010-11-07 "Anteater"
            $controller->register_hook(
                'FULLTEXT_SNIPPET_CREATE', 'BEFORE',
                $this, 'reactivateAsianSearchSnippet'
            );
            $controller->register_hook(
                'TPL_ACT_RENDER', 'BEFORE',
                $this, 'reactivateAsianTermHighlight'
            );
        } else {
            // DokuWiki Rincewind (do nothing)
        }
    }

    /**
     * Handles a search query
     *
     * Splits $event->data['query'] into quoted phrases and the text between
     * them, passes every piece through manipulateTerm() and writes the
     * re-joined string back into $event->data['query'].
     *
     * @param object $event event object whose data array holds 'query'
     * @param mixed  $param unused
     */
    function handleQuery(&$event, $param) {
        $data =& $event->data;

        // manipulate a query
        $terms = preg_split(
            '/(".*?")/u',
            $data['query'],
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        if ($terms === false) {
            // preg_split() failed (for example the query is not valid UTF-8
            // for the /u modifier); keep the query exactly as it was entered
            return;
        }

        $data['query'] = implode(
            '',
            array_map(array($this, 'manipulateTerm'), $terms)
        );
    }

    /**
     * Manipulates a search term
     *
     * A piece that is already a complete "phrase" is returned unchanged.
     * Otherwise stray double quotes and ideographic spaces become plain
     * spaces and every space-separated word goes through makePhrase().
     *
     * @param string $str one piece of the query
     * @return string the rewritten piece
     */
    function manipulateTerm($str = '') {
        // do nothing with a "phrase"
        if (!preg_match('/^".*"$/u', $str)) {
            // fix incomplete phrase
            $str = str_replace('"', ' ', $str);

            // treat ideographic spaces (U+3000) as search term separators
            $str = preg_replace('/\x{3000}/u', ' ', $str);

            // make phrases for asian characters
            $str = implode(
                ' ',
                array_map(array($this, 'makePhrase'), explode(' ', $str))
            );
        }

        return $str;
    }

    /**
     * Makes a "phrase" for each successive asian character
     *
     * Every run matched by IDX_ASIAN is wrapped in double quotes padded with
     * spaces, then the result is trimmed. Words starting with one of the
     * characters - @ ^ are returned untouched.
     *
     * @param string $str a single space-separated word of the query
     * @return string the word with IDX_ASIAN runs quoted
     */
    function makePhrase($str = '') {
        // skip if $str has a search modifier
        if (!preg_match('/^[\-\@\^]/u', $str)) {
            $str = preg_replace('/(' . IDX_ASIAN . '+)/u', ' "$1" ', $str);
            $str = trim($str);
        }

        return $str;
    }

    /**
     * Reactivates missing asian search snippets
     *
     * Cancels the default action of the event and builds the snippet with
     * revised_ft_snippet() instead.
     *
     * @param object $event event object exposing preventDefault() and data
     * @param mixed  $param unused
     */
    function reactivateAsianSearchSnippet(&$event, $param) {
        $event->preventDefault();
        $this->revised_ft_snippet($event);
    }

    /**
     * Revised version of the ft_snippet()
     * (ft_snippet_re_preprocess is replaced)
     *
     * Reads $event->data['text'] and $event->data['highlight'] and stores the
     * resulting HTML snippet in $event->data['snippet'].
     *
     * @param object $event event object carrying the data array
     */
    function revised_ft_snippet(&$event) {
        $text      = $event->data['text'];
        $highlight = $event->data['highlight'];

        // ---> Copied from ft_snippet() - No code cleanups

        $match = array();
        $snippets = array();
        $utf8_offset = $offset = $end = 0;
        $len = utf8_strlen($text);

        // build a regexp from the phrases to highlight
        $re1 = '('.join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',array_filter((array) $highlight))
        )).')';
        $re2 = "$re1.{0,75}(?!\\1)$re1";
        $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";

        // at most four rounds; each round tries the three-term pattern first,
        // then the two-term pattern, then a single term, and stops when none match
        for ($cnt=4; $cnt--;) {
            if (0) {
            } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else {
                break;
            }

            list($str,$idx) = $match[0];

            // convert $idx (a byte offset) into a utf8 character offset
            $utf8_idx = utf8_strlen(substr($text,0,$idx));
            $utf8_len = utf8_strlen($str);

            // establish context, 100 bytes surrounding the match string
            // first look to see if we can go 100 either side,
            // then drop to 50 adding any excess if the other side can't go to 50,
            $pre = min($utf8_idx-$utf8_offset,100);
            $post = min($len-$utf8_idx-$utf8_len,100);

            if ($pre>50 && $post>50) {
                $pre = $post = 50;
            } else if ($pre>50) {
                $pre = min($pre,100-$post);
            } else if ($post>50) {
                $post = min($post, 100-$pre);
            } else {
                // both are less than 50, means the context is the whole string
                // make it so and break out of this loop - there is no need for the
                // complex snippet calculations
                $snippets = array($text);
                break;
            }

            // establish context start and end points, try to append to previous
            // context if possible
            $start = $utf8_idx - $pre;
            $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

            if ($append) {
                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
            } else {
                $snippets[] = utf8_substr($text,$start,$end-$start);
            }

            // set $offset for next match attempt
            //   subtract strlen to avoid splitting a potential search success,
            //   this is an approximation as the search pattern may match strings
            //   of varying length and it will fail if the context snippet
            //   boundary breaks a matching string longer than the current match
            $utf8_offset = $utf8_idx + $post;
            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
            $offset = utf8_correctIdx($text,$offset);
        }

        // Matches are first wrapped in \1 marker bytes, the joined text is
        // escaped with hsc(), and only then are the markers exchanged for
        // highlight markup so that the markup itself is not escaped.
        // The wrapper below was a bare '$1' in the reviewed copy, which made
        // the marker round-trip a no-op and produced no highlighting.
        // NOTE(review): confirm the exact wrapper against the ft_snippet() of
        // the DokuWiki release this method mirrors.
        $m = "\1";
        $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
        $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));

        // <--- Copied from ft_snippet() - No code cleanups

        $event->data['snippet'] = $snippet;
    }

    /**
     * Revised version of the ft_snippet_re_preprocess()
     *
     * A term containing at least one IDX_ASIAN character is returned as is;
     * any other term is handed to ft_snippet_re_preprocess().
     *
     * @param string $term a regex-quoted search term
     * @return string the regex fragment to use for that term
     */
    function revised_ft_snippet_re_preprocess($term) {
        if (preg_match('/' . IDX_ASIAN . '/u', $term)) {
            return $term;
        } else {
            return ft_snippet_re_preprocess($term);
        }
    }

    /**
     * Reactivates missing asian term highlightings
     *
     * Only acts when the event data is exactly the string 'show': the default
     * action is cancelled and the page is printed by revised_html_show().
     *
     * @param object $event event object exposing preventDefault() and data
     * @param mixed  $param unused
     */
    function reactivateAsianTermHighlight(&$event, $param) {
        if ($event->data === 'show') {
            $event->preventDefault();
            $this->revised_html_show();
        }
    }

    /**
     * Revised version of the html_show()
     *
     * Echoes the rendered page. With $txt given, the text is rendered as a
     * preview; otherwise the page $ID (revision $REV) is rendered and passed
     * through revised_html_hilight() with the global $HIGH terms.
     *
     * @param string|null $txt wiki text to preview, or null to show the page
     */
    function revised_html_show($txt = null) {
        // ---> Copied from html_show() - No code cleanups

        global $ID;
        global $REV;
        global $HIGH;
        global $INFO;
        //disable section editing for old revisions or in preview
        if($txt || $REV){
            $secedit = false;
        }else{
            $secedit = true;
        }

        if (!is_null($txt)){
            // NOTE(review): the four quoted literals echoed in this branch hold
            // nothing but a line break in the reviewed copy. That looks like
            // markup lost in transit rather than intended output - confirm
            // against the html_show() of the DokuWiki release being mirrored.
            //PreviewHeader
            echo '
';
            echo p_locale_xhtml('preview');
            echo '
';
            $html = html_secedit(p_render('xhtml',p_get_instructions($txt),$info),$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            echo $html;
            echo '
';
            echo '
';
        }else{
            if ($REV) print p_locale_xhtml('showrev');
            $html = p_wiki_xhtml($ID,$REV,true);
            $html = html_secedit($html,$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            $html = $this->revised_html_hilight($html,$HIGH); // <= REPLACED
            echo $html;
        }

        // <--- Copied from html_show() - No code cleanups
    }

    /**
     * Revised version of the html_hilight()
     *
     * Builds one alternation regex from the given phrases (each preprocessed
     * by revised_ft_snippet_re_preprocess()) and lets html_hilight_callback
     * rewrite every match in $html. The pattern also matches the opening part
     * of HTML tags so the callback receives those as well.
     *
     * @param string       $html    rendered page HTML
     * @param array|string $phrases terms to highlight; empty values are dropped
     * @return string the HTML after highlighting, or the input HTML when there
     *                is nothing to highlight or the regex engine reports an error
     */
    function revised_html_hilight($html, $phrases) {
        // ---> Copied from html_hilight() - null guard added at the end

        $phrases = array_filter((array) $phrases);
        $regex = join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',$phrases)
        ));

        if ($regex === '') return $html;

        $highlighted = preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);

        // preg_replace_callback() yields null on a PCRE error; showing the
        // page without highlighting is better than showing nothing at all
        if (is_null($highlighted)) return $html;

        return $highlighted;

        // <--- Copied from html_hilight()
    }
}