<?php
/**
 * AsianSearch Plugin for DokuWiki / action.php
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Kazutaka Miyasaka <kazmiya@gmail.com>
 */

// must be run within DokuWiki
if (!defined('DOKU_INC')) {
    die();
}

if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}

require_once DOKU_PLUGIN . 'action.php';

/**
 * Action component of the AsianSearch plugin.
 *
 * Depending on which core functions are available, it either rewrites
 * search queries so that runs of Asian characters become quoted phrases,
 * or replaces the snippet / term-highlighting routines with revised
 * copies that do not pre-process terms containing Asian characters.
 */
class action_plugin_asiansearch extends DokuWiki_Action_Plugin
{
    /**
     * Returns some info
     *
     * @return mixed result of confToHash() for this plugin's plugin.info.txt
     */
    function getInfo()
    {
        return confToHash(DOKU_PLUGIN . 'asiansearch/plugin.info.txt');
    }

    /**
     * Registers event handlers
     *
     * The presence or absence of certain core functions is used to tell
     * DokuWiki releases apart, and only the hooks needed for the detected
     * release are registered.
     *
     * @param object $controller event controller offering register_hook()
     * @return void
     */
    function register(&$controller)
    {
        if (!function_exists('datetime_h')) {
            // DokuWiki 2009-02-14 or earlier
            $controller->register_hook(
                'SEARCH_QUERY_FULLPAGE', 'BEFORE',
                $this, 'handleQuery'
            );
        } elseif (!function_exists('valid_input_set')) {
            // DokuWiki 2009-12-25 "Lemming" (do nothing)
        } elseif (!function_exists('act_sitemap')) {
            // DokuWiki 2010-11-07 "Anteater"
            $controller->register_hook(
                'FULLTEXT_SNIPPET_CREATE', 'BEFORE',
                $this, 'reactivateAsianSearchSnippet'
            );

            $controller->register_hook(
                'TPL_ACT_RENDER', 'BEFORE',
                $this, 'reactivateAsianTermHighlight'
            );
        } else {
            // DokuWiki Rincewind (do nothing)
        }
    }

    /**
     * Handles a search query
     *
     * Splits the query into double-quoted phrases and the text between
     * them, rewrites each piece with manipulateTerm() and stores the
     * re-joined result back into $event->data['query'].
     *
     * @param object $event event whose data array holds the 'query' string
     * @param mixed  $param not used
     * @return void
     */
    function handleQuery(&$event, $param)
    {
        $data =& $event->data;

        // manipulate a query: quoted phrases are captured as delimiters so
        // that they stay in the resulting list next to the unquoted parts
        $terms = preg_split(
            '/(".*?")/u', $data['query'], -1, PREG_SPLIT_DELIM_CAPTURE
        );

        // preg_split() yields false on failure (e.g. malformed UTF-8 under
        // the /u modifier); keep the query as it is rather than wiping it
        if ($terms === false) {
            return;
        }

        $data['query'] = implode(
            '',
            array_map(array($this, 'manipulateTerm'), $terms)
        );
    }

    /**
     * Manipulates a search term
     *
     * A piece that is a complete "phrase" is returned unchanged. In any
     * other piece, stray double quotes and ideographic spaces become plain
     * spaces and every space-separated word is passed through makePhrase().
     *
     * @param string $str a piece of the search query
     * @return string the rewritten piece
     */
    function manipulateTerm($str = '')
    {
        // do nothing with a "phrase"
        if (!preg_match('/^".*"$/u', $str)) {
            // fix incomplete phrase
            $str = str_replace('"', ' ', $str);

            // treat ideographic spaces (U+3000) as search term separators
            $separated = preg_replace('/\x{3000}/u', ' ', $str);

            // preg_replace() yields null on error; keep the term in that case
            if ($separated !== null) {
                $str = $separated;
            }

            // make phrases for asian characters
            $str = implode(
                ' ',
                array_map(array($this, 'makePhrase'), explode(' ', $str))
            );
        }

        return $str;
    }

    /**
     * Makes a "phrase" for each successive asian character
     *
     * Every run matched by IDX_ASIAN is wrapped in double quotes padded
     * with spaces, then the result is trimmed. Words starting with one of
     * the characters - @ ^ are returned unchanged.
     *
     * @param string $str a single space-separated word of the query
     * @return string the word with Asian runs quoted
     */
    function makePhrase($str = '')
    {
        // skip if $str has a search modifier
        if (!preg_match('/^[\-\@\^]/u', $str)) {
            $phrased = preg_replace('/(' . IDX_ASIAN . '+)/u', ' "$1" ', $str);

            // preg_replace() yields null on error; keep the word in that case
            if ($phrased !== null) {
                $str = trim($phrased);
            }
        }

        return $str;
    }

    /**
     * Reactivates missing asian search snippets
     *
     * Cancels the event's default action and builds the snippet with
     * revised_ft_snippet() instead.
     *
     * @param object $event the FULLTEXT_SNIPPET_CREATE event
     * @param mixed  $param not used
     * @return void
     */
    function reactivateAsianSearchSnippet(&$event, $param)
    {
        $event->preventDefault();
        $this->revised_ft_snippet($event);
    }

    /**
     * Revised version of the ft_snippet()
     * (ft_snippet_re_preprocess is replaced)
     *
     * Reads 'text' and 'highlight' from $event->data, collects up to four
     * context snippets around matches of the highlight terms and writes
     * the HTML-escaped result, with hits wrapped in
     * <strong class="search_hit">, to $event->data['snippet'].
     *
     * @param object $event event whose data holds 'id', 'text', 'highlight'
     * @return void
     */
    function revised_ft_snippet(&$event)
    {
        $id        = $event->data['id']; // not referenced below
        $text      = $event->data['text'];
        $highlight = $event->data['highlight'];

        // ---> Copied from ft_snippet() - No code cleanups

        $match = array();
        $snippets = array();
        $utf8_offset = $offset = $end = 0;
        $len = utf8_strlen($text);

        // build a regexp from the phrases to highlight
        $re1 = '('.join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',array_filter((array) $highlight))
        )).')';
        $re2 = "$re1.{0,75}(?!\\1)$re1";
        $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";

        // at most four rounds; each round prefers a match of three distinct
        // hits ($re3), then two ($re2), then a single one ($re1)
        for ($cnt=4; $cnt--;) {
            if (0) {
            } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else {
                break;
            }

            list($str,$idx) = $match[0];

            // convert $idx (a byte offset) into a utf8 character offset
            $utf8_idx = utf8_strlen(substr($text,0,$idx));
            $utf8_len = utf8_strlen($str);

            // establish context, 100 bytes surrounding the match string
            // first look to see if we can go 100 either side,
            // then drop to 50 adding any excess if the other side can't go to 50,
            $pre = min($utf8_idx-$utf8_offset,100);
            $post = min($len-$utf8_idx-$utf8_len,100);

            if ($pre>50 && $post>50) {
                $pre = $post = 50;
            } else if ($pre>50) {
                $pre = min($pre,100-$post);
            } else if ($post>50) {
                $post = min($post, 100-$pre);
            } else {
                // both are less than 50, means the context is the whole string
                // make it so and break out of this loop - there is no need for the
                // complex snippet calculations
                $snippets = array($text);
                break;
            }

            // establish context start and end points, try to append to previous
            // context if possible
            $start = $utf8_idx - $pre;
            $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

            if ($append) {
                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
            } else {
                $snippets[] = utf8_substr($text,$start,$end-$start);
            }

            // set $offset for next match attempt
            //   subtract strlen to avoid splitting a potential search success,
            //   this is an approximation as the search pattern may match strings
            //   of varying length and it will fail if the context snippet
            //   boundary breaks a matching string longer than the current match
            $utf8_offset = $utf8_idx + $post;
            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
            $offset = utf8_correctIdx($text,$offset);
        }

        // mark each hit with a \x01 control character on both sides before
        // escaping, then turn the marked spans into <strong> elements
        $m = "\1";
        $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
        $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));

        // <--- Copied from ft_snippet() - No code cleanups

        $event->data['snippet'] = $snippet;
    }

    /**
     * Revised version of the ft_snippet_re_preprocess()
     *
     * Terms containing a character matched by IDX_ASIAN are returned
     * unchanged; all other terms are handed to ft_snippet_re_preprocess().
     *
     * @param string $term a highlight term
     * @return string the term to use in the highlight regexp
     */
    function revised_ft_snippet_re_preprocess($term)
    {
        if (preg_match('/' . IDX_ASIAN . '/u', $term)) {
            return $term;
        } else {
            return ft_snippet_re_preprocess($term);
        }
    }

    /**
     * Reactivates missing asian term highlightings
     *
     * For the 'show' action only, cancels the default rendering and
     * prints the page through revised_html_show() instead.
     *
     * @param object $event the TPL_ACT_RENDER event
     * @param mixed  $param not used
     * @return void
     */
    function reactivateAsianTermHighlight(&$event, $param)
    {
        if ($event->data === 'show') {
            $event->preventDefault();
            $this->revised_html_show();
        }
    }

    /**
     * Revised version of the html_show()
     *
     * With $txt given, echoes a preview of that wiki text. Otherwise it
     * echoes the page identified by the globals $ID / $REV, highlighting
     * the terms in $HIGH via revised_html_hilight().
     *
     * @param string|null $txt wiki text to preview, or null to show the page
     * @return void
     */
    function revised_html_show($txt = null)
    {
        // ---> Copied from html_show() - No code cleanups

        global $ID;
        global $REV;
        global $HIGH;
        global $INFO;
        //disable section editing for old revisions or in preview
        if($txt || $REV){
            $secedit = false;
        }else{
            $secedit = true;
        }

        if (!is_null($txt)){
            //PreviewHeader
            echo '<br id="scroll__here" />';
            echo p_locale_xhtml('preview');
            echo '<div class="preview">';
            $html = html_secedit(p_render('xhtml',p_get_instructions($txt),$info),$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            echo $html;
            echo '<div class="clearer"></div>';
            echo '</div>';

        }else{
            if ($REV) print p_locale_xhtml('showrev');
            $html = p_wiki_xhtml($ID,$REV,true);
            $html = html_secedit($html,$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            $html = $this->revised_html_hilight($html,$HIGH); // <= REPLACED
            echo $html;
        }

        // <--- Copied from html_show() - No code cleanups
    }

    /**
     * Revised version of the html_hilight()
     *
     * Builds one alternation regexp from the non-empty phrases and runs
     * html_hilight_callback over every HTML tag start or phrase match.
     * The HTML is returned unchanged when there is nothing to highlight.
     *
     * @param string       $html    rendered page HTML
     * @param array|string $phrases terms to highlight
     * @return string the HTML after highlighting
     */
    function revised_html_hilight($html, $phrases)
    {
        // ---> Copied from html_hilight() - No code cleanups

        $phrases = array_filter((array) $phrases);
        $regex = join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',$phrases)
        ));

        if ($regex === '') return $html;
        $html = preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);
        return $html;

        // <--- Copied from html_hilight() - No code cleanups
    }
}