1<?php
2/**
3 * AsianSearch Plugin for DokuWiki / action.php
4 *
5 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author  Kazutaka Miyasaka <kazmiya@gmail.com>
7 */
8
// Refuse to run when this file is loaded outside of DokuWiki
// (DOKU_INC is only defined by the DokuWiki bootstrap).
defined('DOKU_INC') or die();

// Use the default plugin directory unless DOKU_PLUGIN was defined already.
defined('DOKU_PLUGIN') or define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

// Load action.php from the plugin directory
// (presumably the file that provides DokuWiki_Action_Plugin).
require_once(DOKU_PLUGIN . 'action.php');
19
/**
 * Action plugin that restores searching for Asian text.
 *
 * Depending on which DokuWiki release is detected (see register()), it
 * either rewrites the search query so that runs of Asian characters become
 * quoted phrases, or it replaces the snippet creation and the search term
 * highlighting with revised copies of the corresponding core functions.
 *
 * The code deliberately sticks to PHP 4/5 compatible syntax because it
 * targets DokuWiki releases from 2009/2010.
 */
class action_plugin_asiansearch extends DokuWiki_Action_Plugin
{
    /**
     * Returns the plugin metadata
     *
     * @return mixed the result of confToHash() applied to this plugin's
     *               plugin.info.txt
     */
    function getInfo()
    {
        return confToHash(DOKU_PLUGIN . 'asiansearch/plugin.info.txt');
    }

    /**
     * Registers event handlers
     *
     * The running DokuWiki release is detected by probing for functions
     * that exist only in certain releases; each release needs a different
     * set of hooks (or none at all).
     *
     * @param object $controller event controller offering register_hook()
     */
    function register(&$controller)
    {
        if (!function_exists('datetime_h')) {
            // DokuWiki 2009-02-14 or earlier: rewrite the query itself
            $controller->register_hook(
                'SEARCH_QUERY_FULLPAGE', 'BEFORE',
                $this, 'handleQuery'
            );
        } elseif (!function_exists('valid_input_set')) {
            // DokuWiki 2009-12-25 "Lemming" (do nothing)
        } elseif (!function_exists('act_sitemap')) {
            // DokuWiki 2010-11-07 "Anteater": replace the snippet creation
            // and the term highlighting with the revised versions below
            $controller->register_hook(
                'FULLTEXT_SNIPPET_CREATE', 'BEFORE',
                $this, 'reactivateAsianSearchSnippet'
            );

            $controller->register_hook(
                'TPL_ACT_RENDER', 'BEFORE',
                $this, 'reactivateAsianTermHighlight'
            );
        } else {
            // DokuWiki Rincewind (do nothing)
        }
    }

    /**
     * Handles a search query
     *
     * Rewrites $event->data['query'] in place: already quoted "phrases"
     * are kept as they are, everything between them is passed through
     * manipulateTerm().
     *
     * @param object $event event whose data array holds the 'query' string
     * @param mixed  $param unused
     */
    function handleQuery(&$event, $param)
    {
        $data =& $event->data;

        // split into quoted phrases and the text between them; the
        // captured delimiters (the phrases) stay part of the result
        $terms = preg_split(
            '/(".*?")/u', $data['query'], -1, PREG_SPLIT_DELIM_CAPTURE
        );

        // preg_split() fails (returns false) e.g. when the query is not
        // valid UTF-8; leave such a query untouched instead of feeding a
        // non-array into array_map()/implode()
        if (!is_array($terms)) {
            return;
        }

        $data['query'] = implode(
            '',
            array_map(array($this, 'manipulateTerm'), $terms)
        );
    }

    /**
     * Manipulates a search term
     *
     * A complete "phrase" is returned unchanged. Anything else has stray
     * double quotes and ideographic spaces turned into plain spaces and
     * each space separated word is passed through makePhrase().
     *
     * @param string $str a piece of the search query
     * @return string the manipulated piece
     */
    function manipulateTerm($str = '')
    {
        // do nothing with a "phrase"
        if (preg_match('/^".*"$/u', $str)) {
            return $str;
        }

        // fix incomplete phrase
        $str = str_replace('"', ' ', $str);

        // treat ideographic spaces (U+3000) as search term separators;
        // preg_replace() yields null on error, keep the term in that case
        $spaced = preg_replace('/\x{3000}/u', ' ', $str);
        if (is_string($spaced)) {
            $str = $spaced;
        }

        // make phrases for asian characters
        return implode(
            ' ',
            array_map(array($this, 'makePhrase'), explode(' ', $str))
        );
    }

    /**
     * Makes a "phrase" for each successive asian character
     *
     * Every run of characters matching IDX_ASIAN (a pattern fragment
     * defined outside this file) is wrapped in double quotes. Words that
     * start with a search modifier (-, @ or ^) are returned unchanged.
     *
     * @param string $str a single word of the search query
     * @return string the word with its asian runs quoted
     */
    function makePhrase($str = '')
    {
        // skip if $str has a search modifier
        if (preg_match('/^[\-\@\^]/u', $str)) {
            return $str;
        }

        $quoted = preg_replace('/(' . IDX_ASIAN . '+)/u', ' "$1" ', $str);

        // preg_replace() yields null on error; do not erase the word then
        if (!is_string($quoted)) {
            return $str;
        }

        return trim($quoted);
    }

    /**
     * Reactivates missing asian search snippets
     *
     * Suppresses the default snippet creation and fills in the snippet
     * with revised_ft_snippet() instead.
     *
     * @param object $event FULLTEXT_SNIPPET_CREATE event
     * @param mixed  $param unused
     */
    function reactivateAsianSearchSnippet(&$event, $param)
    {
        $event->preventDefault();
        $this->revised_ft_snippet($event);
    }

    /**
     * Revised version of the ft_snippet()
     * (ft_snippet_re_preprocess is replaced)
     *
     * Reads 'text' and 'highlight' from $event->data, collects up to four
     * context snippets around the highlighted terms and stores the
     * resulting HTML in $event->data['snippet'].
     *
     * @param object $event event whose data array holds 'text' (string),
     *                      'highlight' (term or list of terms) and receives
     *                      'snippet'
     */
    function revised_ft_snippet(&$event)
    {
        $text = $event->data['text'];
        $highlight = $event->data['highlight'];

        // ---> Copied from ft_snippet() - No code cleanups
        //      (apart from the "$offset == 0" guard marked below)

        $match = array();
        $snippets = array();
        $utf8_offset = $offset = $end = 0;
        $len = utf8_strlen($text);

        // build a regexp from the phrases to highlight
        //   $re1: any single term
        //   $re2: two terms at most 75 characters apart, the second hit
        //         not starting with the text of the first
        //   $re3: three terms at most 45 characters apart each, every hit
        //         not starting with the text of an earlier one
        $re1 = '('.join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',array_filter((array) $highlight))
        )).')';
        $re2 = "$re1.{0,75}(?!\\1)$re1";
        $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";

        // at most four snippets; prefer the richest match available
        for ($cnt=4; $cnt--;) {
            if (0) {
            } else if (preg_match('/'.$re3.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re2.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else if (preg_match('/'.$re1.'/iu',$text,$match,PREG_OFFSET_CAPTURE,$offset)) {
            } else {
                break;
            }

            list($str,$idx) = $match[0];

            // convert $idx (a byte offset) into a utf8 character offset
            $utf8_idx = utf8_strlen(substr($text,0,$idx));
            $utf8_len = utf8_strlen($str);

            // establish context, 100 bytes surrounding the match string
            // first look to see if we can go 100 either side,
            // then drop to 50 adding any excess if the other side can't go to 50,
            $pre = min($utf8_idx-$utf8_offset,100);
            $post = min($len-$utf8_idx-$utf8_len,100);

            if ($pre>50 && $post>50) {
                $pre = $post = 50;
            } else if ($pre>50) {
                $pre = min($pre,100-$post);
            } else if ($post>50) {
                $post = min($post, 100-$pre);
            } else if ($offset == 0) {
                // both are less than 50, means the context is the whole string
                // make it so and break out of this loop - there is no need for the
                // complex snippet calculations
                //
                // FIX: this only holds on the first pass ($offset == 0). On
                // later passes $pre is measured from the end of the previous
                // context, so taking this branch there would throw away the
                // collected snippets and return the complete page text.
                $snippets = array($text);
                break;
            }

            // establish context start and end points, try to append to previous
            // context if possible
            $start = $utf8_idx - $pre;
            $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
            $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

            if ($append) {
                $snippets[count($snippets)-1] .= utf8_substr($text,$append,$end-$append);
            } else {
                $snippets[] = utf8_substr($text,$start,$end-$start);
            }

            // set $offset for next match attempt
            //   subtract strlen to avoid splitting a potential search success,
            //   this is an approximation as the search pattern may match strings
            //   of varying length and it will fail if the context snippet
            //   boundary breaks a matching string longer than the current match
            $utf8_offset = $utf8_idx + $post;
            $offset = $idx + strlen(utf8_substr($text,$utf8_idx,$post));
            $offset = utf8_correctIdx($text,$offset);
        }

        // wrap every hit in \x01 markers first, escape the joined snippets
        // with hsc() and only then turn the markers into <strong> tags, so
        // that the tags themselves are not escaped
        $m = "\1";
        $snippets = preg_replace('/'.$re1.'/iu',$m.'$1'.$m,$snippets);
        $snippet = preg_replace('/'.$m.'([^'.$m.']*?)'.$m.'/iu','<strong class="search_hit">$1</strong>',hsc(join('... ',$snippets)));

        // <--- Copied from ft_snippet() - No code cleanups

        $event->data['snippet'] = $snippet;
    }

    /**
     * Revised version of the ft_snippet_re_preprocess()
     *
     * Terms containing a character matched by IDX_ASIAN are used as they
     * are; all other terms are handed to the original
     * ft_snippet_re_preprocess().
     *
     * @param string $term a (regexp quoted) search term
     * @return string the regexp fragment for this term
     */
    function revised_ft_snippet_re_preprocess($term)
    {
        if (preg_match('/' . IDX_ASIAN . '/u', $term)) {
            return $term;
        } else {
            return ft_snippet_re_preprocess($term);
        }
    }

    /**
     * Reactivates missing asian term highlightings
     *
     * Only the 'show' action is taken over; every other action keeps its
     * default rendering.
     *
     * @param object $event TPL_ACT_RENDER event, its data is the action name
     * @param mixed  $param unused
     */
    function reactivateAsianTermHighlight(&$event, $param)
    {
        if ($event->data === 'show') {
            $event->preventDefault();
            $this->revised_html_show();
        }
    }

    /**
     * Revised version of the html_show()
     *
     * Prints either a preview of $txt or, when $txt is null, the page
     * given by the globals $ID/$REV with the terms in $HIGH highlighted
     * through revised_html_hilight().
     *
     * @param string|null $txt wiki text to preview, null to show the page
     */
    function revised_html_show($txt = null)
    {
        // ---> Copied from html_show() - No code cleanups

        global $ID;
        global $REV;
        global $HIGH;
        global $INFO;
        //disable section editing for old revisions or in preview
        if($txt || $REV){
            $secedit = false;
        }else{
            $secedit = true;
        }

        if (!is_null($txt)){
            //PreviewHeader
            echo '<br id="scroll__here" />';
            echo p_locale_xhtml('preview');
            echo '<div class="preview">';
            $html = html_secedit(p_render('xhtml',p_get_instructions($txt),$info),$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            echo $html;
            echo '<div class="clearer"></div>';
            echo '</div>';

        }else{
            if ($REV) print p_locale_xhtml('showrev');
            $html = p_wiki_xhtml($ID,$REV,true);
            $html = html_secedit($html,$secedit);
            if($INFO['prependTOC']) $html = tpl_toc(true).$html;
            $html = $this->revised_html_hilight($html,$HIGH); // <= REPLACED
            echo $html;
        }

        // <--- Copied from html_show() - No code cleanups
    }

    /**
     * Revised version of the html_hilight()
     *
     * Builds one alternation from all phrases (preprocessed by
     * revised_ft_snippet_re_preprocess()) and lets html_hilight_callback()
     * handle every hit. The pattern also matches opening HTML tags, so
     * the callback receives those as well.
     *
     * @param string       $html    rendered page HTML
     * @param string|array $phrases term or list of terms to highlight
     * @return string the HTML, unchanged when there is nothing to highlight
     */
    function revised_html_hilight($html, $phrases)
    {
        // ---> Copied from html_hilight() - No code cleanups

        $phrases = array_filter((array) $phrases);
        $regex = join('|',array_map(
            array($this, 'revised_ft_snippet_re_preprocess'), // <= REPLACED
            array_map('preg_quote_cb',$phrases)
        ));

        if ($regex === '') return $html;
        $html = preg_replace_callback("/((<[^>]*)|$regex)/ui",'html_hilight_callback',$html);
        return $html;

        // <--- Copied from html_hilight() - No code cleanups
    }
}
298