xref: /plugin/sphinxsearch-was/functions.php (revision 130:07256766c697)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Render one document as a <sphinx:document> XML element.
 *
 * The text fields (title, body, namespace, pagename) are passed through
 * escapeTextValue() before being placed into their CDATA sections; id, level
 * and modified are inserted as given.
 *
 * @param array $data  must provide the keys id, title_to_index, body,
 *                     namespace, pagename, level and modified
 * @return string the XML fragment for this document
 */
function formatXml($data)
{
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<namespace><![CDATA[[{namespace}]]></namespace>
<pagename><![CDATA[[{pagename}]]></pagename>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // strtr() replaces every placeholder in a single pass over the template.
    // Text that merely looks like a placeholder inside an inserted value
    // (for example a page body containing "{modified}") is therefore left
    // alone, which sequential array replacement would not guarantee.
    return strtr($xmlFormat, array(
        '{id}'        => $data['id'],
        '{title}'     => escapeTextValue($data['title_to_index']),
        '{body}'      => escapeTextValue($data['body']),
        '{namespace}' => escapeTextValue($data['namespace']),
        '{pagename}'  => escapeTextValue($data['pagename']),
        '{level}'     => $data['level'],
        '{modified}'  => $data['modified'],
    ));
}
30
/**
 * Prepare a text value for embedding inside a CDATA section.
 *
 * Markup is removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every "]]>" sequence is split across separate
 * CDATA sections so that it cannot terminate the enclosing one early.
 *
 * @param string $value raw text
 * @return string text that is safe to place between <![CDATA[ and ]]>
 */
function escapeTextValue($value)
{
    if ($value === "") {
        return "";
    }

    $plainText = stripInvalidXml(strip_tags($value));

    // Closes the current section, emits "]]" and ">" in sections of their
    // own, then reopens a section for the remaining text.
    $cdataEndReplacement = "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[";

    return str_replace("]]>", $cdataEndReplacement, $plainText);
}
42
/**
 * Replace bytes that are not allowed in XML 1.0 text with a space.
 *
 * The check is byte-wise: control bytes below 0x20 other than tab (0x09),
 * line feed (0x0A) and carriage return (0x0D) become a single space, every
 * other byte (including the bytes of multi-byte UTF-8 sequences) is kept
 * unchanged.
 *
 * @param string|null $value text to clean
 * @return string cleaned text; '' for null, false or the empty string
 */
function stripInvalidXml($value)
{
    // Only genuinely empty input short-circuits; the string "0" is valid
    // content and must be kept.
    if ($value === null || $value === false || $value === '') {
        return '';
    }

    $clean = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', (string) $value);

    // preg_replace() reports an internal failure with null.
    return ($clean === null) ? '' : $clean;
}
72
/**
 * Split a page into one entry per table-of-contents heading.
 *
 * When the text found before the first heading is non-empty it is added as
 * an extra entry keyed by the page id with level 0. Every heading entry is
 * keyed by its 'hid' and carries the keys section, level, title, title_text
 * and title_to_index. title_text is prefixed with the most recently
 * remembered title and " &raquo; " when the heading's level is greater
 * than 1.
 *
 * @param string $id        page id
 * @param array  $metadata  page metadata; $metadata['description']['tableofcontents']
 *                          is read as a list of rows with hid, title and level
 * @return array|false the entries, or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $topLevel = 1;
    $parentTitle = '';
    $documents = array();
    $isFirstHeading = true;

    foreach ($metadata['description']['tableofcontents'] as $heading) {
        $headingId = $heading['hid'];
        $headingTitle = $heading['title'];

        // The text in front of the very first heading is indexed separately.
        if ($isFirstHeading) {
            $isFirstHeading = false;
            $intro = getZeroSectionContent($id, $headingTitle);
            if ($intro) {
                $documents[$id] = array(
                    'section' => $intro,
                    'level' => 0,
                    'title' => $id,
                    'title_to_index' => $id,
                );
            }
        }

        $body = getSectionByTitleLevel($id, $headingTitle, false);

        if ($heading['level'] > $topLevel && !empty($parentTitle)) {
            $titleText = $parentTitle . " &raquo; " . $headingTitle;
        } else {
            // This heading becomes the prefix for the deeper ones after it.
            $titleText = $headingTitle;
            $parentTitle = $headingTitle;
        }

        $documents[$headingId] = array(
            'section' => $body,
            'level' => $heading['level'],
            'title' => $headingTitle,
            'title_text' => $titleText,
            'title_to_index' => $headingTitle,
        );
    }

    return $documents;
}
109
/**
 * Return the raw page text that precedes the given heading.
 *
 * The heading is searched case-insensitively as "=...= title =...=" with
 * one to six equals signs on either side.
 *
 * @param string $id      page id; the raw text is read via io_readFile(wikiFN($id))
 * @param string $header  heading title to look for
 * @return string|false the text before the heading, or false when the
 *                      heading is not found or nothing precedes it
 */
function getZeroSectionContent($id, $header)
{
    $headerReg = preg_quote($header, '/');
    $regex = '/(={1,6})\s*(' . $headerReg . ')\s*(={1,6})/i';
    $doc = io_readFile(wikiFN($id));

    $matches = array();
    if (!preg_match($regex, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // Use the byte offset of the matched heading itself. Searching the page
    // for the heading's run of "=" would stop at any earlier "==" in the
    // introductory text and truncate it.
    $end = $matches[0][1];
    if ($end === 0) {
        return false;
    }

    return substr($doc, 0, $end);
}
129
/**
 * Return the trimmed raw text of the section that follows a heading.
 *
 * The heading is searched case-insensitively as "=...= title =...=" with one
 * to six equals signs on either side. The section runs from the end of that
 * heading up to the next heading written with four to six equals signs, or to
 * the end of the page when no such heading follows; headings written with
 * fewer equals signs stay part of the section.
 *
 * @param string $id        page id; the raw text is read via io_readFile(wikiFN($id))
 * @param string $header    heading title to look for
 * @param bool   $extended  when true and the section is empty, return the
 *                          text of the section after the next heading instead
 * @return string the section text, or '' when the heading is not found
 */
function getSectionByTitleLevel($id, $header, $extended=false)
{
    $doc = io_readFile(wikiFN($id));
    $startRegex = '/(={1,6})\s*(' . preg_quote($header, '/') . ')\s*(={1,6})/i';
    $endRegex = '/(={4,6})(.*?)(={4,6})/i';

    $matches = array();
    if (!preg_match($startRegex, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }

    // All positions are byte offsets into $doc. Searching forward from the
    // cursor means a terminating heading whose text also occurs earlier in
    // the page cannot be mistaken for the end of this section.
    $cursor = $matches[0][1] + strlen($matches[0][0]);
    $attempts = $extended ? 2 : 1;
    $section = '';

    for ($attempt = 0; $attempt < $attempts; $attempt++) {
        $next = array();
        if (!preg_match($endRegex, $doc, $next, PREG_OFFSET_CAPTURE, $cursor)) {
            // No further heading: the section runs to the end of the page.
            $section = trim(substr($doc, $cursor));
            break;
        }

        $section = trim(substr($doc, $cursor, $next[0][1] - $cursor));
        if ($section !== '') {
            break;
        }

        // Empty section: in extended mode continue after the next heading.
        $cursor = $next[0][1] + strlen($next[0][0]);
    }

    return $section;
}
176
/**
 * Return the raw text of a section, located with the DokuWiki parser.
 *
 * The page is parsed with only the header and listblock modes enabled. The
 * section starts at the position recorded for the matching header
 * instruction and ends at the position of the next section_close
 * instruction. The parsed instructions and raw text of the most recently
 * requested page are kept in static variables so repeated calls for the
 * same page do not parse it again.
 *
 * @param string $id      page id; the raw text is read via io_readFile(wikiFN($id))
 * @param string $header  heading title, compared against the trimmed header text
 * @return string the section text, or '' when the heading is not found
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (empty($cacheDoc[$id])){
        // Create the parser. Objects are handles, so plain assignment is
        // enough; "=& new" is no longer valid syntax as of PHP 7.
        $Parser = new Doku_Parser();

        // Add the Handler
        $Parser->Handler = new Doku_Handler();

        // Load the header mode to find headers
        $Parser->addMode('header',new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock',new Doku_Parser_Mode_ListBlock());

        // Loads the raw wiki document
        $doc = io_readFile(wikiFN($id));

        // Get a list of instructions
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        //free old cache
        $cacheInstructions = null;
        $cacheDoc = null;

        //initialize new cache
        $cacheInstructions[$id] = $instructions;
        $cacheDoc[$id] = $doc;
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    // Use this to watch when we're inside the section we want
    $inSection = FALSE;
    $sectionClosed = FALSE;
    $startPos = 0;
    $endPos = 0;

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {

        if ( !$inSection ) {

            // Look for the header instruction of the requested heading
            if ( $instruction[0] == 'header' &&
                    trim($instruction[1][0]) == $header ) {

                $startPos = $instruction[2];
                $inSection = TRUE;
            }
        } else {

            // Look for the end of the section
            if ( $instruction[0] == 'section_close' ) {
                $endPos = $instruction[2];
                $sectionClosed = TRUE;
                break;
            }
        }
    }

    if ( !$inSection ) {
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes will match
    $doc = "\n".str_replace("\r\n","\n",$doc)."\n";

    if ( !$sectionClosed ) {
        // No closing instruction was seen: take everything after the header
        // rather than computing a negative length.
        return substr($doc, $startPos);
    }

    return substr($doc, $startPos, ($endPos-$startPos));
}
250
/**
 * List the namespaces that contain a page, from deepest to shallowest.
 *
 * Each namespace path is followed by one space and the list ends with one
 * additional space, e.g. "a:b:c" gives "a:b a  ".
 *
 * @param string $id page id
 * @return string the namespace list, or '' when the id is empty or has no namespace
 */
function getCategories($id)
{
    if (empty($id) || strpos($id, ":") === false) {
        return '';
    }

    $segments = explode(":", $id);
    $categories = '';

    // The last segment is the page name itself; $depth counts how many
    // leading segments form the namespace path, down to the empty path.
    for ($depth = count($segments) - 1; $depth >= 0; $depth--) {
        $categories .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $categories;
}
275
/**
 * Return the page name, i.e. the part of the id after its last colon.
 *
 * @param string $id page id
 * @return string the last colon-separated segment; the id itself when it
 *                has no colon; '' when the id is empty
 */
function getPagename($id)
{
    if (empty($id)) {
        return '';
    }

    $hasNamespace = (strpos($id, ":") !== false);
    if (!$hasNamespace) {
        return $id;
    }

    $segments = explode(":", $id);
    return end($segments);
}
287
288
289
290 /**
291  * Method return all wiki page names
292  * @global array $conf
293  * @return array
294  */
295 function getPagesList()
296 {
297    global $conf;
298
299    $data = array();
300    sort($data);
301    search($data,$conf['datadir'],'search_allpages',array('skipacl'=>1),'');
302
303    return $data;
304}
305
/**
 * Build one namespace-restricted search link per segment of a page id.
 *
 * For every prefix of the id ("a:", "a:b:", ...) the prefix is resolved with
 * resolve_pageid(). A trailing ":start" is dropped from the resolved id
 * unless the prefix itself ends in ":start:". The link repeats the search
 * with " @ns <page>" appended; the title is the excerpt returned by
 * $search->getExcerpt() for the segment name.
 *
 * NOTE(review): $keywords is inserted into the link without URL-encoding;
 * confirm that callers pass a value that is safe to embed.
 *
 * @param string $id        page id
 * @param string $keywords  current search keywords
 * @param object $search    must provide getExcerpt($titles, $query) and starQuery($keywords)
 * @return array list of array('link' => ..., 'title' => ...), one per id segment
 */
function getNsLinks($id, $keywords, $search)
{
    $parts = explode(':', $id);

    // print intermediate namespace links
    $part = '';
    $data = array();
    $titles = array();
    foreach ($parts as $name) {
        $part .= $name.':';
        $page = $part;
        resolve_pageid('',$page,$exists);

        if (preg_match('#:start$#', $page) && !preg_match('#:start:$#', $part)) {
            // Cut at the LAST ":start". The first occurrence may belong to
            // a namespace such as "wiki:starting" further to the left.
            $page = substr($page, 0, strrpos($page, ':start'));
        }

        $titles[wl($page)] = $name;
        $data[] = array('link' => "?do=search&id={$keywords}".urlencode(" @ns $page"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));
    $i = 0;
    foreach ($data as $key => $notused){
        $data[$key]['title'] = $titleExcerpt[$i++];
    }
    return $data;
}
340
/**
 * Print an HTML list of up to 20 pages whose name matches the query.
 *
 * Spaces in the query are replaced by underscores before it is passed to
 * ft_pageLookup(). Each result is printed as a link via tpl_link().
 *
 * NOTE(review): the values of the ft_pageLookup() result are used as page
 * ids; confirm this matches the DokuWiki version in use.
 *
 * @param string $query search query
 * @return false|null false when nothing matched, otherwise nothing
 */
function printNamespaces($query)
{
    $query = str_replace(" ", "_", $query);
    $data = ft_pageLookup($query, false);

    if (empty($data)) return false;

    $limit = 20;
    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach ($data as $id) {
        print '<li>';
        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';
        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}
370
/**
 * Print an HTML list of up to 10 matching pages.
 *
 * Only the array keys are used; each is printed as a link to wl($id) with
 * the id as link text.
 *
 * @param array $pageNames map of page id => heading (the heading is not printed)
 * @return false|null false when the list is empty, otherwise nothing
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) return false;

    $limit = 10;
    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach (array_keys($pageNames) as $id) {
        print '<li>';
        print '<a href="'.wl($id).'" '. "class='wikilink1'>".$id."</a>";
        print '</li>';
        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}
400
if (!function_exists('shorten')) {
    /**
     * Shorten a given string by removing data from the middle.
     *
     * The string is given in two parts. The first part, $keep, is never
     * shortened. The second part, $short, is cut in the middle, but only
     * if at least $min characters remain available for it; otherwise it
     * is left off entirely.
     *
     * @param string $keep   the part to keep
     * @param string $short  the part to shorten
     * @param int    $max    maximum chars you want for the whole string
     * @param int    $min    minimum number of chars to have left for middle shortening
     * @param string $char   the shortening character to use
     * @return string the combined, possibly shortened string
     */
    function shorten($keep, $short, $max, $min = 9, $char = '⌇')
    {
        // Characters still available once $keep has been accounted for.
        $budget = $max - utf8_strlen($keep);
        if ($budget < $min) {
            return $keep;
        }

        $shortLength = utf8_strlen($short);
        if ($shortLength <= $budget) {
            return $keep . $short;
        }

        $half = floor($budget / 2);
        $head = utf8_substr($short, 0, $half - 1);
        $tail = utf8_substr($short, $shortLength - $half);

        return $keep . $head . $char . $tail;
    }
}
425