xref: /plugin/sphinxsearch-was/functions.php (revision 96:57dbaaa4f4e1)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Render one document as a <sphinx:document> XML fragment.
 *
 * The text fields (title, body, namespace, pagename) are passed through
 * escapeTextValue() and wrapped in CDATA; id, level and modified are inserted
 * as given.
 *
 * Note that every CDATA section in the template opens with an extra literal
 * "[" in front of the value ("<![CDATA[[").
 * NOTE(review): unclear whether that leading "[" is intentional - confirm
 * before changing, since it affects the emitted text.
 *
 * @param array $data expects the keys 'id', 'title_to_index', 'body',
 *                    'namespace', 'pagename', 'level' and 'modified'
 * @return string the XML fragment, with a leading newline and a trailing blank line
 */
function formatXml($data)
{
    $template = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<namespace><![CDATA[[{namespace}]]></namespace>
<pagename><![CDATA[[{pagename}]]></pagename>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // Placeholder => value, listed in the order str_replace() applies them.
    // The replacement is sequential: a placeholder token that appears inside
    // an earlier value is itself substituted by a later pair.
    $substitutions = array(
        '{id}'        => $data['id'],
        '{title}'     => escapeTextValue($data['title_to_index']),
        '{body}'      => escapeTextValue($data['body']),
        '{namespace}' => escapeTextValue($data['namespace']),
        '{pagename}'  => escapeTextValue($data['pagename']),
        '{level}'     => $data['level'],
        '{modified}'  => $data['modified'],
    );

    return str_replace(
        array_keys($substitutions),
        array_values($substitutions),
        $template
    );
}
30
/**
 * Prepare a text value for embedding inside a CDATA section.
 *
 * Markup is removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every "]]>" sequence is rewritten so that it cannot
 * terminate the surrounding CDATA section early: the current section is
 * closed, "]]" and ">" are emitted in two CDATA sections of their own, and a
 * new section is opened for the remaining text.
 *
 * @param string $value raw text
 * @return string text that is safe to place between "<![CDATA[" and "]]>"
 */
function escapeTextValue($value)
{
    if ($value === "") {
        return "";
    }

    $plainText = stripInvalidXml(strip_tags($value));

    return str_replace(
        "]]>",
        "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[",
        $plainText
    );
}
42
/**
 * Replace bytes that are not allowed in XML 1.0 text with a space.
 *
 * The string is processed byte by byte (no UTF-8 decoding): the control bytes
 * 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F each become a single space; tab (0x09),
 * line feed (0x0A), carriage return (0x0D) and every byte >= 0x20 - including
 * the bytes of multi-byte UTF-8 sequences - are kept unchanged.
 *
 * Unlike a plain empty() check, the string "0" is preserved.
 *
 * @param string $value text to clean
 * @return string cleaned text; "" for null, false, "" or an array
 */
function stripInvalidXml($value)
{
    if ($value === null || $value === false || $value === '' || is_array($value)) {
        return "";
    }

    // No "u" modifier on purpose: the pattern must match single bytes so that
    // multi-byte UTF-8 sequences pass through untouched.
    $cleaned = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', (string) $value);

    // preg_replace() returns null on an internal PCRE error.
    return $cleaned === null ? "" : $cleaned;
}
72
/**
 * Build one entry per heading listed in a page's table of contents.
 *
 * Each entry is keyed by the heading's 'hid' and holds:
 *  - 'section'        text returned by getSectionByTitleLevel() for the heading
 *  - 'level'          the heading level from the table of contents
 *  - 'title'          the heading title
 *  - 'title_text'     display title; for a heading deeper than level 1 it is
 *                     "<remembered title> &raquo; <title>"
 *  - 'title_to_index' the heading title
 *
 * The remembered title is updated by every heading that is NOT rendered with
 * a prefix, i.e. level-1 headings and any heading seen before a title has
 * been remembered.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; reads ['description']['tableofcontents']
 * @return array|false entries keyed by hid, or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $topLevel = 1;
    $parentTitle = '';
    $documents = array();

    foreach ($metadata['description']['tableofcontents'] as $tocEntry) {
        $title = $tocEntry['title'];

        if ($tocEntry['level'] > $topLevel && !empty($parentTitle)) {
            $titleText = $parentTitle . " &raquo; " . $title;
        } else {
            $titleText = $title;
            $parentTitle = $title;
        }

        $documents[$tocEntry['hid']] = array(
            'section'        => getSectionByTitleLevel($id, $title),
            'level'          => $tocEntry['level'],
            'title'          => $title,
            'title_text'     => $titleText,
            'title_to_index' => $title,
        );
    }

    return $documents;
}
96
/**
 * Extract the raw wiki text that follows a given heading.
 *
 * The page source is read via io_readFile(wikiFN($id)). The section starts
 * right after the first heading whose text matches $header (case-insensitive,
 * surrounded by 1-6 "=" on each side) and ends at the next heading written
 * with 4-6 "=" characters, or at the end of the document when there is none.
 *
 * In extended mode, when that section is empty, the text following the next
 * 4-6 "=" heading is returned instead (one extra step only).
 *
 * All positions are taken from the regex match offsets in the full document,
 * so a heading text that also occurs earlier in the page cannot shift the
 * section boundaries. When the heading is not found, or there is nothing
 * after it, an empty string is returned in both modes.
 *
 * @param string $id       page id
 * @param string $header   heading text to look for
 * @param bool   $extended fall back to the following section when the first is empty
 * @return string trimmed section text, or '' when nothing was found
 */
function getSectionByTitleLevel($id, $header, $extended=false)
{
    $doc = (string) io_readFile(wikiFN($id));

    $headerPattern = '/(={1,6})\s*(' . preg_quote($header, '/') . ')\s*(={1,6})/i';
    if (!preg_match($headerPattern, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    // Byte offset just past the matched heading line fragment.
    $startPos = $matches[0][1] + strlen($matches[0][0]);

    $boundaryPattern = '/(={4,6})(.*?)(={4,6})/i';

    // One pass normally; a second pass in extended mode when the first
    // section turned out to be empty.
    $passesLeft = $extended ? 2 : 1;
    $section = '';
    while ($passesLeft-- > 0) {
        if (!preg_match($boundaryPattern, $doc, $matches, PREG_OFFSET_CAPTURE, $startPos)) {
            // No further heading: the section runs to the end of the document.
            return trim(substr($doc, $startPos));
        }

        $endPos = $matches[0][1];
        $section = trim(substr($doc, $startPos, $endPos - $startPos));
        if ($section !== '') {
            return $section;
        }

        // Empty body: continue right after the heading that ended it.
        $startPos = $endPos + strlen($matches[0][0]);
    }

    return $section;
}
143
/**
 * Return the raw text of the section introduced by a given heading, using the
 * DokuWiki parser to locate the section boundaries.
 *
 * The page is parsed with only the 'header' and 'listblock' modes enabled.
 * The parsed instructions and the raw source of the most recently requested
 * page are kept in static variables, so repeated calls for the same page do
 * not re-parse it; requesting a different page replaces the cached entry.
 *
 * @param string $id     page id
 * @param string $header heading text (compared against the trimmed header instruction text)
 * @return string the section text, or '' when the heading is not found
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (empty($cacheDoc[$id])) {
        // "new" already yields an object handle; assigning it by reference
        // ("= & new") is not valid syntax since PHP 7.
        $parser = new Doku_Parser();
        $parser->Handler = new Doku_Handler();

        // Only headers and list blocks are needed to find section boundaries.
        $parser->addMode('header', new Doku_Parser_Mode_Header());
        $parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        // Load the raw wiki document and turn it into instructions.
        $doc = io_readFile(wikiFN($id));
        $instructions = $parser->parse($doc);

        unset($parser->Handler);
        unset($parser);

        // Keep a single page in the cache: drop the old entry, store the new one.
        $cacheInstructions = array($id => $instructions);
        $cacheDoc = array($id => $doc);
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    $inSection = false;
    $startPos = 0;
    $endPos = 0;

    foreach ($instructions as $instruction) {
        if (!$inSection) {
            // Look for the requested heading.
            if ($instruction[0] == 'header' && trim($instruction[1][0]) == $header) {
                $startPos = $instruction[2];
                $inSection = true;
            }
        } elseif ($instruction[0] == 'section_close') {
            // First section_close after the heading ends the section.
            $endPos = $instruction[2];
            break;
        }
    }

    if (!$inSection) {
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that the byte indexes match.
    $doc = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    if ($endPos < $startPos) {
        // No section_close was seen: take everything up to the end of the
        // document instead of passing a negative length to substr().
        return substr($doc, $startPos);
    }

    return substr($doc, $startPos, $endPos - $startPos);
}
217
/**
 * List every namespace prefix of a page id, longest first.
 *
 * For "a:b:c" the result is "a:b a  ": each prefix is followed by one space,
 * and a final empty prefix contributes one more trailing space.
 *
 * @param string $id page id
 * @return string space-separated namespace prefixes, or '' when the id is
 *                empty or has no namespace part
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }

    $segments = explode(':', $id);
    if (count($segments) < 2) {
        // No ":" in the id, so there is no namespace.
        return '';
    }

    // Drop the page name; what remains are the namespace segments.
    array_pop($segments);

    $categories = '';
    for ($depth = count($segments); $depth >= 0; $depth--) {
        $categories .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $categories;
}
242
/**
 * Return the page-name part of a page id, i.e. the text after the last ":".
 *
 * @param string $id page id
 * @return string the last ":"-separated segment; the id itself when it has no
 *                ":"; '' for an empty id
 */
function getPagename($id)
{
    if (empty($id)) {
        return '';
    }

    $segments = explode(':', $id);

    // A single segment means the id contains no namespace separator.
    return count($segments) > 1 ? end($segments) : $id;
}
254
255
256
/**
 * Collect all wiki pages found below the configured data directory.
 *
 * The list is filled by search() with the 'search_allpages' callback and is
 * returned in the order search() produces it.
 * NOTE(review): the previous version called sort() on the still-empty array
 * before searching, which had no effect; if a sorted result is wanted, it has
 * to be sorted after the search - confirm what callers expect.
 *
 * @global array $conf reads $conf['datadir']
 * @return array entries as produced by the 'search_allpages' callback
 */
function getPagesList()
{
    global $conf;

    $pages = array();
    search($pages, $conf['datadir'], 'search_allpages', '', '');

    return $pages;
}
272
/**
 * Build search links for every namespace level of a page id.
 *
 * For each ":"-separated part of $id the accumulated path ("a:", "a:b:", ...)
 * is resolved with resolve_pageid(). When the resolved page ends in ":start"
 * (and the path itself is not a namespace literally named "start"), that
 * trailing ":start" is removed. Each level yields a link that repeats the
 * search restricted with "@ns <page>", and a title taken from the excerpts
 * returned by $search->getExcerpt().
 *
 * NOTE(review): $keywords is inserted into the link without URL-encoding or
 * HTML-escaping - confirm that callers encode/escape it before output.
 * NOTE(review): titles are keyed by wl($page); if two levels resolve to the
 * same URL the excerpt list is presumably shorter than the link list and the
 * titles no longer line up - verify against getExcerpt().
 *
 * @param string $id       page id
 * @param string $keywords search keywords
 * @param object $search   object providing getExcerpt() and starQuery()
 * @return array list of array('link' => ..., 'title' => ...), one per namespace level
 */
function getNsLinks($id, $keywords, $search)
{
    $startSuffix = ':start';

    $path = '';
    $data = array();
    $titles = array();
    foreach (explode(':', $id) as $name) {
        $path .= $name . ':';
        $page = $path;
        $exists = false;
        resolve_pageid('', $page, $exists);

        if (preg_match("#:start$#", $page) && !preg_match("#:start:$#", $path)) {
            // Cut at the LAST ":start"; searching from the front would
            // truncate ids such as "ns:startup:start" far too early.
            $page = substr($page, 0, strrpos($page, $startSuffix));
        }

        $titles[wl($page)] = $name;
        $data[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @ns $page"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));

    // $data was filled with [] so its keys are 0..n-1, matching the excerpt order.
    foreach (array_keys($data) as $index) {
        $data[$index]['title'] = $titleExcerpt[$index];
    }

    return $data;
}
307
/**
 * Print an HTML list of pages whose names match the query.
 *
 * Spaces in the query are replaced by underscores before it is handed to
 * ft_pageLookup(). At most 20 results are printed, each as a link produced by
 * tpl_link().
 * NOTE(review): assumes ft_pageLookup() returns a list whose values are page
 * ids - TODO confirm for the DokuWiki version in use.
 *
 * @param string $query search query
 * @return false|null false when nothing matched; otherwise nothing (output is printed)
 */
function printNamespaces($query)
{
    $maxResults = 20;

    $query = str_replace(" ", "_", $query);
    $data = ft_pageLookup($query, false);

    if (empty($data)) {
        return false;
    }

    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach ($data as $id) {
        print '<li>';
        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';
        if (++$counter == $maxResults) {
            break;
        }
    }
    print '</ul>';
}
337
/**
 * Print an HTML list of links to the given pages.
 *
 * Only the array keys (page ids) are used; the values (headers) are currently
 * ignored. The visible link text is HTML-escaped before output.
 *
 * @param array $pageNames map of page id => header
 * @param int   $limit     maximum number of entries to print (default 10);
 *                         a value that the counter never reaches, such as 0,
 *                         prints every entry
 * @return false|null false when $pageNames is empty; otherwise nothing (output is printed)
 */
function printNamespacesNew($pageNames, $limit = 10)
{
    if (empty($pageNames)) {
        return false;
    }

    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach ($pageNames as $id => $unusedHeader) {
        // Numeric array keys arrive as integers, hence the cast.
        $label = htmlspecialchars((string) $id, ENT_QUOTES, 'UTF-8');

        print '<li>';
        print '<a href="' . wl($id) . '" ' . "class='wikilink1'>" . $label . "</a>";
        print '</li>';

        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}
367
if (!function_exists('shorten')) {
    /**
     * Shorten a string by cutting characters out of its middle.
     *
     * The string is given in two parts. The first part, $keep, is never
     * shortened. The second part, $short, is cut in the middle so that the
     * whole result fits into $max characters - but only if at least $min
     * characters remain available for it; otherwise $short is dropped
     * entirely and only $keep is returned.
     *
     * Defined only when no shorten() function exists yet.
     *
     * @param string $keep   the part to keep
     * @param string $short  the part to shorten
     * @param int    $max    maximum number of characters for the whole string
     * @param int    $min    minimum number of characters that must be left for $short
     * @param string $char   the character inserted where text was removed
     * @return string the combined, possibly shortened string
     */
    function shorten($keep,$short,$max,$min=9,$char='⌇'){
        // Characters still available once the fixed part is accounted for.
        $room = $max - utf8_strlen($keep);
        if ($room < $min) {
            return $keep;
        }

        $shortLength = utf8_strlen($short);
        if ($shortLength <= $room) {
            return $keep . $short;
        }

        // Keep roughly half of the room from each end of $short.
        $half = floor($room / 2);
        $head = utf8_substr($short, 0, $half - 1);
        $tail = utf8_substr($short, $shortLength - $half);

        return $keep . $head . $char . $tail;
    }
}
392