xref: /plugin/sphinxsearch-was/functions.php (revision 48:31ff9d57aa3a)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Render one document as a Sphinx xmlpipe2 <sphinx:document> element.
 *
 * The text fields (title, body, categories) are passed through
 * escapeTextValue() and wrapped in CDATA sections; id, level and modified
 * are inserted as given.
 *
 * @param array $data Document fields: 'id', 'title', 'body', 'categories',
 *                    'level' and 'modified'.
 * @return string The XML fragment, preceded and followed by blank lines.
 */
function formatXml($data)
{
    // Each CDATA section opens with exactly "<![CDATA[" so that no stray
    // bracket is prepended to the indexed text.
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[{title}]]></title>
<body><![CDATA[{body}]]></body>
<categories><![CDATA[{categories}]]></categories>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // strtr() with a map substitutes in a single pass and never rescans text
    // it has just inserted. A title or body that happens to contain a
    // placeholder such as "{level}" is therefore kept literally instead of
    // being overwritten by a later replacement.
    return strtr($xmlFormat, array(
        '{id}'         => $data['id'],
        '{title}'      => escapeTextValue($data['title']),
        '{body}'       => escapeTextValue($data['body']),
        '{categories}' => escapeTextValue($data['categories']),
        '{level}'      => $data['level'],
        '{modified}'   => $data['modified'],
    ));
}
26
/**
 * Prepare a text value for embedding inside a CDATA section.
 *
 * The value is converted to UTF-8 (the input is treated as ISO-8859-1),
 * stripped of HTML/PHP tags, cleaned by stripInvalidXml(), and every "]]>"
 * is split across CDATA sections so it cannot terminate the enclosing one.
 *
 * @param string $value Raw text.
 * @return string Text that is safe to place between "<![CDATA[" and "]]>".
 */
function escapeTextValue($value)
{
    if ($value !== "") {
        $utf8 = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
        $cleaned = stripInvalidXml(strip_tags($utf8));

        // Close the current section, emit "]]" and ">" in sections of their
        // own, then reopen a section for the rest of the text.
        return str_replace("]]>", "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[", $cleaned);
    }

    return "";
}
38
/**
 * Replace control bytes that are not allowed in XML 1.0 with spaces.
 *
 * The string is processed byte by byte: every byte below 0x20 other than
 * TAB (0x09), LF (0x0A) and CR (0x0D) becomes a single space. All other
 * bytes, including the bytes of multi-byte UTF-8 sequences, are left
 * untouched, so the length of the string never changes.
 *
 * @param string $value Text to clean.
 * @return string Cleaned text; "" for null, "" or any non-scalar input.
 */
function stripInvalidXml($value)
{
    if (!is_scalar($value))
    {
        return "";
    }

    // Compare against "" explicitly: empty() would also discard the
    // perfectly valid text "0".
    $value = (string) $value;
    if ("" === $value)
    {
        return "";
    }

    // Build the list of forbidden bytes once per request.
    static $forbidden = null;
    if (null === $forbidden)
    {
        $forbidden = "";
        for ($byte = 0; $byte < 0x20; $byte++)
        {
            if ($byte !== 0x9 && $byte !== 0xA && $byte !== 0xD)
            {
                $forbidden .= chr($byte);
            }
        }
    }

    // Three-argument strtr() maps each forbidden byte to a space.
    return strtr($value, $forbidden, str_repeat(" ", strlen($forbidden)));
}
68
/**
 * Build one search document per table-of-contents entry of a page.
 *
 * @param string $id       Page id, passed on to getSectionByTitleLevel().
 * @param array  $metadata Page metadata; the entries are read from
 *                         $metadata['description']['tableofcontents'], each
 *                         providing 'hid', 'title' and 'level'.
 * @return array|false Map of hid => array('section', 'level', 'title',
 *                     'title_text'), or false when there is no table of
 *                     contents.
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata)) {
        return false;
    }
    if (empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $topLevel = 1;
    $parentTitle = '';
    $documents = array();

    foreach ($metadata['description']['tableofcontents'] as $entry) {
        $hid = $entry['hid'];
        $title = $entry['title'];

        $document = array(
            'section' => getSectionByTitleLevel($id, $title),
            'level'   => $entry['level'],
            'title'   => $title,
        );

        if ($entry['level'] > $topLevel) {
            // Deeper headings are prefixed with the last top-level title.
            $document['title_text'] = $parentTitle . " &raquo; " . $title;
        } else {
            $document['title_text'] = $title;
            $parentTitle = $title;
        }

        $documents[$hid] = $document;
    }

    return $documents;
}
91
/**
 * Return the raw wiki text that follows a given heading.
 *
 * The heading is located case-insensitively as one to six "=" characters,
 * the title, and one to six "=" characters. The section runs from the end of
 * that heading up to the next heading written with three to six "="
 * characters, or to the end of the page when there is none.
 *
 * In extended mode, when that text is empty, the text following the next
 * heading is returned instead. If there is no next heading (or the requested
 * heading was not found at all) extended mode falls back to the whole page
 * text, which is what this function has historically returned in that case.
 *
 * @param string $id       Page id.
 * @param string $header   Heading title to look for.
 * @param bool   $extended Try the following section when the first is empty.
 * @return string Trimmed section text, "" when nothing was found.
 */
function getSectionByTitleLevel($id, $header, $extended=false)
{
    $doc = (string) io_readFile(wikiFN($id));
    $headerReg = preg_quote($header, '/');
    $regex = "(={1,6})\s*({$headerReg})\s*(={1,6})";

    $section = '';
    $nextHeader = null;
    if (preg_match("/$regex/i", $doc, $matches, PREG_OFFSET_CAPTURE)) {
        // All positions are byte offsets into the full page text, so they
        // stay valid for every later substr() call.
        $bodyStart = $matches[0][1] + strlen($matches[0][0]);
        $nextHeader = _findSectionHeaderAfter($doc, $bodyStart);
        $section = _sliceUntilHeader($doc, $bodyStart, $nextHeader);
    }
    $section = trim($section);

    //trying to get next section content if body for first section is empty
    //working only for extended mode
    if ($extended && '' === $section) {
        if (null === $nextHeader) {
            $section = $doc;
        } else {
            $bodyStart = $nextHeader[0] + $nextHeader[1];
            $section = _sliceUntilHeader($doc, $bodyStart, _findSectionHeaderAfter($doc, $bodyStart));
        }
        $section = trim($section);
    }

    return $section;
}

/**
 * Find the first heading (three to six "=" on both sides) at or after $offset.
 *
 * @param string $doc    Full page text.
 * @param int    $offset Byte offset at which the search starts.
 * @return array|null array(position, length) of the heading, or null.
 */
function _findSectionHeaderAfter($doc, $offset)
{
    if (preg_match('/(={3,6})(.*?)(={3,6})/i', $doc, $found, PREG_OFFSET_CAPTURE, $offset)) {
        return array($found[0][1], strlen($found[0][0]));
    }
    return null;
}

/**
 * Cut the text from $start up to a heading located by _findSectionHeaderAfter().
 *
 * @param string     $doc        Full page text.
 * @param int        $start      Byte offset of the first character to keep.
 * @param array|null $nextHeader array(position, length) or null for "to the end".
 * @return string The slice; "" when $start is at or past the end of the text.
 */
function _sliceUntilHeader($doc, $start, $nextHeader)
{
    if (null === $nextHeader) {
        return (string) substr($doc, $start);
    }
    return (string) substr($doc, $start, $nextHeader[0] - $start);
}
138
/**
 * Extract the raw text of a section using the DokuWiki parser.
 *
 * The page is parsed with only the header and list-block modes loaded, and
 * the instruction list is scanned for a header whose trimmed title equals
 * $header; the section ends at the next 'section_close' instruction. The
 * parse result of the most recently requested page is cached in static
 * variables, so repeated calls for the same page do not parse it again.
 *
 * @param string $id     Page id.
 * @param string $header Heading title to look for.
 * @return string Section text, "" when the heading is not found.
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (empty($cacheDoc[$id])){
        // Create the parser and its handler. Objects are handles already, so
        // "new" is assigned directly; assigning it by reference is a parse
        // error on current PHP versions.
        $Parser = new Doku_Parser();
        $Parser->Handler = new Doku_Handler();

        // Load the header mode to find headers
        $Parser->addMode('header',new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock',new Doku_Parser_Mode_ListBlock());

        // Loads the raw wiki document
        $doc = io_readFile(wikiFN($id));

        // Get a list of instructions
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        // Keep only the page just parsed; this drops the previous cache entry.
        $cacheInstructions = array($id => $instructions);
        $cacheDoc = array($id => $doc);
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    // Use this to watch when we're inside the section we want
    $inSection = false;
    $sectionClosed = false;
    $startPos = 0;
    $endPos = 0;

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {
        if ( !$inSection ) {
            // Look for the requested heading. The comparison is strict so
            // that numeric-looking titles such as "2" and "2.0" stay distinct.
            if ( $instruction[0] == 'header' &&
                    trim($instruction[1][0]) === (string) $header ) {
                $startPos = $instruction[2];
                $inSection = true;
            }
        } elseif ( $instruction[0] == 'section_close' ) {
            // End of the section
            $endPos = $instruction[2];
            $sectionClosed = true;
            break;
        }
    }

    if ( !$inSection ) {
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes will match
    $doc = "\n".str_replace("\r\n","\n",$doc)."\n";

    if ( !$sectionClosed ) {
        // No closing instruction followed the heading: take everything up to
        // the end instead of passing a negative length to substr().
        return (string) substr($doc, $startPos);
    }

    return (string) substr($doc, $startPos, ($endPos-$startPos));
}
212
/**
 * Expand a page id into the list of its namespace prefixes.
 *
 * For "a:b:c" the result is "a:b:c a:b a  ": the full id followed by each
 * shorter prefix, every entry terminated by a space, plus one additional
 * trailing space. An id without ":" is returned unchanged.
 *
 * @param string $id Page id.
 * @return string Space-separated prefixes, '' for an empty id.
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }
    if (strpos($id, ":") === false) {
        return $id;
    }

    $parts = explode(":", $id);
    $prefixes = array();
    for ($depth = count($parts); $depth > 0; $depth--) {
        $prefixes[] = implode(':', array_slice($parts, 0, $depth));
    }

    // A final empty entry plus the closing space reproduce the two trailing
    // spaces this function has always emitted.
    $prefixes[] = '';

    return implode(' ', $prefixes) . ' ';
}
237
238
239 /**
240  * Method return all wiki page names
241  * @global array $conf
242  * @return array
243  */
244 function getPagesList()
245 {
246    global $conf;
247
248    $data = array();
249    sort($data);
250    search($data,$conf['datadir'],'search_allpages','','');
251
252    return $data;
253}
254
/**
 * Build one "search within this namespace" link per part of a page id.
 *
 * For every prefix of $id a search URL restricted to that prefix (via
 * "@categories") is created. The titles are the id parts run through
 * $search->getExcerpt() so that matching keywords get highlighted.
 *
 * @param string $id       Page id, e.g. "ns:sub:page".
 * @param string $keywords Raw search keywords as typed by the user.
 * @param object $search   Search helper providing getExcerpt() and starQuery().
 * @return array List of array('link' => string, 'title' => string).
 */
function getNsLinks($id, $keywords, $search)
{
    $parts = explode(':', $id);

    // print intermediate namespace links
    $part = '';
    $data = array();
    $titles = array();
    $labels = array();
    foreach ($parts as $name) {
        $part .= $name.':';
        $page = $part;
        resolve_pageid('',$page,$exists);

        // A namespace that resolved to its start page is referred to by the
        // namespace itself, unless "start" is literally part of the id.
        if (preg_match("#:start$#", $page) && !preg_match("#:start:$#", $part)) {
            $page = substr($page, 0, strpos($page, ":start"));
        }

        $titles[wl($page)] = $name;
        $labels[] = $name;

        // The whole query value is URL-encoded. The keywords come from the
        // user and may contain "&", "#" or quotes that would otherwise break
        // the link.
        $data[] = array('link' => "?do=search&id=".urlencode("{$keywords} @categories {$page}"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));
    foreach ($data as $index => $notused) {
        // Fall back to the plain id part when no excerpt is available for
        // this position (for example when two parts share the same URL key).
        $data[$index]['title'] = isset($titleExcerpt[$index]) ? $titleExcerpt[$index] : $labels[$index];
    }
    return $data;
}
289
/**
 * Print up to 20 page-name matches for a query as an HTML list.
 *
 * Each entry links to a search for the same query restricted (via
 * "@categories") to the matched page id. Nothing is printed when
 * ft_pageLookup() finds no match.
 *
 * @global array $lang Provides the 'quickhits' heading text.
 * @param string $query Raw search query as typed by the user.
 * @return void
 */
function printNamespaces($query)
{
  // Without this declaration $lang is an undefined local variable and the
  // heading is printed empty.
  global $lang;

  $data = ft_pageLookup($query);

  if(empty($data)) return;

  print '<strong>'.$lang['quickhits'].'</strong>';
  print '<ul>';
  $counter = 0;
  foreach($data as $id){
    print '<li>';
    // The whole query value is URL-encoded. The query comes from the user,
    // and unencoded "&", "#" or quotes would break the link and its markup.
    $href = "?do=search&id=".urlencode("{$query} @categories {$id}");
    tpl_link($href,$id);
    print '</li>';
    if(++$counter == 20){
        break;
    }
  }
  print '</ul>';
}