xref: /plugin/sphinxsearch-was/functions.php (revision 77:239f563f96ab)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Renders one document as a <sphinx:document> element for the xmlpipe2 feed.
 *
 * Reads the keys 'id', 'title_to_index', 'body', 'categories', 'level' and
 * 'modified' from $data. The three text fields are passed through
 * escapeTextValue(); the remaining values are inserted verbatim.
 *
 * @param array $data document fields (see above)
 * @return string XML fragment, starting with a newline and ending with a blank line
 */
function formatXml($data)
{
    // NOTE(review): every CDATA section opens with an extra '[' ("<![CDATA[[..."),
    // so each text field starts with a literal '['. Kept byte-for-byte because
    // consumers of the feed may rely on the current output - confirm before changing.
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<categories><![CDATA[[{categories}]]></categories>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // strtr() substitutes all placeholders in a single pass over the template.
    // The previous str_replace() with arrays worked sequentially, so a title
    // containing the text "{body}" or "{level}" had that text replaced by a
    // later substitution.
    return strtr($xmlFormat, array(
        '{id}'         => (string) $data['id'],
        '{title}'      => escapeTextValue($data['title_to_index']),
        '{body}'       => escapeTextValue($data['body']),
        '{categories}' => escapeTextValue($data['categories']),
        '{level}'      => (string) $data['level'],
        '{modified}'   => (string) $data['modified'],
    ));
}
26
/**
 * Prepares a text value for embedding inside a CDATA section.
 *
 * Markup is removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every "]]>" sequence is split across several
 * CDATA sections so that it cannot terminate the enclosing one.
 *
 * @param string $value raw text
 * @return string text that is safe to place between <![CDATA[ and ]]>
 */
function escapeTextValue($value)
{
    if ("" !== $value) {
        $plainText = stripInvalidXml(strip_tags($value));

        // Close the section, emit "]]" and ">" in sections of their own, then reopen.
        $pieces = explode("]]>", $plainText);
        return implode("]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[", $pieces);
    }

    return "";
}
38
/**
 * Replaces bytes that are not allowed in XML 1.0 documents with a space.
 *
 * The string is processed byte by byte. Tab (0x09), line feed (0x0A),
 * carriage return (0x0D) and every byte from 0x20 upwards are kept, so
 * multi-byte UTF-8 sequences pass through unchanged; the remaining control
 * bytes are each replaced by a single space.
 *
 * Changes compared with the previous implementation:
 *  - no longer uses the $value{$i} offset syntax, which is a parse error on PHP 8;
 *  - the string "0" is returned as "0" instead of being treated as empty;
 *  - the per-byte loop is replaced by one regular expression with the same
 *    effect (the old range checks above 0xFF could never match a single byte).
 *
 * @param string|null $value text to clean
 * @return string cleaned text, "" for null/false/empty input
 */
function stripInvalidXml($value)
{
    if ($value === null || $value === false) {
        return "";
    }

    // No "u" modifier on purpose: the pattern must work on raw bytes so that
    // input which is not valid UTF-8 is still cleaned instead of rejected.
    $clean = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', (string) $value);

    return ($clean === null) ? "" : $clean;
}
68
/**
 * Builds one entry per heading listed in a page's table of contents.
 *
 * Each entry is keyed by the heading id ('hid') and holds the section text
 * returned by getSectionByTitleLevel(), the heading level, its title, a
 * display title ('title_text') and the title used for indexing.
 * For a heading whose level is greater than 1, the display title is prefixed
 * with the most recent title that was not itself prefixed.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; $metadata['description']['tableofcontents'] is read
 * @return array|false entries keyed by hid, or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata)) {
        return false;
    }
    if (empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $baseLevel = 1;
    $parentTitle = '';
    $documents = array();

    foreach ($metadata['description']['tableofcontents'] as $entry) {
        $sectionText = getSectionByTitleLevel($id, $entry['title']);

        $isNested = ($entry['level'] > $baseLevel) && !empty($parentTitle);
        if ($isNested) {
            $displayTitle = $parentTitle . " &raquo; " . $entry['title'];
        } else {
            // This heading becomes the prefix for the nested headings that follow.
            $displayTitle = $entry['title'];
            $parentTitle = $entry['title'];
        }

        $documents[$entry['hid']] = array(
            'section'        => $sectionText,
            'level'          => $entry['level'],
            'title'          => $entry['title'],
            'title_text'     => $displayTitle,
            'title_to_index' => $entry['title'],
        );
    }

    return $documents;
}
92
/**
 * Returns the raw wiki text that follows a given heading, up to the next heading.
 *
 * The page source is read with io_readFile(wikiFN($id)). The heading is
 * located case-insensitively as "=...= <header> =...=" (1-6 equal signs on
 * each side); the section ends at the next heading written with 3-6 equal
 * signs, or at the end of the page.
 *
 * In extended mode, when the section directly under the heading is empty,
 * the text of the section that follows the next heading is returned instead.
 *
 * Fixes compared with the previous implementation:
 *  - the end of the section is found by matching from the start offset
 *    instead of strpos() over the whole page, so a heading whose text also
 *    appears earlier on the page no longer yields a wrong or empty slice;
 *  - extended mode no longer applies a page-absolute offset to a relative
 *    substring and no longer reads undefined variables;
 *  - when the heading is not found, '' is returned in both modes (extended
 *    mode used to fall through and return the whole page).
 *
 * @param string $id       page id
 * @param string $header   heading text to look for
 * @param bool   $extended fall back to the following section when the first one is empty
 * @return string trimmed section text, '' when nothing was found
 */
function getSectionByTitleLevel($id, $header, $extended=false)
{
    $doc = (string) io_readFile(wikiFN($id));

    $startRegex = '/(={1,6})\s*(' . preg_quote($header, '/') . ')\s*(={1,6})/i';
    if (!preg_match($startRegex, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    // Offsets reported by PREG_OFFSET_CAPTURE are relative to the start of $doc.
    $bodyStart = $matches[0][1] + strlen($matches[0][0]);

    $slice = _sphinxSliceToNextHeading($doc, $bodyStart);
    $section = $slice[0];

    // Extended mode: the heading had no body of its own, so use the body of
    // the heading that ended it (if there was one).
    if ($extended && $section === '' && $slice[1] !== null) {
        $slice = _sphinxSliceToNextHeading($doc, $slice[1]);
        $section = $slice[0];
    }

    return $section;
}

/**
 * Helper: cuts the text between $from and the next 3-6 "=" heading in $doc.
 *
 * @param string $doc  full page source
 * @param int    $from byte offset at which the section body starts
 * @return array [trimmed body, offset just past the terminating heading or null if none]
 */
function _sphinxSliceToNextHeading($doc, $from)
{
    $afterHeading = null;
    if (preg_match('/(={3,6})(.*?)(={3,6})/i', $doc, $matches, PREG_OFFSET_CAPTURE, $from)) {
        $headingPos = $matches[0][1];
        $afterHeading = $headingPos + strlen($matches[0][0]);
        $body = substr($doc, $from, $headingPos - $from);
    } else {
        $body = substr($doc, $from);
    }

    // substr() returns false on older PHP versions when $from is at the end of the string.
    return array(trim((string) $body), $afterHeading);
}
139
/**
 * Returns the raw text of the section introduced by the given heading,
 * using the DokuWiki parser to find the section boundaries.
 *
 * The page is parsed with only the 'header' and 'listblock' modes. The
 * instruction list and the page source of the most recently requested page
 * are kept in static variables, so repeated calls for the same page do not
 * parse it again.
 *
 * Fixes compared with the previous implementation:
 *  - objects are created with plain "new"; "=& new" is a parse error on PHP 7+;
 *  - the cache lookup uses isset(), so an empty page is served from the cache
 *    instead of being parsed on every call;
 *  - if the heading is found but no closing 'section_close' instruction
 *    follows, the section runs to the end of the page instead of producing a
 *    negative substr() length.
 *
 * @param string $id     page id
 * @param string $header heading text (compared against the trimmed heading)
 * @return string section text, '' when the heading was not found
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (!isset($cacheDoc[$id])){
        // Create the parser
        $Parser = new Doku_Parser();

        // Add the Handler
        $Parser->Handler = new Doku_Handler();

        // Load the header mode to find headers
        $Parser->addMode('header',new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock',new Doku_Parser_Mode_ListBlock());

        // Loads the raw wiki document
        $doc = (string) io_readFile(wikiFN($id));

        // Get a list of instructions
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        // Only one page is cached at a time: replace whatever was stored before.
        $cacheInstructions = array($id => $instructions);
        $cacheDoc = array($id => $doc);
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    // Use this to watch when we're inside the section we want
    $inSection = FALSE;
    $startPos = 0;
    $endPos = null;

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {

        if ( !$inSection ) {

            // Look for the requested heading
            if ( $instruction[0] == 'header' &&
                    trim($instruction[1][0]) == $header ) {

                $startPos = $instruction[2];
                $inSection = TRUE;
            }
        } else {

            // Look for the end of the section
            if ( $instruction[0] == 'section_close' ) {
                $endPos = $instruction[2];
                break;
            }
        }
    }

    if ( !$inSection ) {
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes will match
    $doc = "\n".str_replace("\r\n","\n",$doc)."\n";

    if ( $endPos === null ) {
        // No closing instruction was seen: take everything after the heading.
        return (string) substr($doc, $startPos);
    }

    return (string) substr($doc, $startPos, ($endPos-$startPos));
}
213
/**
 * Expands a page id into a space-separated list of itself and all of its
 * parent namespaces, e.g. "a:b:c" becomes "a:b:c a:b a  ".
 *
 * An id without a colon is returned unchanged; an empty id yields ''.
 * Every entry is followed by a space and one extra space is appended for the
 * zero-length prefix, so the result of a namespaced id ends with two spaces.
 *
 * @param string $id page id
 * @return string list of namespace prefixes
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }
    if (strpos($id, ":") === false) {
        return $id;
    }

    $segments = explode(":", $id);
    $prefixes = array();

    // Walk from the full id down to the empty prefix (depth 0).
    for ($depth = count($segments); $depth >= 0; $depth--) {
        $prefixes[] = implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return implode('', $prefixes);
}
238
239
/**
 * Returns all wiki pages found below the configured data directory.
 *
 * The list is collected by search() with the 'search_allpages' callback and
 * then sorted. Previously sort() was called on the still-empty array before
 * search() filled it, so it had no effect.
 *
 * @global array $conf DokuWiki configuration; $conf['datadir'] is read
 * @return array entries as produced by the 'search_allpages' callback, sorted
 */
function getPagesList()
{
    global $conf;

    $data = array();
    search($data,$conf['datadir'],'search_allpages','','');
    sort($data);

    return $data;
}
255
/**
 * Builds a search link and a highlighted title for every namespace level of a page id.
 *
 * For each colon-separated part of $id the accumulated namespace is resolved
 * with resolve_pageid(); a resolved id ending in ":start" is shortened to its
 * namespace unless the accumulated prefix itself ends in ":start:". The link
 * restricts the search for $keywords to that namespace via "@categories".
 * Titles are produced by $search->getExcerpt() and assigned to the links in order.
 *
 * @param string $id       page id
 * @param string $keywords search keywords (inserted into the link as given)
 * @param object $search   object providing getExcerpt() and starQuery()
 * @return array list of array('link' => ..., 'title' => ...)
 */
function getNsLinks($id, $keywords, $search)
{
    $prefix = '';
    $links = array();
    $labels = array();

    foreach (explode(':', $id) as $segment) {
        $prefix .= $segment . ':';
        $target = $prefix;
        resolve_pageid('', $target, $exists);

        $isStartPage = preg_match("#:start$#", $target);
        if ($isStartPage && !preg_match("#:start:$#", $prefix)) {
            $target = substr($target, 0, strpos($target, ":start"));
        }

        // The label is the same whether or not the page exists.
        $labels[wl($target)] = $segment;
        $links[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @categories $target"));
    }

    $excerpts = $search->getExcerpt($labels, $search->starQuery($keywords));

    // $links is a plain list, so its keys line up with the excerpt positions.
    $total = count($links);
    for ($n = 0; $n < $total; $n++) {
        $links[$n]['title'] = $excerpts[$n];
    }

    return $links;
}
290
/**
 * Prints an HTML list of up to 20 pages whose names match the query.
 *
 * Spaces in the query are replaced by underscores before it is handed to
 * ft_pageLookup(). Each hit is printed as a link via tpl_link().
 *
 * Changes compared with the previous implementation: the shortened name that
 * was computed for every hit but never printed is gone, as is the initial
 * array assignment that was immediately overwritten; an empty or non-array
 * lookup result is handled without calling count() on it.
 *
 * @param string $query search query
 * @return false|null false when nothing matched, otherwise nothing
 */
function printNamespaces($query)
{
    $limit = 20;

    $query = str_replace(" ", "_", $query);
    $data = ft_pageLookup($query, false);

    if (empty($data)) return false;

    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach ($data as $id) {
        print '<li>';
        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';
        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}
320
/**
 * Prints an HTML list of up to 10 matching pages, optionally with a section anchor.
 *
 * $pageNames maps a page id to a heading id; when the heading id is not
 * empty the link points to that anchor and the anchor is shown after the link.
 *
 * Changes compared with the previous implementation: the page id and heading
 * id are HTML-escaped before being written into the markup (they were printed
 * raw, so a value containing quotes or angle brackets could break out of the
 * attribute), and the shortened name that was computed but never printed is gone.
 * The URL returned by wl() is printed as returned.
 *
 * @param array $pageNames page id => heading id (may be empty)
 * @return false|null false when $pageNames is empty, otherwise nothing
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) return false;

    $limit = 10;
    print '<h3>Matching pagenames</h3>';
    print '<ul>';
    $counter = 0;
    foreach ($pageNames as $id => $header) {
        $label = htmlspecialchars((string) $id, ENT_QUOTES, 'UTF-8');

        print '<li>';
        if (!empty($header)) {
            $anchor = htmlspecialchars((string) $header, ENT_QUOTES, 'UTF-8');
            print '<a href="'.wl($id).'#'.$anchor.'" '."class='wikilink1'>".$label."</a>".'#'.$anchor;
        } else {
            print '<a href="'.wl($id).'" '."class='wikilink1'>".$label."</a>";
        }
        print '</li>';

        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}