xref: /plugin/sphinxsearch-was/functions.php (revision 130:07256766c697)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Render one document as a <sphinx:document> XML fragment.
 *
 * The text fields (title, body, namespace, pagename) are passed through
 * escapeTextValue() and placed inside CDATA sections; id, level and modified
 * are inserted as given.
 *
 * @param array $data document fields; the keys read are id, title_to_index,
 *                    body, namespace, pagename, level and modified
 * @return string the XML fragment, with a leading newline and a trailing blank line
 */
function formatXml($data)
{
    // NOTE(review): each CDATA section opens with "<![CDATA[[", so every text
    // value is emitted with a leading "[" - kept byte-for-byte, confirm intent.
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<namespace><![CDATA[[{namespace}]]></namespace>
<pagename><![CDATA[[{pagename}]]></pagename>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // strtr() with a map substitutes every placeholder in a single pass and
    // never rescans text it has just inserted. str_replace() with arrays works
    // placeholder by placeholder, so a title containing e.g. "{body}" would
    // have been replaced by the body text in a later step.
    return strtr(
        $xmlFormat,
        array(
            '{id}' => (string)$data['id'],
            '{title}' => (string)escapeTextValue($data['title_to_index']),
            '{body}' => (string)escapeTextValue($data['body']),
            '{namespace}' => (string)escapeTextValue($data['namespace']),
            '{pagename}' => (string)escapeTextValue($data['pagename']),
            '{level}' => (string)$data['level'],
            '{modified}' => (string)$data['modified'],
        )
    );
}
33
/**
 * Prepare a text value for placement inside a CDATA section.
 *
 * Tags are removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every literal "]]>" is rewritten so that it cannot
 * close the surrounding CDATA section.
 *
 * @param string $value raw text
 * @return string cleaned text; the empty string is returned unchanged
 */
function escapeTextValue($value)
{
    if ($value === "") {
        return "";
    }

    $cleaned = stripInvalidXml(strip_tags($value));

    // "]]>" ends a CDATA section, so it is split over separate sections:
    // close the current one, emit "]]" and ">" in sections of their own,
    // then reopen a section for the remaining text.
    $terminator = "]]>";
    $splitTerminator = "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[";

    return str_replace($terminator, $splitTerminator, $cleaned);
}
44
/**
 * Replace bytes that are not allowed in XML 1.0 text with a space.
 *
 * The string is processed byte by byte: control bytes below 0x20 other than
 * tab (0x09), line feed (0x0A) and carriage return (0x0D) become a single
 * space. All other bytes, including those of multi-byte UTF-8 sequences, pass
 * through untouched.
 *
 * Changes from the previous implementation:
 *  - no longer uses the "$value{$i}" offset syntax, which PHP 8 rejects;
 *  - the string "0" is kept instead of being treated as empty;
 *  - the code-point range tests above 0xFF were removed: ord() only ever
 *    yields 0-255, so they could never reject anything.
 *
 * @param string|null $value text to clean
 * @return string cleaned text; "" for null, false, "" or an array
 */
function stripInvalidXml($value)
{
    if ($value === null || $value === false || $value === '' || is_array($value)) {
        return '';
    }

    // Built once: the bytes 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F.
    static $forbiddenBytes = null;
    if ($forbiddenBytes === null) {
        $forbiddenBytes = array_map(
            'chr',
            array_merge(range(0x00, 0x08), array(0x0B, 0x0C), range(0x0E, 0x1F))
        );
    }

    return str_replace($forbiddenBytes, ' ', (string)$value);
}
66
/**
 * Split a page into one entry per table-of-contents heading.
 *
 * For the first heading, getZeroSectionContent() is asked for the text that
 * precedes it; when that is non-empty it is stored under the page id with
 * level 0. Every heading then gets an entry keyed by its 'hid', holding the
 * section text from getSectionByTitleLevel(), the level, the title, a display
 * title ('title_text') and the title to index.
 *
 * 'title_text' is "<remembered title> &raquo; <title>" when the heading level
 * is above 1 and a title has been remembered; otherwise it is the plain title,
 * which then becomes the remembered title.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; reads ['description']['tableofcontents'],
 *                         a list of rows with 'hid', 'title' and 'level'
 * @return array|false entries keyed by page id / heading id, or false when
 *                     there is no metadata or no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata)) {
        return false;
    }
    if (empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $topLevel = 1;
    $rememberedTitle = '';
    $documents = array();
    $introPending = true;

    foreach ($metadata['description']['tableofcontents'] as $heading) {
        // The text before the first heading is looked up exactly once.
        if ($introPending) {
            $introPending = false;
            $intro = getZeroSectionContent($id, $heading['title']);
            if ($intro) {
                $documents[$id] = array(
                    'section' => $intro,
                    'level' => 0,
                    'title' => $id,
                    'title_to_index' => $id,
                );
            }
        }

        $entry = array(
            'section' => getSectionByTitleLevel($id, $heading['title'], false),
            'level' => $heading['level'],
            'title' => $heading['title'],
        );

        $isNested = $heading['level'] > $topLevel && !empty($rememberedTitle);
        if ($isNested) {
            $entry['title_text'] = $rememberedTitle . " &raquo; " . $heading['title'];
        } else {
            $entry['title_text'] = $heading['title'];
            $rememberedTitle = $heading['title'];
        }
        $entry['title_to_index'] = $heading['title'];

        $documents[$heading['hid']] = $entry;
    }

    return $documents;
}
103
/**
 * Return the raw page text that precedes the given heading.
 *
 * The heading is searched case-insensitively as "<1-6 '='> title <1-6 '='>"
 * in the raw wiki text of the page.
 *
 * The cut position is the offset reported by the regex match itself.
 * Previously the position of the first run of "=" characters anywhere in the
 * page was used, which truncated the result whenever such a run (e.g. "==" in
 * running text) appeared before the heading.
 *
 * @param string $id     page id
 * @param string $header heading title to look for
 * @return string|false text before the heading; false when the heading is
 *                      not found or nothing precedes it
 */
function getZeroSectionContent($id, $header)
{
    $doc = io_readFile(wikiFN($id));
    $pattern = '/(={1,6})\s*(' . preg_quote($header, '/') . ')\s*(={1,6})/i';

    $matches = array();
    if (!preg_match($pattern, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // Byte offset of the whole match, i.e. of the heading's opening "=" run.
    $headingStart = $matches[0][1];
    if ($headingStart < 1) {
        // The heading opens the page: there is no leading content.
        return false;
    }

    return substr($doc, 0, $headingStart);
}
123
/**
 * Return the raw text of the section that follows the given heading.
 *
 * The heading is searched case-insensitively as "<1-6 '='> title <1-6 '='>".
 * The section runs from the end of that heading to the next heading written
 * with four to six "=" characters on each side, or to the end of the page
 * when no such heading follows. The result is trimmed.
 *
 * In extended mode, when that section is empty and a following heading
 * exists, the text after that following heading (up to the next four-to-six
 * "=" heading or the end of the page) is returned instead.
 *
 * Changes from the previous implementation:
 *  - the end heading is located by its match offset after the start heading;
 *    previously its text was searched from the top of the page, which gave a
 *    wrong (possibly negative) length when the same heading text also
 *    occurred earlier;
 *  - extended mode no longer reads undefined variables or mixes offsets of
 *    the whole page with offsets of the remainder; a heading that is not
 *    found now yields "" instead of the whole page;
 *  - only a truly empty section triggers extended mode (a section consisting
 *    of "0" is kept).
 *
 * @param string $id       page id
 * @param string $header   heading title to look for
 * @param bool   $extended fall back to the following section when the first is empty
 * @return string trimmed section text, "" when the heading is not found
 */
function getSectionByTitleLevel($id, $header, $extended = false)
{
    $doc = io_readFile(wikiFN($id));
    $headingPattern = '/(={1,6})\s*(' . preg_quote($header, '/') . ')\s*(={1,6})/i';
    $boundaryPattern = '/(={4,6})(.*?)(={4,6})/i';

    $heading = array();
    if (!preg_match($headingPattern, $doc, $heading, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    $bodyStart = $heading[0][1] + strlen($heading[0][0]);

    // Search for the closing heading only behind the start heading.
    $boundary = array();
    $hasBoundary = (bool)preg_match($boundaryPattern, $doc, $boundary, PREG_OFFSET_CAPTURE, $bodyStart);
    if ($hasBoundary) {
        $section = substr($doc, $bodyStart, $boundary[0][1] - $bodyStart);
    } else {
        $section = substr($doc, $bodyStart);
    }
    $section = trim((string)$section);

    if (!$extended || $section !== '' || !$hasBoundary) {
        return $section;
    }

    // Extended mode: the first section is empty, so continue behind the
    // heading that closed it and take the text up to the next boundary.
    $bodyStart = $boundary[0][1] + strlen($boundary[0][0]);
    $next = array();
    if (preg_match($boundaryPattern, $doc, $next, PREG_OFFSET_CAPTURE, $bodyStart)) {
        $section = substr($doc, $bodyStart, $next[0][1] - $bodyStart);
    } else {
        $section = substr($doc, $bodyStart);
    }

    return trim((string)$section);
}
170
/**
 * Return the raw text of the section introduced by the given heading, located
 * through the DokuWiki parser's instruction list.
 *
 * The page is parsed with only the 'header' and 'listblock' modes loaded. The
 * instruction list and raw text of the most recently parsed page are kept in
 * static caches; a cache miss discards the previous page before storing the
 * new one.
 *
 * The section spans from the third element of the matching 'header'
 * instruction to the third element of the next 'section_close' instruction
 * (presumably byte positions in the normalized document - confirm against
 * the parser).
 *
 * NOTE(review): if a header matches but no 'section_close' follows, the end
 * position stays 0 and substr() is called with a negative length.
 *
 * @param string $id     page id
 * @param string $header heading text, compared loosely against the trimmed header text
 * @return string the section text, "" when the heading is not found
 */
function getSection($id, $header)
{
    static $instructionCache = null;
    static $documentCache = null;

    if (!empty($documentCache[$id])) {
        $doc = $documentCache[$id];
        $instructions = $instructionCache[$id];
    } else {
        // Parser restricted to the modes needed to find headings.
        $Parser = new Doku_Parser();
        $Parser->Handler = new Doku_Handler();
        $Parser->addMode('header', new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        // Raw wiki text and the instruction list derived from it.
        $doc = io_readFile(wikiFN($id));
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        // Only a single page is cached at a time: drop the old one first.
        $instructionCache = null;
        $documentCache = null;
        $instructionCache[$id] = $instructions;
        $documentCache[$id] = $doc;
    }

    $from = 0;
    $to = 0;
    $headerSeen = false;

    foreach ($instructions as $call) {
        if ($headerSeen) {
            // Inside the wanted section: stop at the first section end.
            if ($call[0] == 'section_close') {
                $to = $call[2];
                break;
            }
            continue;
        }

        if ($call[0] == 'header' && trim($call[1][0]) == $header) {
            $from = $call[2];
            $headerSeen = true;
        }
    }

    // Normalize line endings and pad the text the same way the parser does,
    // so that the recorded positions line up with the string.
    $normalized = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    return substr($normalized, $from, $to - $from);
}
246
/**
 * List the namespaces a page id belongs to, from the most specific to the
 * least specific.
 *
 * For "a:b:c" the result is "a:b a  ": every namespace prefix followed by a
 * space, plus one final space contributed by the empty prefix.
 *
 * @param string $id page id
 * @return string space-separated namespace prefixes, "" when $id is empty or
 *                contains no ":"
 */
function getCategories($id)
{
    if (empty($id) || strpos($id, ":") === false) {
        return '';
    }

    $segments = explode(":", $id);
    $categories = '';

    // The last segment is the page name; walk the namespace prefixes from the
    // longest down to the empty one.
    for ($depth = count($segments) - 1; $depth >= 0; $depth--) {
        $categories .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $categories;
}
271
/**
 * Return the page name part of a page id, i.e. the text after the last ":".
 *
 * @param string $id page id
 * @return string the last ":"-separated segment; $id itself when it contains
 *                no ":"; "" when $id is empty
 */
function getPagename($id)
{
    if (empty($id)) {
        return '';
    }
    if (strpos($id, ":") === false) {
        return $id;
    }

    $segments = explode(":", $id);

    return end($segments);
}
283
284
285
/**
 * Collect all wiki pages found below the configured data directory.
 *
 * @global array $conf reads $conf['datadir']
 * @return array the list filled in by search() using the 'search_allpages' callback
 */
function getPagesList()
{
    global $conf;

    $pages = array();
    // NOTE(review): sorting happens while the list is still empty, so it has
    // no effect on the result; it may have been meant to run after search().
    sort($pages);

    $options = array('skipacl' => 1);
    search($pages, $conf['datadir'], 'search_allpages', $options, '');

    return $pages;
}
301
/**
 * Build one search link per namespace level of a page id.
 *
 * For every ":"-separated part of $id the accumulated prefix is resolved with
 * resolve_pageid(); a resolved id ending in ":start" is cut back to its
 * namespace unless the prefix itself ends in ":start:". Each level yields a
 * link that repeats the search restricted to that namespace ("@ns ..."), and
 * a title taken from the excerpts the search object builds for the parts.
 *
 * @param string $id       page id
 * @param string $keywords search keywords, inserted into the link as given
 * @param object $search   object providing starQuery() and getExcerpt()
 * @return array list of array('link' => ..., 'title' => ...), one per part of $id
 */
function getNsLinks($id, $keywords, $search)
{
    global $conf;

    $links = array();
    $titles = array();
    $prefix = '';

    foreach (explode(':', $id) as $segment) {
        $prefix .= $segment . ':';
        $page = $prefix;
        resolve_pageid('', $page, $exists);

        $resolvedToStart = preg_match("#:start$#", $page);
        if ($resolvedToStart && !preg_match("#:start:$#", $prefix)) {
            $page = substr($page, 0, strpos($page, ":start"));
        }

        // The title is the same whether or not the page exists.
        $titles[wl($page)] = $segment;

        // NOTE(review): $keywords goes into the query string without URL
        // encoding; only the "@ns" suffix is encoded - confirm callers.
        $links[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @ns $page"));
    }

    $excerpts = $search->getExcerpt($titles, $search->starQuery($keywords));

    // Excerpts are matched to links by position.
    $position = 0;
    foreach (array_keys($links) as $key) {
        $links[$key]['title'] = $excerpts[$position++];
    }

    return $links;
}
336
/**
 * Print an HTML list of links to the pages whose name matches the query.
 *
 * Spaces in the query are replaced by underscores before it is handed to
 * ft_pageLookup(). At most 20 entries are printed, each through tpl_link().
 *
 * Changes from the previous implementation: the shortened display name that
 * was computed for every entry but never used is gone, and an empty or
 * non-countable lookup result is handled by empty() instead of count(),
 * which throws on non-countable values in PHP 8.
 *
 * @param string $query page name query
 * @return false|null false when nothing matched, otherwise nothing
 */
function printNamespaces($query)
{
    $query = str_replace(" ", "_", $query);
    $data = ft_pageLookup($query, false);

    if (empty($data)) return false;

    print '<ul>';
    $counter = 0;
    foreach ($data as $id) {
        print '<li>';
        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';
        // Cap the list at 20 entries.
        if (++$counter == 20) {
            break;
        }
    }
    print '</ul>';
}
365
/**
 * Print an HTML list of links for the given pages.
 *
 * At most 10 entries are printed. Only the array keys (page ids) are used;
 * the values are ignored.
 *
 * Changes from the previous implementation: the page id is HTML-escaped
 * before it is printed as link text, and the unused shortened display name
 * and the commented-out variant were removed.
 *
 * @param array $pageNames map of page id => heading
 * @return false|null false when $pageNames is empty, otherwise nothing
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) return false;

    $limit = 10;
    print '<ul>';
    $counter = 0;
    foreach ($pageNames as $id => $header) {
        // NOTE(review): the href is printed as returned by wl(); it is
        // assumed to be safe for use in an attribute - confirm.
        $label = htmlspecialchars((string)$id, ENT_QUOTES, 'UTF-8');
        print '<li>';
        print '<a href="' . wl($id) . '" ' . "class='wikilink1'>" . $label . "</a>";
        print '</li>';
        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}
394
if (!function_exists('shorten')) {
    /**
     * Shorten a string by removing characters from its middle.
     *
     * The string is given in two parts. $keep is never shortened. $short is
     * cut in the middle, but only when at least $min characters remain to
     * display it; otherwise it is left off entirely.
     *
     * @param string $keep   the part to keep
     * @param string $short  the part to shorten
     * @param int    $max    maximum number of characters for the whole string
     * @param int    $min    minimum number of characters that must be left for $short
     * @param string $char   the character marking the cut
     * @return string the combined, possibly shortened string
     */
    function shorten($keep, $short, $max, $min = 9, $char = '⌇')
    {
        // Space left for $short once $keep has been accounted for.
        $room = $max - utf8_strlen($keep);
        if ($room < $min) {
            return $keep;
        }

        $shortLength = utf8_strlen($short);
        if ($shortLength <= $room) {
            return $keep . $short;
        }

        // Keep the head and the tail of $short and mark the cut with $char.
        $half = floor($room / 2);
        $head = utf8_substr($short, 0, $half - 1);
        $tail = utf8_substr($short, $shortLength - $half);

        return $keep . $head . $char . $tail;
    }
}
420