xref: /plugin/sphinxsearch-was/functions.php (revision 20:eca7605e143c)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Render one document as a <sphinx:document> XML element.
 *
 * The values are substituted in a single pass, so text inside a value that
 * happens to look like another placeholder (for example a title containing
 * the literal "{body}") is emitted unchanged.
 *
 * @param array $data values for the keys id, title, body, categories, level and modified
 * @return string the XML fragment, surrounded by blank lines
 */
function formatXml($data)
{
    // NOTE(review): every CDATA section opens with an extra "[", so each text
    // value is emitted with a leading "[". Kept byte-for-byte; confirm whether
    // that is intended before changing it.
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<categories><![CDATA[[{categories}]]></categories>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // "]]>" inside a value would terminate its CDATA section early and leave
    // malformed XML behind; split the terminator across two adjacent sections.
    $cdataSearch = ']]>';
    $cdataReplace = ']]]]><![CDATA[>';

    $replacements = array(
        '{id}'         => (string) $data['id'],
        '{title}'      => str_replace($cdataSearch, $cdataReplace, (string) $data['title']),
        '{body}'       => str_replace($cdataSearch, $cdataReplace, (string) $data['body']),
        '{categories}' => str_replace($cdataSearch, $cdataReplace, (string) $data['categories']),
        '{level}'      => (string) $data['level'],
        '{modified}'   => (string) $data['modified'],
    );

    // strtr() never rescans text it has already inserted, unlike str_replace()
    // with arrays, which applies each pair to the result of the previous one.
    return strtr($xmlFormat, $replacements);
}
26
/**
 * Build one entry per table-of-contents heading of a page.
 *
 * Each entry is keyed by the heading's "hid" and holds the section text
 * (as returned by getSectionByTitleLevel()), the heading level, the heading
 * title and a display title. Headings deeper than level 1 get the most
 * recent level-1 (or shallower) title prepended, separated by " &raquo; ".
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; the headings are read from
 *                         $metadata['description']['tableofcontents']
 * @return array|false entries keyed by hid, or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata)) {
        return false;
    }
    if (empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $topLevel = 1;
    $parentTitle = '';
    $documents = array();

    foreach ($metadata['description']['tableofcontents'] as $entry) {
        $heading = $entry['title'];
        $sectionText = getSectionByTitleLevel($id, $heading);

        if ($entry['level'] > $topLevel) {
            // nested heading: show it below the last top-level title
            $displayTitle = $parentTitle . " &raquo; " . $heading;
        } else {
            $displayTitle = $heading;
            $parentTitle = $heading;
        }

        $documents[$entry['hid']] = array(
            'section'    => $sectionText,
            'level'      => $entry['level'],
            'title'      => $heading,
            'title_text' => $displayTitle,
        );
    }

    return $documents;
}
53
/**
 * Extract the raw wiki text that follows a given heading.
 *
 * The heading is located case-insensitively as "=...= title =...=" in the
 * page source. The returned text runs from the end of that heading up to the
 * next heading-like run of "=" signs (or to the end of the page). When that
 * text is empty, e.g. a heading immediately followed by a sub-heading, the
 * text of the following section is returned instead.
 *
 * All offsets are absolute positions in the page source and every search
 * starts after the previous match, so a heading text that occurs more than
 * once in the page can no longer produce a wrong end position.
 *
 * @param string $id     page id; the source is read with io_readFile(wikiFN($id))
 * @param string $header heading title to look for
 * @return string trimmed section text, or '' when the heading is not found
 *                or has no text after it
 */
function getSectionByTitleLevel($id, $header)
{
    $doc = io_readFile(wikiFN($id));

    $headerReg = preg_quote($header, '/');
    $wantedHeader = "/(={1,6})\s*({$headerReg})\s*(={1,6})/i";
    $anyHeader = '/(={1,6})(.*?)(={1,6})/i';

    if (!preg_match($wantedHeader, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    $bodyStart = $matches[0][1] + strlen($matches[0][0]);

    // no further heading: the section runs to the end of the page
    if (!preg_match($anyHeader, $doc, $matches, PREG_OFFSET_CAPTURE, $bodyStart)) {
        return trim(substr($doc, $bodyStart));
    }

    $section = trim(substr($doc, $bodyStart, $matches[0][1] - $bodyStart));
    if ($section !== '') {
        return $section;
    }

    // nothing between this heading and the next one: use the next section's text
    $bodyStart = $matches[0][1] + strlen($matches[0][0]);
    if (preg_match($anyHeader, $doc, $matches, PREG_OFFSET_CAPTURE, $bodyStart)) {
        return trim(substr($doc, $bodyStart, $matches[0][1] - $bodyStart));
    }

    return trim(substr($doc, $bodyStart));
}
98
/**
 * Extract the raw text of the section that starts at a given heading, using
 * the parser's instruction list to find the section boundaries.
 *
 * The page source and its instructions are cached for the most recently
 * requested page only, so repeated calls for the same page parse it once.
 *
 * @param string $id     page id; the source is read with io_readFile(wikiFN($id))
 * @param string $header heading title, compared against the trimmed header text
 * @return string the text from the heading up to the following section_close
 *                instruction (or to the end of the page when none follows);
 *                '' when the heading is not found
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (empty($cacheDoc[$id])) {
        // Plain assignment: "= & new" is a parse error on PHP 7 and later.
        $Parser = new Doku_Parser();
        $Parser->Handler = new Doku_Handler();

        // Only the modes needed to locate headers are loaded
        $Parser->addMode('header', new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        $doc = io_readFile(wikiFN($id));
        $instructions = $Parser->parse($doc);
        unset($Parser);

        // keep a single page in the cache; the previous one is dropped
        $cacheInstructions = array($id => $instructions);
        $cacheDoc = array($id => $doc);
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    $inSection = false;
    $startPos = 0;
    $endPos = 0;

    foreach ($instructions as $instruction) {
        if (!$inSection) {
            // strict comparison so numeric-looking titles ("1" vs "1.0") stay distinct
            if ($instruction[0] == 'header' && trim($instruction[1][0]) === (string) $header) {
                $startPos = $instruction[2];
                $inSection = true;
            }
        } elseif ($instruction[0] == 'section_close') {
            $endPos = $instruction[2];
            break;
        }
    }

    if (!$inSection) {
        return '';
    }

    // Normalize and pad the document the same way the offsets were produced,
    // so that the byte positions line up
    $doc = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    // no closing instruction after the heading: take everything up to the end
    // instead of passing a negative length to substr()
    if ($endPos <= $startPos) {
        return substr($doc, $startPos);
    }

    return substr($doc, $startPos, $endPos - $startPos);
}
172
/**
 * Expand a namespaced page id into a space-separated list of the id and all
 * of its parent namespaces, longest first.
 *
 * An id without ":" is returned unchanged. Otherwise every prefix is followed
 * by a single space, and the list ends with one extra space produced by the
 * final, empty prefix (e.g. "a:b:c" gives "a:b:c a:b a  ").
 *
 * @param string $id page id
 * @return string the category list, or '' for an empty id
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }
    if (strpos($id, ":") === false) {
        return $id;
    }

    $segments = explode(":", $id);

    $categories = '';
    // walk from the full id down to the empty prefix
    for ($depth = count($segments); $depth >= 0; $depth--) {
        $categories .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $categories;
}
197
198
199 /**
200  * Method return all wiki page names
201  * @global array $conf
202  * @return array
203  */
204 function getPagesList()
205 {
206    global $conf;
207
208    $data = array();
209    sort($data);
210    search($data,$conf['datadir'],'search_allpages','','');
211
212    return $data;
213}
214
/**
 * Build one search link per namespace level of a page id.
 *
 * For the id "a:b:c" the levels "a:", "a:b:" and "a:b:c:" are resolved with
 * resolve_pageid(); a resolved page ending in ":start" is reduced to its
 * namespace unless the level itself is named "start". Each entry links to a
 * search for the keywords restricted to that level with "@categories".
 *
 * @param string $id       page id
 * @param string $keywords search keywords as entered by the user
 * @param object $search   search helper providing getExcerpt() and starQuery()
 * @return array list of array('link' => string, 'title' => string); the title
 *               is the highlighted excerpt when getExcerpt() supplied one,
 *               otherwise the plain namespace name
 */
function getNsLinks($id, $keywords, $search)
{
    $startSuffix = ':start';

    $part = '';
    $data = array();
    $urls = array();
    $titles = array();
    foreach (explode(':', $id) as $name) {
        $part .= $name . ':';
        $page = $part;
        $exists = false;
        resolve_pageid('', $page, $exists);

        if (preg_match('#:start$#', $page) && !preg_match('#:start:$#', $part)) {
            // cut only the trailing ":start"; searching for its first occurrence
            // would truncate ids such as "wiki:started:start" to "wiki"
            $page = substr($page, 0, -strlen($startSuffix));
        }

        $url = wl($page);
        $titles[$url] = $name;
        $urls[] = $url;

        // the keywords are user input and must be encoded together with the filter
        $data[] = array(
            'link'  => '?do=sphinxsearch&id=' . urlencode($keywords . " @categories $page"),
            'title' => $name,
        );
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));

    // Excerpts come back positionally, one per entry of $titles. Several levels
    // can share one URL (and therefore one $titles entry), so map the excerpts
    // back through the URL instead of assuming one excerpt per link.
    $excerptByUrl = array();
    if (is_array($titleExcerpt)) {
        $excerpts = array_values($titleExcerpt);
        $position = 0;
        foreach ($titles as $url => $unused) {
            if (isset($excerpts[$position])) {
                $excerptByUrl[$url] = $excerpts[$position];
            }
            $position++;
        }
    }

    foreach ($urls as $key => $url) {
        if (isset($excerptByUrl[$url])) {
            $data[$key]['title'] = $excerptByUrl[$url];
        }
    }

    return $data;
}