xref: /plugin/sphinxsearch-was/xmlall.php (revision 3:c793831f6d31)
1<?php
2/**
3 * XML feed export
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
9
10/* Initialization */
11
12if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../../');
13if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
14
15require_once(DOKU_INC.'inc/init.php');
16require_once(DOKU_INC.'inc/common.php');
17require_once(DOKU_INC.'inc/events.php');
18require_once(DOKU_INC.'inc/parserutils.php');
19require_once(DOKU_INC.'inc/feedcreator.class.php');
20require_once(DOKU_INC.'inc/auth.php');
21require_once(DOKU_INC.'inc/pageutils.php');
22require_once(DOKU_INC.'inc/search.php');
23
24require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');
25
// Make sure the sphinxsearch working directory exists below the configured
// save dir (presumably where PageMapper keeps its data -- confirm against
// PageMapper.php).
if (!file_exists(DOKU_INC.$conf['savedir']."/sphinxsearch/")){
	mkdir(DOKU_INC.$conf['savedir']."/sphinxsearch/");
}

$pagesList = getPagesList();

// Feed header: opens the document set and declares the schema that every
// <sphinx:document> emitted by formatXml() follows.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="headings"/>
<sphinx:field name="categories"/>
<sphinx:field name="modified"/>
<sphinx:field name="created"/>
<sphinx:field name="creator"/>
<sphinx:field name="extra"/>
<sphinx:attr name="modified" type="timestamp"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();

foreach($pagesList as $row){
    $dokuPageId = $row['id'];

    // Metadata may be incomplete (e.g. pages that were never rendered), so
    // every key is read defensively: a PHP notice printed here would end up
    // inside the XML stream and corrupt the feed.
    $metadata = p_get_metadata($dokuPageId);
    $created  = isset($metadata['date']['created'])  ? $metadata['date']['created']  : '';
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : '';
    $creator  = isset($metadata['creator']) ? $metadata['creator'] : '';
    $title    = isset($metadata['title']) ? $metadata['title'] : '';
    $abstract = isset($metadata['description']['abstract']) ? $metadata['description']['abstract'] : '';

    $data = array();
    // NOTE(review): crc32() can return a negative number on 32-bit PHP builds;
    // left as-is because PageMapper presumably derives the same id -- confirm
    // before switching to sprintf('%u', ...).
    $data['id'] = crc32($dokuPageId);
    $data['headings'] = strip_tags(getHeadings($metadata));
    $data['categories'] = getCategories($dokuPageId);
    $data['created'] = $created;
    $data['modified'] = $modified;
    $data['creator'] = $creator;
    $data['title'] = strip_tags($title);
    $data['extra'] = strip_tags($abstract);
    // NOTE(review): the modification timestamp is passed as the second
    // argument of p_wiki_xhtml() -- verify this is the intended revision
    // parameter and that it yields a body for every page.
    $data['body'] = strip_tags(p_wiki_xhtml($dokuPageId,$modified,false));

    echo formatXml($data)."\n";

    // Register the page with the mapper after its document has been emitted.
    $pageMapper->add($dokuPageId);
}

echo '</sphinx:docset>';
72
73
74
/**
 * Render one page record as a <sphinx:document> element.
 *
 * Text fields (title, body, headings, categories, extra) are wrapped in CDATA
 * sections; id, modified, created and creator are written as XML-escaped text.
 * Missing keys are rendered as empty strings.
 *
 * @param array $data record with the keys id, title, body, headings,
 *                    categories, modified, created, creator, extra
 * @return string the XML fragment for this document
 */
function formatXml($data)
{
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[{title}]]></title>
<body><![CDATA[{body}]]></body>
<headings><![CDATA[{headings}]]></headings>
<categories><![CDATA[{categories}]]></categories>
<modified>{modified}</modified>
<created>{created}</created>
<creator>{creator}</creator>
<extra><![CDATA[{extra}]]></extra>
</sphinx:document>

';

    $cdataFields = array('title', 'body', 'headings', 'categories', 'extra');
    $plainFields = array('id', 'modified', 'created', 'creator');

    $replacements = array();

    foreach ($cdataFields as $field) {
        $value = isset($data[$field]) ? (string) $data[$field] : '';
        // "]]>" would close the CDATA section early; split it across two
        // adjacent sections so the character data stays intact.
        $replacements['{'.$field.'}'] = str_replace(']]>', ']]]]><![CDATA[>', $value);
    }

    foreach ($plainFields as $field) {
        $value = isset($data[$field]) ? (string) $data[$field] : '';
        // These values sit directly in element/attribute content, so markup
        // characters must be escaped.
        $replacements['{'.$field.'}'] = htmlspecialchars($value, ENT_QUOTES, 'UTF-8');
    }

    // strtr() substitutes all placeholders in a single pass, so placeholder-like
    // text inside a value (e.g. a title containing "{body}") is never expanded.
    return strtr($xmlFormat, $replacements);
}
97
/**
 * Join the titles of a page's table-of-contents entries into one string.
 *
 * @param array $metadata page metadata; the entries are read from
 *                        $metadata['description']['tableofcontents']
 * @return string the titles separated by ", ", or '' when there is no TOC
 */
function getHeadings($metadata)
{
    $hasToc = !empty($metadata) && !empty($metadata['description']['tableofcontents']);
    if (!$hasToc) {
        return '';
    }

    $joined = '';
    $isFirst = true;
    foreach ($metadata['description']['tableofcontents'] as $entry) {
        if (!$isFirst) {
            $joined .= ", ";
        }
        $joined .= $entry['title'];
        $isFirst = false;
    }

    return $joined;
}
108
/**
 * Expand a page id into the list of its namespace prefixes.
 *
 * The full id comes first, followed by each shorter prefix, separated by a
 * single space: "a:b:c" becomes "a:b:c a:b a". An id without a colon is
 * returned unchanged, and an empty id yields ''.
 *
 * @param string $id colon-separated page id
 * @return string space-separated prefixes, longest first, no trailing space
 */
function getCategories($id)
{
    if (empty($id)) return '';

    $ns = explode(":", $id);

    $prefixes = array();
    // Walk from the full id down to the first segment only; stopping at one
    // segment avoids the empty trailing entry (and stray spaces) an extra
    // pass would add.
    for ($length = count($ns); $length > 0; $length--) {
        $prefixes[] = implode(":", array_slice($ns, 0, $length));
    }

    return implode(' ', $prefixes);
}
133
134
135 /**
136  * Method return all wiki page names
137  * @global array $conf
138  * @return array
139  */
140 function getPagesList()
141 {
142    global $conf;
143
144    $data = array();
145    sort($data);
146    search($data,$conf['datadir'],'search_allpages','','');
147
148    return $data;
149}
150
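/**
 * Example of the metadata array for a single page, in print_r() format
 * (the structure the export loop reads via p_get_metadata()):
 *
 * Array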
153(
154    [date] => Array
155        (
156            [created] => 1239181434
157            [modified] => 1239202933
158        )
159
160    [creator] => Sergey Nikolaev
161    [last_change] => Array
162        (
163            [date] => 1239202933
164            [ip] => 85.118.229.162
165            [type] => E
166            [id] => cal:minutes:boardreader:200904:20090408
167            [user] => snikolaev
168            [sum] =>
169            [extra] =>
170        )
171
172    [contributor] => Array
173        (
174            [snikolaev] => Sergey Nikolaev
175        )
176
177    [title] => BoardReader call of Apr 8 2009
178    [description] => Array
179        (
180            [tableofcontents] => Array
181                (
182                    [0] => Array
183                        (
184                            [hid] => boardreader_call_of_apr_8_2009
185                            [title] => BoardReader call of Apr 8 2009
186                            [type] => ul
187                            [level] => 1
188                        )
189
190                    [1] => Array
191                        (
192                            [hid] => sergey
193                            [title] => Sergey
194                            [type] => ul
195                            [level] => 2
196                        )
197
198                    [2] => Array
199                        (
200                            [hid] => slava
201                            [title] => Slava
202                            [type] => ul
203                            [level] => 2
204                        )
205
206                    [3] => Array
207                        (
208                            [hid] => roman
209                            [title] => Roman
210                            [type] => ul
211                            [level] => 2
212                        )
213
214                    [4] => Array
215                        (
216                            [hid] => nikita
217                            [title] => Nikita
218                            [type] => ul
219                            [level] => 2
220                        )
221
222                    [5] => Array
223                        (
224                            [hid] => discussion
225                            [title] => Discussion
226                            [type] => ul
227                            [level] => 2
228                        )
229
230                )
231
232            [abstract] => Participants: Mindaugas, Sergey, Slava, Roman, Nikita
233
234Duration: 23 min
235
236Sergey
237
238Status:
239
240	*  published Roman's changes
241	*  started reviewing Slava's changes
242
243
244Plans:
245
246	*  start altering (singature field)
247	*  select server error handling
248	*  publish Slava's and Roman's changes
249        )
250
251    [internal] => Array
252        (
253            [cache] => 1
254            [toc] => 1
255        )
256
257)
258
259 */
260
261