xref: /plugin/sphinxsearch-was/xmlall.php (revision 0:c723787235e2)
1<?php
2/**
3 * XML feed export
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
// dokuwiki folder (absolute system path)
$dokuwiki_folder =  '/www/dokuwiki/htdocs';

// dokuwiki url
$dokuwiki_url = 'http://dokuwiki.home';

// link prefix to another page
// NOTE(review): not referenced anywhere in this file; presumably read by code
// loaded below -- confirm before removing.
$link_prefix = 'http://dokuwiki.home/doc.php/';

/* Initialization */

// Every constant is defined only when it does not exist yet, so a value set
// before this script runs is kept and no "constant already defined" message
// is emitted into the XML output.
if (!defined('DOKU_PATH'))   define('DOKU_PATH', $dokuwiki_folder);
if (!defined('DOKU_INC'))    define('DOKU_INC', DOKU_PATH . '/');
if (!defined('DOKU_CONF'))   define('DOKU_CONF', DOKU_PATH . '/conf/');
if (!defined('DOKU_URL'))    define('DOKU_URL', $dokuwiki_url . '/');
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

// DokuWiki includes; the load order of the original script is kept as is.
require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
require_once(DOKU_INC . 'inc/events.php');
require_once(DOKU_INC . 'inc/parserutils.php');
require_once(DOKU_INC . 'inc/feedcreator.class.php');
require_once(DOKU_INC . 'inc/auth.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/search.php');

// Provides the PageMapper class instantiated by the export loop.
require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
38
$pagesList = getPagesList();

// Document set header and schema: eight full-text fields plus the
// "modified" timestamp attribute.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="headings"/>
<sphinx:field name="categories"/>
<sphinx:field name="modified"/>
<sphinx:field name="created"/>
<sphinx:field name="creator"/>
<sphinx:field name="extra"/>
<sphinx:attr name="modified" type="timestamp"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();

foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];

    //get meta data
    $metadata = p_get_metadata($dokuPageId);

    //parse meta data for headers, abstract, date, authors
    // Every metadata key is optional here: a page with incomplete metadata
    // yields empty fields instead of undefined-index notices, which could
    // otherwise end up inside the generated XML.
    // Start from a fresh array so nothing leaks over from the previous page.
    $data = array(
        // NOTE(review): crc32() can return a negative integer on 32-bit PHP
        // builds; left unchanged because PageMapper presumably derives the
        // same id from the page name -- confirm.
        'id'         => crc32($dokuPageId),
        'headings'   => strip_tags(getHeadings($metadata)),
        'categories' => getCategories($dokuPageId),
        'created'    => isset($metadata['date']['created']) ? $metadata['date']['created'] : '',
        'modified'   => isset($metadata['date']['modified']) ? $metadata['date']['modified'] : '',
        'creator'    => isset($metadata['creator']) ? $metadata['creator'] : '',
        'title'      => isset($metadata['title']) ? strip_tags($metadata['title']) : '',
        'extra'      => isset($metadata['description']['abstract'])
                            ? strip_tags($metadata['description']['abstract'])
                            : '',
    );

    // The page is rendered to XHTML and reduced to plain text for the body
    // field; the modification date is passed as the second argument exactly
    // as before.
    $data['body'] = strip_tags(p_wiki_xhtml($dokuPageId, $data['modified'], false));

    echo formatXml($data)."\n";

    $pageMapper->add($dokuPageId);
}

echo '</sphinx:docset>';
80
81
82
/**
 * Render one page record as a <sphinx:document> XML element.
 *
 * Free-text fields (title, body, headings, categories, extra) are wrapped in
 * CDATA sections; id, modified, created and creator are written as escaped
 * character data.
 *
 * @param array $data Record with the keys id, title, body, headings,
 *                    categories, modified, created, creator and extra.
 *                    Missing keys are rendered as empty strings.
 * @return string The XML fragment, preceded by one and followed by two
 *                line breaks.
 */
function formatXml($data)
{
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[{title}]]></title>
<body><![CDATA[{body}]]></body>
<headings><![CDATA[{headings}]]></headings>
<categories><![CDATA[{categories}]]></categories>
<modified>{modified}</modified>
<created>{created}</created>
<creator>{creator}</creator>
<extra><![CDATA[{extra}]]></extra>
</sphinx:document>

';

    $replacements = array();

    // CDATA fields: the only sequence that must not occur is the terminator
    // "]]>". It is split across two adjacent CDATA sections so the text
    // survives unchanged and the document stays well-formed.
    foreach (array('title', 'body', 'headings', 'categories', 'extra') as $key) {
        $value = isset($data[$key]) ? (string) $data[$key] : '';
        $replacements['{' . $key . '}'] = str_replace(']]>', ']]]]><![CDATA[>', $value);
    }

    // Plain fields (attribute value and element text): escape XML
    // metacharacters. strtr() is used instead of htmlspecialchars() so the
    // result does not depend on the input being valid UTF-8.
    $xmlEscapes = array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
    foreach (array('id', 'modified', 'created', 'creator') as $key) {
        $value = isset($data[$key]) ? (string) $data[$key] : '';
        $replacements['{' . $key . '}'] = strtr($value, $xmlEscapes);
    }

    // strtr() with an array substitutes in a single pass and never rescans
    // inserted text, so a value that happens to contain "{body}" or another
    // placeholder is emitted literally instead of being expanded.
    return strtr($xmlFormat, $replacements);
}
105
/**
 * Collect the heading titles listed in a page's table of contents.
 *
 * @param array $metadata Page metadata; the titles are read from
 *                        $metadata['description']['tableofcontents'].
 * @return string The titles joined with ", ", or an empty string when the
 *                metadata or its table of contents is empty.
 */
function getHeadings($metadata)
{
    if (!empty($metadata) && !empty($metadata['description']['tableofcontents'])) {
        $titles = array();
        foreach ($metadata['description']['tableofcontents'] as $entry) {
            array_push($titles, $entry['title']);
        }
        return join(", ", $titles);
    }

    return '';
}
116
/**
 * Build the category string for a page id.
 *
 * For an id with namespaces the result lists the full id followed by each
 * shorter leading part, separated by single spaces, e.g.
 * "a:b:c" gives "a:b:c a:b a". An id without ":" is returned unchanged.
 *
 * @param string $id Page id with ":" as the separator.
 * @return string Space-separated prefixes, longest first; '' for a missing id.
 */
function getCategories($id)
{
    // Only a genuinely missing id yields no categories. empty() is not used
    // because it would also reject the valid page id "0".
    if ($id === null || $id === false || $id === '' || $id === array()) {
        return '';
    }

    if (false === strpos($id, ":")) {
        return $id;
    }

    $ns = explode(":", $id);

    // One entry per prefix length, from the full id down to the first part.
    // The loop stops at length 1, so no empty prefix (and no trailing
    // whitespace) is produced.
    $prefixes = array();
    for ($length = count($ns); $length > 0; $length--) {
        $prefixes[] = implode(':', array_slice($ns, 0, $length));
    }

    return implode(' ', $prefixes);
}
141
142
/**
 * Return the list of all wiki pages.
 *
 * Calls search() with the 'search_allpages' callback on the directory in
 * $conf['datadir'] and returns the array it fills. The export loop reads
 * the 'id' key of each entry.
 *
 * No sorting is applied here; entries come back in whatever order search()
 * produces them.
 *
 * @global array $conf
 * @return array
 */
function getPagesList()
{
    global $conf;

    $data = array();
    // NOTE(review): the options argument is passed as '' rather than an
    // array; kept as is -- confirm what search_allpages expects.
    search($data, $conf['datadir'], 'search_allpages', '', '');

    return $data;
}
158
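/**
 * Sample page metadata array (print_r dump), showing the keys the export
 * loop reads: date, creator, title and description.
 *
 * Array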
161(
162    [date] => Array
163        (
164            [created] => 1239181434
165            [modified] => 1239202933
166        )
167
168    [creator] => Sergey Nikolaev
169    [last_change] => Array
170        (
171            [date] => 1239202933
172            [ip] => 85.118.229.162
173            [type] => E
174            [id] => cal:minutes:boardreader:200904:20090408
175            [user] => snikolaev
176            [sum] =>
177            [extra] =>
178        )
179
180    [contributor] => Array
181        (
182            [snikolaev] => Sergey Nikolaev
183        )
184
185    [title] => BoardReader call of Apr 8 2009
186    [description] => Array
187        (
188            [tableofcontents] => Array
189                (
190                    [0] => Array
191                        (
192                            [hid] => boardreader_call_of_apr_8_2009
193                            [title] => BoardReader call of Apr 8 2009
194                            [type] => ul
195                            [level] => 1
196                        )
197
198                    [1] => Array
199                        (
200                            [hid] => sergey
201                            [title] => Sergey
202                            [type] => ul
203                            [level] => 2
204                        )
205
206                    [2] => Array
207                        (
208                            [hid] => slava
209                            [title] => Slava
210                            [type] => ul
211                            [level] => 2
212                        )
213
214                    [3] => Array
215                        (
216                            [hid] => roman
217                            [title] => Roman
218                            [type] => ul
219                            [level] => 2
220                        )
221
222                    [4] => Array
223                        (
224                            [hid] => nikita
225                            [title] => Nikita
226                            [type] => ul
227                            [level] => 2
228                        )
229
230                    [5] => Array
231                        (
232                            [hid] => discussion
233                            [title] => Discussion
234                            [type] => ul
235                            [level] => 2
236                        )
237
238                )
239
240            [abstract] => Participants: Mindaugas, Sergey, Slava, Roman, Nikita
241
242Duration: 23 min
243
244Sergey
245
246Status:
247
248	*  published Roman's changes
249	*  started reviewing Slava's changes
250
251
252Plans:
253
254	*  start altering (singature field)
255	*  select server error handling
256	*  publish Slava's and Roman's changes
257        )
258
259    [internal] => Array
260        (
261            [cache] => 1
262            [toc] => 1
263        )
264
265)
266
267 */
268
269