xref: /plugin/sphinxsearch-was/xmlall.php (revision 18:fcbacaf89c61)
1<?php
2/**
3 * XML feed export
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
9
/* Bootstrap: locate the wiki, load its libraries, prepare the data directory */

// Fall back to paths relative to this file when the constants are not already defined.
defined('DOKU_INC') || define('DOKU_INC', dirname(__FILE__) . '/../../../');
defined('DOKU_PLUGIN') || define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

// Wiki libraries, loaded in the same order as before (init.php first).
require_once DOKU_INC . 'inc/init.php';
require_once DOKU_INC . 'inc/common.php';
require_once DOKU_INC . 'inc/events.php';
require_once DOKU_INC . 'inc/parserutils.php';
require_once DOKU_INC . 'inc/feedcreator.class.php';
require_once DOKU_INC . 'inc/auth.php';
require_once DOKU_INC . 'inc/pageutils.php';
require_once DOKU_INC . 'inc/search.php';
require_once DOKU_INC . 'inc/parser/parser.php';

// Helpers shipped with the sphinxsearch plugin.
require_once DOKU_PLUGIN . 'sphinxsearch/PageMapper.php';
require_once DOKU_PLUGIN . 'sphinxsearch/functions.php';

// Create the plugin's directory below the configured save dir on first use.
file_exists(DOKU_INC . $conf['savedir'] . "/sphinxsearch/")
    || mkdir(DOKU_INC . $conf['savedir'] . "/sphinxsearch/");
32
/*
 * Export: print one <sphinx:docset> to standard output.
 *
 * For every existing page in the list a document is written per section
 * returned by getDocumentsByHeadings(); a page for which that helper returns
 * nothing is written as a single level-1 document. Every written document is
 * also registered with the PageMapper.
 */

// Each row is expected to carry the page id under the 'id' key.
$pagesList = getPagesList();

// Docset opening and schema; the documents are streamed right after it.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="categories"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();
$counter = 0; // number of documents written
foreach($pagesList as $row){
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)){
        continue;
    }

    // Check existence of THIS page. The previous code passed an undefined
    // $page variable, so the flag never described the row being processed.
    // resolve_pageid() receives the id by reference, hence the copy: the
    // original id stays untouched for everything below.
    $resolvedId = $dokuPageId;
    $exists = false;
    resolve_pageid('', $resolvedId, $exists);
    if (!$exists){ // do not include pages that do not exist
        continue;
    }

    // Page metadata supplies the title and modification date used below.
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)){
        // One document per section, keyed by the section's heading id.
        foreach($sections as $hid => $section){
            $data = array();
            // Page id and heading id together give each section its own document id.
            $data['id'] = crc32($dokuPageId.$hid);
            $data['categories'] = getCategories($dokuPageId) . '#' . $hid;
            $data['level'] = $section['level'];
            $data['modified'] = $metadata['date']['modified'];
            $data['title'] = strip_tags($section['title']);
            // $info is filled by p_render() through its by-reference parameter.
            $info = array();
            $data['body'] = strip_tags(p_render('xhtml',p_get_instructions($section['section']),$info));

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $section['title'], $hid);
            $counter++;
        }
    } else {
        // No sections: export the whole page as one level-1 document.
        $data = array();
        $data['id'] = crc32($dokuPageId);
        $data['categories'] = getCategories($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $metadata['date']['modified'];
        $data['title'] = strip_tags($metadata['title']);
        $data['body'] = strip_tags(p_wiki_xhtml($dokuPageId,$metadata['date']['modified'],false));

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $metadata['title']);
        $counter++;
    }

}
echo '</sphinx:docset>';
92/*echo $counter;
93echo "\n".number_format(memory_get_peak_usage()/1024)."K\n";*/