xref: /plugin/sphinxsearch-was/xmlall.php (revision 102:334a4a10e54b)
1<?php
2/**
3 * XML feed export
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
// Remember the current display_errors setting (it is restored at the very end
// of the script) and switch error display off while the export runs.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);

/* Initialization */

// Both constants are only defined here when the including environment has not
// already provided them.
defined('DOKU_INC') || define('DOKU_INC', dirname(__FILE__).'/../../../');
defined('DOKU_PLUGIN') || define('DOKU_PLUGIN', DOKU_INC.'lib/plugins/');
15
16require_once(DOKU_INC.'inc/init.php');
17require_once(DOKU_INC.'inc/common.php');
18require_once(DOKU_INC.'inc/events.php');
19require_once(DOKU_INC.'inc/parserutils.php');
20require_once(DOKU_INC.'inc/feedcreator.class.php');
21require_once(DOKU_INC.'inc/auth.php');
22require_once(DOKU_INC.'inc/pageutils.php');
23require_once(DOKU_INC.'inc/search.php');
24require_once(DOKU_INC.'inc/parser/parser.php');
25
26
27require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');
28require_once(DOKU_PLUGIN.'sphinxsearch/functions.php');
29
// Make sure the "sphinxsearch" working directory below the configured save dir
// exists before anything tries to use it.
// NOTE(review): presumably PageMapper stores its data here - not visible in this file.
$sphinxDataDir = DOKU_INC.$conf['savedir']."/sphinxsearch/";
if (!is_dir($sphinxDataDir)) {
    // is_dir() instead of file_exists(): a plain file with that name is not a
    // usable directory. The recursive flag also creates missing parent
    // directories; 0777 is mkdir()'s default mode, spelled out only because
    // the recursive flag is the third argument.
    mkdir($sphinxDataDir, 0777, true);
}
33
// Fetch the list of pages to export; every entry is later read via its 'id' key.
$pagesList = getPagesList();

// Emit the XML prologue, open the Sphinx docset and declare its schema:
// six full-text fields plus an 8-bit integer attribute "level" (default 1).
// The blank line before the closing marker keeps the trailing newline.
echo <<<'XML'
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>

XML;
49
// Walk over all pages and print one Sphinx document per page section (or one
// document for the whole page when no sections were found). Every exported
// document is also registered with the PageMapper.
$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = isset($row['id']) ? $row['id'] : '';
    if (empty($dokuPageId)) {
        continue;
    }

    // Existence check for the page that is actually being exported.
    // resolve_pageid() reports its result through the by-reference $exists
    // argument; the original code passed an unassigned $page here, so the
    // check never looked at the current page id. A copy is passed because
    // $dokuPageId itself must stay untouched for the calls below.
    $page = $dokuPageId;
    $exists = false;
    resolve_pageid('', $page, $exists);
    if (!$exists) { // do not include pages that do not exist
        continue;
    }

    // get meta data; missing keys fall back to null instead of raising notices
    $metadata = p_get_metadata($dokuPageId);
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;
    $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        foreach ($sections as $hid => $section) {
            // one document per heading section; the id combines page id and heading id
            $data = array();
            $data['id'] = crc32($dokuPageId.$hid);
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            $data['body'] = $section['section'];

            // convert to utf-8 encoding; when the encoding cannot be detected
            // the text is kept as it is instead of handing `false` to
            // mb_convert_encoding() as the source encoding
            foreach (array('title_to_index', 'body') as $field) {
                $text = (string) $data[$field];
                $encoding = mb_detect_encoding($text, "auto");
                $data[$field] = ($encoding === false)
                    ? $text
                    : mb_convert_encoding($text, "UTF-8", $encoding);
            }

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // no sections: export the page as a single level-1 document built
        // from the page file's content as read from disk
        $data = array();
        $data['id'] = crc32($dokuPageId);
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['title'] = strip_tags((string) $pageTitle);
        $data['title_to_index'] = $pageTitle;
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        // convert to utf-8 encoding (same guarded conversion as for sections)
        foreach (array('title_to_index', 'body') as $field) {
            $text = (string) $data[$field];
            $encoding = mb_detect_encoding($text, "auto");
            $data[$field] = ($encoding === false)
                ? $text
                : mb_convert_encoding($text, "UTF-8", $encoding);
        }

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $pageTitle, $pageTitle);
    }
}
echo '</sphinx:docset>';

// restore the display_errors setting saved at the top of the script
ini_set('display_errors', $deStatus);