xref: /plugin/sphinxsearch-was/xmlall.php (revision 127:2ea4a515b6ea)
<?php
/**
 * XML feed export
 *
 * @author     Ivinco <opensource@ivinco.com>
 */

// Remember the current display_errors setting (it is restored at the end of
// the script), then switch error display off.
// NOTE(review): presumably done so PHP notices/warnings cannot leak into the
// XML stream written below - confirm.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);

/* Initialization: define the DokuWiki base paths unless something else already did */
defined('DOKU_INC') || define('DOKU_INC', dirname(__FILE__) . '/../../../');
defined('DOKU_PLUGIN') || define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

/* DokuWiki includes (init.php first, same order as before) */
require_once DOKU_INC . 'inc/init.php';
require_once DOKU_INC . 'inc/common.php';
require_once DOKU_INC . 'inc/events.php';
require_once DOKU_INC . 'inc/parserutils.php';
require_once DOKU_INC . 'inc/feedcreator.class.php';
require_once DOKU_INC . 'inc/auth.php';
require_once DOKU_INC . 'inc/pageutils.php';
require_once DOKU_INC . 'inc/search.php';
require_once DOKU_INC . 'inc/parser/parser.php';

/* Files from the sphinxsearch plugin directory */
require_once DOKU_PLUGIN . 'sphinxsearch/PageMapper.php';
require_once DOKU_PLUGIN . 'sphinxsearch/functions.php';

// Locate the DokuWiki data directory: try $conf['savedir'] as configured
// first, then the same value taken relative to DOKU_INC.
$dataPath = fullpath($conf['savedir']);
if (@file_exists($dataPath) === false) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
}
if (@file_exists($dataPath) === false) {
    // neither candidate exists: stop before any XML is written
    die('invalid DokuWiki savedir');
}

// Make sure the sphinxsearch sub-directory of the data directory exists.
// The mkdir() result is deliberately not checked (best effort).
$fullSphinxPath = "{$dataPath}/sphinxsearch/";
if (@file_exists($fullSphinxPath) === false) {
    mkdir($fullSphinxPath);
}

// List of pages to export. getPagesList() is not defined in this file; each
// entry is later read through its 'id' key.
$pagesList = getPagesList();

// Open the document set and declare the schema: six full-text fields plus one
// 8-bit integer attribute. The blank line before the closing marker keeps the
// trailing newline after </sphinx:schema>.
// NOTE(review): "level" is declared both as a field and as an attribute -
// confirm this is intended.
echo <<<XML
<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>

XML;

if (!function_exists('sphinxsearch_to_utf8')) {
    /**
     * Convert a string to UTF-8, guessing its current encoding.
     *
     * When the encoding cannot be detected the text is treated as UTF-8
     * instead of handing mb_convert_encoding() an invalid source encoding
     * (which yields false on older PHP and throws on PHP 8).
     *
     * @param string|null $text text to convert
     * @return string converted text; '' for null or empty input
     */
    function sphinxsearch_to_utf8($text)
    {
        $text = (string) $text;
        if ($text === '') {
            return '';
        }
        $encoding = mb_detect_encoding($text, "auto");
        if ($encoding === false) {
            $encoding = 'UTF-8';
        }
        return mb_convert_encoding($text, "UTF-8", $encoding);
    }
}

/*
 * Emit one <sphinx:document> per page section (or one per page when the page
 * has no sections) and register every emitted document with the PageMapper.
 */
$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)) {
        continue;
    }

    // resolve_pageid() receives the id by reference, so hand it a copy and keep
    // $dokuPageId untouched. (The previous code passed an undefined $page, so
    // the existence check never looked at the current page.)
    $resolvedId = $dokuPageId;
    $exists = false;
    resolve_pageid('', $resolvedId, $exists);
    if (!$exists) { //do not include not exists page
        continue;
    }

    if (!empty($conf['hidepages'])) {
        //check hidepages pattern to exclude hidden pages
        //(same "ui" modifiers as DokuWiki's own hidden-page check)
        $testName = ':' . $dokuPageId;
        if (preg_match('/' . $conf['hidepages'] . '/ui', $testName)) {
            continue;
        }
    }

    //get meta data
    $metadata = p_get_metadata($dokuPageId);
    // metadata keys may be missing; read them without raising notices
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;
    $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        // one document per non-empty section
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }
            //parse meta data for headers, abstract, date, authors
            $data = array();
            $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['title'] = strip_tags($section['title_text']);
            //convert to utf-8 encoding
            $data['title_to_index'] = sphinxsearch_to_utf8($section['title_to_index']);
            $data['body'] = sphinxsearch_to_utf8($section['section']);

            echo formatXml($data) . "\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // no sections: export the raw page text as a single level-1 document
        $data = array();
        // The id is derived from the page id only. The previous code appended
        // $hid here, which is undefined in this branch or left over from an
        // earlier page's section loop, making the id unstable.
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['title'] = strip_tags((string) $pageTitle);
        $data['title_to_index'] = $pageTitle;
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        if (empty($data['body'])) {
            continue;
        }

        //convert to utf-8 encoding
        $data['title_to_index'] = sphinxsearch_to_utf8($data['title_to_index']);
        $data['body'] = sphinxsearch_to_utf8($data['body']);

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $pageTitle, $pageTitle);
    }
}
echo '</sphinx:docset>';

// put display_errors back to the value saved at the top of the script
ini_set('display_errors', $deStatus);