xref: /plugin/sphinxsearch-was/xmlall.php (revision 133:aa595765bbfd)
<?php
/**
 * XML feed export
 *
 * Prints a Sphinx xmlpipe2 document set (<sphinx:docset>) to standard output:
 * one document per non-empty section of every page returned by getPagesList(),
 * or one document for the whole page when the page yields no sections.
 * Every emitted document is also registered with the PageMapper.
 *
 * @author     Ivinco <opensource@ivinco.com>
 */

// Keep PHP notices/warnings out of the XML stream; the previous setting is
// restored at the end of the script.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);

/* Initialization */

if (!defined('DOKU_INC')) define('DOKU_INC', dirname(__FILE__) . '/../../../');
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

define('NOSESSION', true);

require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
require_once(DOKU_INC . 'inc/events.php');
require_once(DOKU_INC . 'inc/parserutils.php');
require_once(DOKU_INC . 'inc/auth.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/search.php');
require_once(DOKU_INC . 'inc/parser/parser.php');

require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN . 'sphinxsearch/functions.php');

// $conf['savedir'] is tried as given first, then relative to DOKU_INC.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) die('invalid DokuWiki savedir');
}

// Make sure the plugin's directory inside the data directory exists.
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!@file_exists($fullSphinxPath)) {
    mkdir($fullSphinxPath);
}

$pagesList = getPagesList();

echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)) {
        continue;
    }

    // resolve_pageid() takes the page id by reference and may rewrite it,
    // so hand it a copy and keep $dokuPageId intact for the rest of the loop.
    $page = $dokuPageId;
    resolve_pageid('', $page, $exists);
    if (!$exists) { // do not include pages that do not exist
        continue;
    }

    if (!empty($conf['hidepages'])) {
        // Exclude hidden pages. The "ui" modifiers mirror the match that
        // DokuWiki core performs in isHiddenPage().
        $testName = ':' . $dokuPageId;
        if (preg_match('/' . $conf['hidepages'] . '/ui', $testName)) {
            continue;
        }
    }

    // get meta data
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        // One document per non-empty section, keyed by its heading id.
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }

            $data = array();
            // Document id: unsigned CRC32 of page id + heading id.
            $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $metadata['date']['modified'];
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            // The section text is indexed as returned, not rendered to XHTML.
            $data['body'] = $section['section'];

            // Convert to UTF-8. mb_detect_encoding() returns false when no
            // candidate encoding matches; in that case leave the text as is
            // instead of passing false on to mb_convert_encoding().
            foreach (array('title_to_index', 'body') as $field) {
                $detected = mb_detect_encoding($data[$field], 'auto');
                if ($detected !== false) {
                    $data[$field] = mb_convert_encoding($data[$field], 'UTF-8', $detected);
                }
            }

            echo formatXml($data) . "\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // No sections: export the whole page as a single level-1 document.
        $data = array();
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $metadata['date']['modified'];
        $data['title'] = strip_tags($metadata['title']);
        $data['title_to_index'] = $metadata['title'];
        // The raw page file is indexed, not its rendered XHTML.
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        if (empty($data['body'])) {
            continue;
        }

        // Convert to UTF-8; skip the conversion when detection fails (see above
        // branch for the same handling of section documents).
        foreach (array('title_to_index', 'body') as $field) {
            $detected = mb_detect_encoding($data[$field], 'auto');
            if ($detected !== false) {
                $data[$field] = mb_convert_encoding($data[$field], 'UTF-8', $detected);
            }
        }

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $metadata['title'], $metadata['title']);
    }
}
echo '</sphinx:docset>';

ini_set('display_errors', $deStatus);