xref: /plugin/sphinxsearch-was/xmlall.php (revision 133:aa595765bbfd)
1 <?php
2 /**
3  * XML feed export
4  *
5  * @author     Ivinco <opensource@ivinco.com>
6  */
7 
// Remember the current display_errors setting and switch on-screen error
// output off; the saved value is restored at the very end of this script.
// NOTE(review): presumably this keeps PHP notices out of the XML stream - confirm.
$deStatus = ini_get('display_errors');
ini_set('display_errors', '0');

/* Initialization */

// Locate the DokuWiki root and plugin directory unless the including
// environment has already defined them.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', dirname(__FILE__) . '/../../../');
}
if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}

// Guarded like the constants above so that a pre-existing definition does not
// trigger a "constant already defined" error.
if (!defined('NOSESSION')) {
    define('NOSESSION', true);
}
16 
17 require_once(DOKU_INC . 'inc/init.php');
18 require_once(DOKU_INC . 'inc/common.php');
19 require_once(DOKU_INC . 'inc/events.php');
20 require_once(DOKU_INC . 'inc/parserutils.php');
21 //require_once(DOKU_INC.'inc/feedcreator.class.php');
22 require_once(DOKU_INC . 'inc/auth.php');
23 require_once(DOKU_INC . 'inc/pageutils.php');
24 require_once(DOKU_INC . 'inc/search.php');
25 require_once(DOKU_INC . 'inc/parser/parser.php');
26 
27 
28 require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
29 require_once(DOKU_PLUGIN . 'sphinxsearch/functions.php');
30 
// Resolve the DokuWiki data directory: first try the configured savedir as
// given, then interpret it relative to the DokuWiki root. Abort if neither
// location exists.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) {
        die('invalid DokuWiki savedir');
    }
}

// Make sure the plugin's directory inside the data directory exists.
// The second is_dir() check tolerates another process creating the directory
// between our test and our mkdir() call. A failure is written to the error
// log (never to stdout, which carries the XML feed) and the export carries on
// as before.
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!@is_dir($fullSphinxPath) && !@mkdir($fullSphinxPath) && !@is_dir($fullSphinxPath)) {
    error_log('sphinxsearch: unable to create directory ' . $fullSphinxPath);
}
40 
// Fetch the list of pages to export.
$pagesList = getPagesList();

// Emit the XML prolog, open the docset element and declare the schema:
// six full-text fields plus the integer "level" attribute.
// The trailing empty entry yields the final newline after </sphinx:schema>.
$feedHeaderLines = array(
    '<?xml version="1.0" encoding="utf-8"?>',
    '<sphinx:docset>',
    '',
    '<sphinx:schema>',
    '<sphinx:field name="title"/>',
    '<sphinx:field name="body"/>',
    '<sphinx:field name="namespace"/>',
    '<sphinx:field name="pagename"/>',
    '<sphinx:field name="level"/>',
    '<sphinx:field name="modified"/>',
    '<sphinx:attr name="level" type="int" bits="8" default="1"/>',
    '</sphinx:schema>',
    '',
);
echo implode("\n", $feedHeaderLines);
56 
if (!function_exists('sphinxsearch_xmlall_to_utf8')) {
    /**
     * Convert a string to UTF-8 using the encoding detected by mbstring.
     *
     * When the encoding cannot be detected the text is returned unchanged
     * instead of passing an invalid source encoding to mb_convert_encoding().
     *
     * @param mixed $text text to convert; cast to string first
     * @return string the converted text, or the input text if detection failed
     */
    function sphinxsearch_xmlall_to_utf8($text)
    {
        $text = (string) $text;
        $encoding = mb_detect_encoding($text, 'auto');
        if ($encoding === false) {
            return $text;
        }
        return mb_convert_encoding($text, 'UTF-8', $encoding);
    }
}

// Write one XML document per page section (or one per page when the page has
// no sections) and register every exported document with the page mapper.
$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)) {
        continue;
    }

    // Skip pages that do not exist. resolve_pageid() takes the page id by
    // reference, so it gets a copy and the id used for indexing stays intact.
    $resolvedId = $dokuPageId;
    $exists = false;
    resolve_pageid('', $resolvedId, $exists);
    if (!$exists) {
        continue;
    }

    // Skip pages matching the configured hidepages pattern. The pattern is
    // tested against the id with a leading colon; the "ui" modifiers make the
    // match case-insensitive and UTF-8 aware.
    if (!empty($conf['hidepages'])
        && preg_match('/' . $conf['hidepages'] . '/ui', ':' . $dokuPageId)
    ) {
        continue;
    }

    // Page metadata; the title and modification date may be missing.
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;
    $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

    if (!empty($sections)) {
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }

            $data = array();
            // Document id: CRC32 of page id + heading id, printed as unsigned.
            $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['title'] = strip_tags($section['title_text']);
            // The section text is exported as provided, without rendering.
            $data['title_to_index'] = sphinxsearch_xmlall_to_utf8($section['title_to_index']);
            $data['body'] = sphinxsearch_xmlall_to_utf8($section['section']);

            echo formatXml($data) . "\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // No sections: export the whole page as a single level-1 document.
        $body = io_readFile(wikiFN($dokuPageId));
        if (empty($body)) {
            continue;
        }

        $data = array();
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['title'] = strip_tags((string) $pageTitle);
        $data['title_to_index'] = sphinxsearch_xmlall_to_utf8($pageTitle);
        $data['body'] = sphinxsearch_xmlall_to_utf8($body);

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $pageTitle, $pageTitle);
    }
}
echo '</sphinx:docset>';

// Restore the display_errors setting saved at the start of the script.
ini_set('display_errors', $deStatus);
125