<?php
/**
 * XML feed export
 *
 * Writes a Sphinx xmlpipe2 document set to standard output: one document per
 * heading section of every wiki page, or one document for the whole page when
 * the page yields no sections.  Every exported document is also registered
 * with the PageMapper.
 *
 * @author Ivinco <opensource@ivinco.com>
 */

// Silence PHP error output while the feed is written; the previous setting is
// restored at the end of the script.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);

/* Initialization */

if (!defined('DOKU_INC')) define('DOKU_INC', dirname(__FILE__) . '/../../../');
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

define('NOSESSION', true);

require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
require_once(DOKU_INC . 'inc/events.php');
require_once(DOKU_INC . 'inc/parserutils.php');
//require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC . 'inc/auth.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/search.php');
require_once(DOKU_INC . 'inc/parser/parser.php');


require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN . 'sphinxsearch/functions.php');

// Locate the savedir: try the configured value as given first, then relative
// to the DokuWiki root.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) die('invalid DokuWiki savedir');
}

// Make sure the plugin's working directory exists below the savedir.
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!@file_exists($fullSphinxPath)) {
    mkdir($fullSphinxPath);
}

$pagesList = getPagesList();

// Document set header and schema declaration.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];

    // resolve_pageid() receives the page id by reference and reports through
    // $exists whether that page exists.  It has to be handed the id of the
    // page being processed; calling it with an unset variable would test an
    // empty id instead of this page.
    $page = $dokuPageId;
    resolve_pageid('', $page, $exists);
    if (empty($dokuPageId) || !$exists) { //do not include not exists page
        continue;
    }
    if (!empty($conf['hidepages'])) {
        //check hidepages pattern to exclude hidden pages
        $testName = ':' . $dokuPageId;
        if (preg_match("/" . $conf['hidepages'] . "/", $testName)) {
            continue;
        }
    }

    //get meta data
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        // One document per non-empty section, keyed by page id + heading id.
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }
            //parse meta data for headers, abstract, date, authors
            $data = array();
            // '%u' prints the CRC32 checksum as an unsigned decimal.
            $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $metadata['date']['modified'];
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            $data['body'] = $section['section']; //strip_tags(p_render('xhtml',p_get_instructions($section['section']),$info));

            //convert to utf-8 encoding
            $data['title_to_index'] = mb_convert_encoding($data['title_to_index'], "UTF-8", mb_detect_encoding($data['title_to_index'], "auto"));
            $data['body'] = mb_convert_encoding($data['body'], "UTF-8", mb_detect_encoding($data['body'], "auto"));

            echo formatXml($data) . "\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // No sections were returned: export the raw page text as a single
        // level-1 document keyed by the page id alone.
        $data = array();
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $metadata['date']['modified'];
        $data['title'] = strip_tags($metadata['title']);
        $data['title_to_index'] = $metadata['title'];
        $data['body'] = io_readFile(wikiFN($dokuPageId)); //strip_tags(p_wiki_xhtml($dokuPageId,$metadata['date']['modified'],false));

        if (empty($data['body'])) {
            continue;
        }

        //convert to utf-8 encoding
        $data['title_to_index'] = mb_convert_encoding($data['title_to_index'], "UTF-8", mb_detect_encoding($data['title_to_index'], "auto"));
        $data['body'] = mb_convert_encoding($data['body'], "UTF-8", mb_detect_encoding($data['body'], "auto"));

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $metadata['title'], $metadata['title']);
    }
}
echo '</sphinx:docset>';

// Restore the display_errors setting saved at the top of the script.
ini_set('display_errors', $deStatus);