<?php
/**
 * XML feed export
 *
 * Writes a <sphinx:docset> document to the output: the schema first, then one
 * formatXml() entry per section returned by getDocumentsByHeadings(), or a
 * single entry for the whole page when no sections are returned. Every
 * emitted entry is also registered with PageMapper.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */

// Remember the current display_errors setting and switch it off while the
// feed is produced (presumably so PHP messages cannot end up inside the XML);
// the saved value is restored on the last line of the script.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);
/* Initialization */

if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../../');
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');

require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/events.php');
require_once(DOKU_INC.'inc/parserutils.php');
require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/parser/parser.php');


require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN.'sphinxsearch/functions.php');

// Make sure the plugin's directory below the configured savedir exists.
if (!file_exists(DOKU_INC.$conf['savedir']."/sphinxsearch/")){
    mkdir(DOKU_INC.$conf['savedir']."/sphinxsearch/");
}

$pagesList = getPagesList();

// Document header and schema; the matching closing tag is echoed after the loop.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();
foreach($pagesList as $row){
    $dokuPageId = $row['id'];

    // Skip rows without an id and pages whose file is missing. The existence
    // test has to look at the page currently being processed.
    if (empty($dokuPageId) || !file_exists(wikiFN($dokuPageId))){
        continue;
    }

    //get meta data
    $metadata = p_get_metadata($dokuPageId);

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)){
        // One document per section of the page.
        foreach($sections as $hid => $section){
            //parse meta data for headers, abstract, date, authors
            $data = array();
            // Document id is derived from the page id plus the section key.
            $data['id'] = crc32($dokuPageId.$hid);
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $metadata['date']['modified'];
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            // The section text is passed on as returned, without rendering it.
            $data['body'] = $section['section'];

            //convert to utf-8 encoding
            $data['title_to_index'] = mb_convert_encoding($data['title_to_index'], "UTF-8", mb_detect_encoding($data['title_to_index'], "auto"));
            $data['body'] = mb_convert_encoding($data['body'], "UTF-8", mb_detect_encoding($data['body'], "auto"));

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // No sections returned: emit the whole page as one level-1 document.
        $data = array();
        $data['id'] = crc32($dokuPageId);
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $metadata['date']['modified'];
        $data['title'] = strip_tags($metadata['title']);
        $data['title_to_index'] = $metadata['title'];
        // Raw page source is read from the page file, without rendering it.
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        //convert to utf-8 encoding
        $data['title_to_index'] = mb_convert_encoding($data['title_to_index'], "UTF-8", mb_detect_encoding($data['title_to_index'], "auto"));
        $data['body'] = mb_convert_encoding($data['body'], "UTF-8", mb_detect_encoding($data['body'], "auto"));

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $metadata['title'], $metadata['title']);
    }

}
echo '</sphinx:docset>';

// Restore the display_errors value saved at the top of the script.
ini_set('display_errors', $deStatus);