<?php
/**
 * XML feed export
 *
 * Walks every wiki page and prints a Sphinx document set (<sphinx:docset>)
 * to standard output. Pages that have a table of contents are exported as
 * one document per heading; all other pages are exported as one document.
 * Each exported document is also registered with the plugin's PageMapper.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Andreas Gohr <andi@splitbrain.org>
 */


/* Initialization */

if (!defined('DOKU_INC')) define('DOKU_INC', dirname(__FILE__).'/../../../');
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC.'lib/plugins/');

require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/events.php');
require_once(DOKU_INC.'inc/parserutils.php');
require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/parser/parser.php');


require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');

// Make sure the plugin's working directory below the save dir exists.
if (!file_exists(DOKU_INC.$conf['savedir']."/sphinxsearch/")) {
    mkdir(DOKU_INC.$conf['savedir']."/sphinxsearch/");
}

$pagesList = getPagesList();

echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="categories"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:field name="creator"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();

foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];

    // get meta data
    $metadata = p_get_metadata($dokuPageId);

    // Read the metadata defensively: a notice raised here would be printed
    // into the XML stream when display_errors is enabled.
    $modified  = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : '';
    $creator   = isset($metadata['creator']) ? $metadata['creator'] : '';
    $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);
    if (!empty($sections)) {
        // One document per heading; the heading id keeps the ids distinct.
        foreach ($sections as $hid => $section) {
            $data = array();
            $data['id']         = crc32($dokuPageId.$hid);
            $data['categories'] = getCategories($dokuPageId) . '#' . $hid;
            $data['level']      = $section['level'];
            $data['modified']   = $modified;
            $data['creator']    = $creator;
            $data['title']      = strip_tags($section['title']);
            $data['body']       = strip_tags(p_render('xhtml', p_get_instructions($section['section']), $info));

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $section['title'], $hid);
        }
    } else {
        // No table of contents: export the whole page as a single document.
        $data = array();
        $data['id']         = crc32($dokuPageId);
        $data['categories'] = getCategories($dokuPageId);
        $data['level']      = 1;
        $data['modified']   = $modified;
        $data['creator']    = $creator;
        $data['title']      = strip_tags((string) $pageTitle);
        // NOTE(review): the modified timestamp is passed as p_wiki_xhtml()'s
        // second argument - confirm that this is the intended revision.
        $data['body']       = strip_tags(p_wiki_xhtml($dokuPageId, $modified, false));

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $pageTitle);
    }
}

echo '</sphinx:docset>';



/**
 * Render one document as a <sphinx:document> XML fragment.
 *
 * title, body and categories are wrapped in CDATA sections; level, modified
 * and creator are emitted as escaped element text. Missing keys are treated
 * as empty strings.
 *
 * @param array $data keys: id, title, body, categories, level, modified, creator
 * @return string the XML fragment, surrounded by blank lines
 */
function formatXml($data)
{
    $data = $data + array(
        'id' => '', 'title' => '', 'body' => '', 'categories' => '',
        'level' => '', 'modified' => '', 'creator' => '',
    );

    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[{title}]]></title>
<body><![CDATA[{body}]]></body>
<categories><![CDATA[{categories}]]></categories>
<level>{level}</level>
<modified>{modified}</modified>
<creator>{creator}</creator>
</sphinx:document>

';

    // strtr() substitutes in a single pass, so a value that happens to
    // contain another placeholder (e.g. a title with "{body}") stays as is.
    return strtr($xmlFormat, array(
        '{id}'         => (string) $data['id'],
        '{title}'      => sphinxXmlCdata($data['title']),
        '{body}'       => sphinxXmlCdata($data['body']),
        '{categories}' => sphinxXmlCdata($data['categories']),
        '{level}'      => sphinxXmlText($data['level']),
        '{modified}'   => sphinxXmlText($data['modified']),
        '{creator}'    => sphinxXmlText($data['creator']),
    ));
}

/**
 * Remove the control characters that XML 1.0 does not allow.
 *
 * Works on bytes; tab, line feed and carriage return are kept.
 *
 * @param mixed $text value to clean, cast to string
 * @return string
 */
function sphinxXmlStripControlChars($text)
{
    return (string) preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', (string) $text);
}

/**
 * Make a value safe for use inside a CDATA section.
 *
 * @param mixed $text value to embed, cast to string
 * @return string
 */
function sphinxXmlCdata($text)
{
    // "]]>" would end the section early, so split it across two sections.
    return str_replace(']]>', ']]]]><![CDATA[>', sphinxXmlStripControlChars($text));
}

/**
 * Escape a value for use as plain XML element text.
 *
 * @param mixed $text value to embed, cast to string
 * @return string
 */
function sphinxXmlText($text)
{
    // "&" is listed first so the entities added for "<" and ">" are not
    // escaped a second time.
    return str_replace(
        array('&', '<', '>'),
        array('&amp;', '&lt;', '&gt;'),
        sphinxXmlStripControlChars($text)
    );
}

/**
 * Split a page into one entry per heading of its table of contents.
 *
 * The page is read and parsed once and then cut up for every heading.
 * Headings are located by title, so when a page repeats a title each of
 * those entries receives the text of the first matching section.
 *
 * @param string $id       wiki page id
 * @param array  $metadata page metadata; description.tableofcontents is used
 * @return array|false     map of heading id => array(section, title, level),
 *                         or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) return false;

    list($doc, $instructions) = sphinxParseForSections($id);

    $sections = array();
    foreach ($metadata['description']['tableofcontents'] as $row) {
        $sections[$row['hid']] = array(
            'section' => sphinxExtractSection($doc, $instructions, $row['title']),
            'title'   => $row['title'],
            'level'   => $row['level']
        );
    }
    return $sections;
}

/**
 * Return the raw wiki text of the first section whose heading equals $header.
 *
 * @param string $id     wiki page id
 * @param string $header heading text to look for
 * @return string        text from the heading up to the end of its section,
 *                       or '' when no such heading exists
 */
function getSection($id, $header)
{
    list($doc, $instructions) = sphinxParseForSections($id);

    return sphinxExtractSection($doc, $instructions, $header);
}

/**
 * Read a page and parse it with just the modes needed to locate headers.
 *
 * @param string $id wiki page id
 * @return array     array(normalized page text, parser instructions)
 */
function sphinxParseForSections($id)
{
    // Plain "new": assigning the result of new by reference ("= & new") is
    // a parse error on PHP 7 and later.
    $parser = new Doku_Parser();
    $parser->Handler = new Doku_Handler();

    // Load the header mode to find headers
    $parser->addMode('header', new Doku_Parser_Mode_Header());

    // Load the modes which could contain markup that might be
    // mistaken for a header
    $parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());
    $parser->addMode('preformatted', new Doku_Parser_Mode_Preformatted());
    $parser->addMode('table', new Doku_Parser_Mode_Table());
    $parser->addMode('unformatted', new Doku_Parser_Mode_Unformatted());
    $parser->addMode('php', new Doku_Parser_Mode_PHP());
    $parser->addMode('html', new Doku_Parser_Mode_HTML());
    $parser->addMode('code', new Doku_Parser_Mode_Code());
    $parser->addMode('file', new Doku_Parser_Mode_File());
    $parser->addMode('quote', new Doku_Parser_Mode_Quote());
    $parser->addMode('footnote', new Doku_Parser_Mode_Footnote());
    $parser->addMode('internallink', new Doku_Parser_Mode_InternalLink());
    $parser->addMode('media', new Doku_Parser_Mode_Media());
    $parser->addMode('externallink', new Doku_Parser_Mode_ExternalLink());
    $parser->addMode('windowssharelink', new Doku_Parser_Mode_WindowsShareLink());
    $parser->addMode('filelink', new Doku_Parser_Mode_FileLink());

    // Load the raw wiki document and get its list of instructions
    $doc = io_readFile(wikiFN($id));
    $instructions = $parser->parse($doc);
    unset($parser);

    if (!is_array($instructions)) {
        $instructions = array();
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes will match
    $doc = "\n".str_replace("\r\n", "\n", $doc)."\n";

    return array($doc, $instructions);
}

/**
 * Cut the section that starts at the first heading equal to $header.
 *
 * @param string $doc          normalized page text
 * @param array  $instructions parser instructions for that text
 * @param string $header       heading text to look for
 * @return string              the section text, or '' when not found
 */
function sphinxExtractSection($doc, $instructions, $header)
{
    $header    = (string) $header;
    $inSection = false;
    $closed    = false;
    $startPos  = 0;
    $endPos    = 0;

    foreach ($instructions as $instruction) {
        if (!$inSection) {
            // Strict comparison: with "==" numeric-looking titles such as
            // "1" and "1.0" would be treated as the same heading.
            if ($instruction[0] == 'header' && trim($instruction[1][0]) === $header) {
                $startPos  = $instruction[2];
                $inSection = true;
            }
        } elseif ($instruction[0] == 'section_close') {
            $endPos = $instruction[2];
            $closed = true;
            break;
        }
    }

    if (!$inSection) {
        return '';
    }
    if (!$closed) {
        // No closing instruction was seen: take everything up to the end
        // instead of passing a negative length to substr().
        return (string) substr($doc, $startPos);
    }

    return (string) substr($doc, $startPos, max(0, $endPos - $startPos));
}

/**
 * Build the "categories" string for a page id.
 *
 * An id without a colon is returned unchanged. Otherwise the result lists
 * the full id followed by each shorter namespace prefix, every entry being
 * followed by one space, e.g. "a:b:c" gives "a:b:c a:b a  ".
 *
 * @param string $id wiki page id
 * @return string
 */
function getCategories($id)
{
    if (empty($id)) return '';

    if (false === strpos($id, ":")) {
        return $id;
    }

    $ns = explode(":", $id);

    $result = '';
    // The last pass (depth 0) appends only a space; it is kept so that the
    // output stays exactly as it has always been.
    for ($depth = count($ns); $depth >= 0; $depth--) {
        $result .= implode(':', array_slice($ns, 0, $depth)) . ' ';
    }
    return $result;
}


/**
 * Method return all wiki page names
 *
 * The entries come back in the order in which search() collects them.
 *
 * @global array $conf
 * @return array
 */
function getPagesList()
{
    global $conf;

    $data = array();
    search($data, $conf['datadir'], 'search_allpages', '', '');

    return $data;
}

/**
 * Sample of a page metadata array with the keys read above (print_r dump):
 *
 * Array
(
    [date] => Array
        (
            [created] => 1239181434
            [modified] => 1239202933
        )

    [creator] => Sergey Nikolaev
    [last_change] => Array
        (
            [date] => 1239202933
            [ip] => 85.118.229.162
            [type] => E
            [id] => cal:minutes:boardreader:200904:20090408
            [user] => snikolaev
            [sum] =>
            [extra] =>
        )

    [contributor] => Array
        (
            [snikolaev] => Sergey Nikolaev
        )

    [title] => BoardReader call of Apr 8 2009
    [description] => Array
        (
            [tableofcontents] => Array
                (
                    [0] => Array
                        (
                            [hid] => boardreader_call_of_apr_8_2009
                            [title] => BoardReader call of Apr 8 2009
                            [type] => ul
                            [level] => 1
                        )

                    [1] => Array
                        (
                            [hid] => sergey
                            [title] => Sergey
                            [type] => ul
                            [level] => 2
                        )

                    [2] => Array
                        (
                            [hid] => slava
                            [title] => Slava
                            [type] => ul
                            [level] => 2
                        )

                    [3] => Array
                        (
                            [hid] => roman
                            [title] => Roman
                            [type] => ul
                            [level] => 2
                        )

                    [4] => Array
                        (
                            [hid] => nikita
                            [title] => Nikita
                            [type] => ul
                            [level] => 2
                        )

                    [5] => Array
                        (
                            [hid] => discussion
                            [title] => Discussion
                            [type] => ul
                            [level] => 2
                        )

                )

            [abstract] => Participants: Mindaugas, Sergey, Slava, Roman, Nikita

Duration: 23 min

Sergey

Status:

  * published Roman's changes
  * started reviewing Slava's changes


Plans:

  * start altering (singature field)
  * select server error handling
  * publish Slava's and Roman's changes
        )

    [internal] => Array
        (
            [cache] => 1
            [toc] => 1
        )

)

 */