*/

/* Initialization */
if (!defined('DOKU_INC')) {
    define('DOKU_INC', dirname(__FILE__).'/../../../');
}
if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC.'lib/plugins/');
}

require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/events.php');
require_once(DOKU_INC.'inc/parserutils.php');
require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/parser/parser.php');
require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');

// Make sure the plugin's working directory below the save dir exists.
if (!file_exists(DOKU_INC.$conf['savedir']."/sphinxsearch/")) {
    mkdir(DOKU_INC.$conf['savedir']."/sphinxsearch/");
}

$pagesList = getPagesList();

// NOTE(review): this opening literal (and the closing one after the loop)
// contains no markup in this copy of the file -- confirm against the upstream
// source that the document-set wrapper has not been lost.
echo ' ';

$pageMapper = new PageMapper();

// p_render() fills this by reference; initialise it so it is never undefined.
$info = array();

foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];

    //get meta data
    $metadata = p_get_metadata($dokuPageId);

    // Read optional metadata keys defensively: a missing key yields null
    // (the value the unguarded access produced) without raising a
    // notice/warning that would end up inside the emitted output.
    $modified  = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;
    $creator   = isset($metadata['creator']) ? $metadata['creator'] : null;
    $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        // One output document per heading listed in the table of contents.
        foreach ($sections as $hid => $section) {
            //parse meta data for headers, abstract, date, authors
            $data = array();
            $data['id'] = crc32($dokuPageId.$hid);
            $data['categories'] = getCategories($dokuPageId) . '#' . $hid;
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['creator'] = $creator;
            $data['title'] = strip_tags($section['title']);
            $data['body'] = strip_tags(p_render('xhtml', p_get_instructions($section['section']), $info));

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $section['title'], $hid);
        }
    } else {
        // No table of contents: emit the whole page as a single document.
        //parse meta data for headers, abstract, date, authors
        $data = array();
        $data['id'] = crc32($dokuPageId);
        $data['categories'] = getCategories($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['creator'] = $creator;
        $data['title'] = strip_tags((string) $pageTitle);
        $data['body'] = strip_tags(p_wiki_xhtml($dokuPageId, $modified, false));

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $pageTitle);
    }
}

echo '';

/**
 * Fill the output template with the values of one document.
 *
 * Placeholders are replaced in a single pass (strtr), so placeholder-like
 * text that occurs inside a value (e.g. a page body containing "{creator}")
 * is emitted verbatim instead of being substituted again.
 *
 * @param array $data expects the keys id, title, body, categories, level,
 *                    modified and creator
 * @return string the filled template
 */
function formatXml($data)
{
    $xmlFormat = ' <![CDATA[[{title}]]> {level} {modified} {creator} ';

    return strtr($xmlFormat, array(
        '{id}'         => (string) $data['id'],
        '{title}'      => (string) $data['title'],
        '{body}'       => (string) $data['body'],
        '{categories}' => (string) $data['categories'],
        '{level}'      => (string) $data['level'],
        '{modified}'   => (string) $data['modified'],
        '{creator}'    => (string) $data['creator'],
    ));
}

/**
 * Split a page into one entry per heading of its table of contents.
 *
 * NOTE(review): the section text is looked up by heading title, and
 * getSection() stops at the first matching header, so headings that share a
 * title all receive the first matching section. The page is also read and
 * parsed once per heading.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; description.tableofcontents is used
 * @return array|false map of heading id => array(section, title, level),
 *                     or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $sections = array();
    foreach ($metadata['description']['tableofcontents'] as $row) {
        $sections[$row['hid']] = array(
            'section' => getSection($id, $row['title']),
            'title'   => $row['title'],
            'level'   => $row['level']
        );
    }
    return $sections;
}

/**
 * Return the raw wiki text of the section introduced by the given heading.
 *
 * @param string $id     page id
 * @param string $header heading title to look for (compared against the
 *                       trimmed header text of the parsed page)
 * @return string raw wiki text from the header up to the end of its section;
 *                empty string when the heading is not found
 */
function getSection($id, $header)
{
    // Create the parser.
    // Plain "new": assigning the result of new by reference ("=& new") is a
    // parse error since PHP 7.
    $Parser = new Doku_Parser();

    // Add the Handler
    $Parser->Handler = new Doku_Handler();

    // Load the header mode to find headers
    $Parser->addMode('header', new Doku_Parser_Mode_Header());

    // Load the modes which could contain markup that might be
    // mistaken for a header
    $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());
    $Parser->addMode('preformatted', new Doku_Parser_Mode_Preformatted());
    $Parser->addMode('table', new Doku_Parser_Mode_Table());
    $Parser->addMode('unformatted', new Doku_Parser_Mode_Unformatted());
    $Parser->addMode('php', new Doku_Parser_Mode_PHP());
    $Parser->addMode('html', new Doku_Parser_Mode_HTML());
    $Parser->addMode('code', new Doku_Parser_Mode_Code());
    $Parser->addMode('file', new Doku_Parser_Mode_File());
    $Parser->addMode('quote', new Doku_Parser_Mode_Quote());
    $Parser->addMode('footnote', new Doku_Parser_Mode_Footnote());
    $Parser->addMode('internallink', new Doku_Parser_Mode_InternalLink());
    $Parser->addMode('media', new Doku_Parser_Mode_Media());
    $Parser->addMode('externallink', new Doku_Parser_Mode_ExternalLink());
    $Parser->addMode('windowssharelink', new Doku_Parser_Mode_WindowsShareLink());
    $Parser->addMode('filelink', new Doku_Parser_Mode_FileLink());

    // Loads the raw wiki document
    $doc = io_readFile(wikiFN($id));

    // Get a list of instructions
    $instructions = $Parser->parse($doc);
    unset($Parser);

    // Use this to watch when we're inside the section we want
    $inSection = false;
    $startPos = 0;
    $endPos = null;
    $wanted = (string) $header;

    // Loop through the instructions
    foreach ($instructions as $instruction) {
        if (!$inSection) {
            // Look for the requested heading. Strict comparison, so that
            // numeric-looking titles ("1.0" vs "1") are not treated as equal.
            if ($instruction[0] == 'header' && trim($instruction[1][0]) === $wanted) {
                $startPos = $instruction[2];
                $inSection = true;
            }
        } else {
            // Look for the end of the section
            if ($instruction[0] == 'section_close') {
                $endPos = $instruction[2];
                break;
            }
        }
    }

    if (!$inSection) {
        // Heading not present in this page.
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes will match
    $doc = "\n".str_replace("\r\n", "\n", $doc)."\n";

    if ($endPos === null) {
        // No closing instruction was seen: take everything up to the end of
        // the document instead of handing substr() a negative length.
        $endPos = strlen($doc);
    }

    // Positions are byte offsets, hence substr() rather than mb_substr().
    return substr($doc, $startPos, $endPos - $startPos);
}

/**
 * Build the space separated list of namespace prefixes of a page id.
 *
 * For "a:b:c" the result is "a:b:c a:b a  ": every prefix from the full id
 * down to the first segment, each followed by one space, plus one final empty
 * entry (hence the two trailing spaces). The exact output is kept as it was
 * because the caller appends '#'.$hid directly to it.
 *
 * @param string $id page id
 * @return string '' for an empty id, the id itself when it has no namespace,
 *                otherwise the prefix list described above
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }
    if (false === strpos($id, ":")) {
        return $id;
    }

    $ns = explode(":", $id);
    $result = '';
    // Count down to 0 (inclusive) to keep the trailing empty entry.
    for ($depth = count($ns); $depth >= 0; $depth--) {
        $result .= implode(':', array_slice($ns, 0, $depth)) . ' ';
    }
    return $result;
}

/**
 * Method return all wiki page names
 *
 * The list is sorted after it has been collected; sorting before the search
 * ran on an empty array and had no effect.
 *
 * @global array $conf
 * @return array
 */
function getPagesList()
{
    global $conf;

    $data = array();
    search($data, $conf['datadir'], 'search_allpages', '', '');
    sort($data);

    return $data;
}

/**
 * Sample page metadata (appears to be a print_r() dump of the kind of array
 * the code above reads from p_get_metadata()):
 *
 * Array
 * (
 *     [date] => Array ( [created] => 1239181434 [modified] => 1239202933 )
 *     [creator] => Sergey Nikolaev
 *     [last_change] => Array
 *         (
 *             [date] => 1239202933
 *             [ip] => 85.118.229.162
 *             [type] => E
 *             [id] => cal:minutes:boardreader:200904:20090408
 *             [user] => snikolaev
 *             [sum] =>
 *             [extra] =>
 *         )
 *     [contributor] => Array ( [snikolaev] => Sergey Nikolaev )
 *     [title] => BoardReader call of Apr 8 2009
 *     [description] => Array
 *         (
 *             [tableofcontents] => Array
 *                 (
 *                     [0] => Array ( [hid] => boardreader_call_of_apr_8_2009 [title] => BoardReader call of Apr 8 2009 [type] => ul [level] => 1 )
 *                     [1] => Array ( [hid] => sergey [title] => Sergey [type] => ul [level] => 2 )
 *                     [2] => Array ( [hid] => slava [title] => Slava [type] => ul [level] => 2 )
 *                     [3] => Array ( [hid] => roman [title] => Roman [type] => ul [level] => 2 )
 *                     [4] => Array ( [hid] => nikita [title] => Nikita [type] => ul [level] => 2 )
 *                     [5] => Array ( [hid] => discussion [title] => Discussion [type] => ul [level] => 2 )
 *                 )
 *             [abstract] => Participants: Mindaugas, Sergey, Slava, Roman, Nikita Duration: 23 min Sergey Status: * published Roman's changes * started reviewing Slava's changes Plans: * start altering (singature field) * select server error handling * publish Slava's and Roman's changes
 *         )
 *     [internal] => Array ( [cache] => 1 [toc] => 1 )
 * )
 */