<?php
/**
 * XML feed export
 *
 * Writes a <sphinx:docset> XML document to standard output containing one
 * <sphinx:document> per indexed wiki section (or one per page when the page
 * yields no sections), and records every emitted document in the PageMapper
 * so search hits can later be mapped back to a page / heading.
 *
 * @author     Ivinco <opensource@ivinco.com>
 */

// Keep PHP diagnostics out of the XML stream; the previous setting is
// restored at the very end of the script.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);
/* Initialization */

if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../../');
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');

require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/events.php');
require_once(DOKU_INC.'inc/parserutils.php');
require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/parser/parser.php');


require_once(DOKU_PLUGIN.'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN.'sphinxsearch/functions.php');

// Locate the data directory: try the configured savedir as given first,
// then relative to the DokuWiki installation root.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) die('invalid DokuWiki savedir');
}

// Make sure the plugin's own sub-directory of the data directory exists.
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!@file_exists($fullSphinxPath)) {
    mkdir($fullSphinxPath);
}

$pagesList = getPagesList();

echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();
foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)) {
        continue;
    }

    // Skip pages that do not exist. The check must be made against the
    // current page id; a copy is passed because resolve_pageid() receives
    // the page argument by reference and may normalise it.
    $resolvedId = $dokuPageId;
    $exists = false;
    resolve_pageid('', $resolvedId, $exists);
    if (!$exists) {
        continue;
    }

    // Check the hidepages pattern to exclude hidden pages. An empty setting
    // means "hide nothing"; without this guard the pattern would be "//",
    // which matches every page and would empty the whole feed.
    if (!empty($conf['hidepages']) && preg_match("/".$conf['hidepages']."/", ':'.$dokuPageId)) {
        continue;
    }

    // Get meta data; individual keys may be absent, so read them defensively.
    $metadata = p_get_metadata($dokuPageId);
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;

    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    if (!empty($sections)) {
        // One document per non-empty section of the page.
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }

            $data = array();
            // Document id: unsigned CRC32 of page id + heading id.
            $data['id'] = sprintf('%u', crc32($dokuPageId.$hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            // Section text is indexed exactly as getDocumentsByHeadings() supplies it.
            $data['body'] = $section['section'];

            // Convert to utf-8 encoding. mb_detect_encoding() returns false
            // when it cannot decide; the value is then left untouched rather
            // than being passed on with an invalid source encoding.
            foreach (array('title_to_index', 'body') as $field) {
                $detected = mb_detect_encoding($data[$field], "auto");
                if ($detected !== false) {
                    $data[$field] = mb_convert_encoding($data[$field], "UTF-8", $detected);
                }
            }

            echo formatXml($data)."\n";
            $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
        }
    } else {
        // No sections were produced: export the page as a single document.
        $pageTitle = isset($metadata['title']) ? $metadata['title'] : '';

        $data = array();
        // No heading id applies here, so the id is derived from the page id
        // alone. This matches the PageMapper::add() call below, which is also
        // made without a heading id, and keeps the id independent of whatever
        // page happened to be processed before this one.
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['title'] = strip_tags($pageTitle);
        $data['title_to_index'] = $pageTitle;
        // Index the contents of the page file located by wikiFN().
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        if (empty($data['body'])) {
            continue;
        }

        // Convert to utf-8 encoding (same handling as for sections above).
        foreach (array('title_to_index', 'body') as $field) {
            $detected = mb_detect_encoding($data[$field], "auto");
            if ($detected !== false) {
                $data[$field] = mb_convert_encoding($data[$field], "UTF-8", $detected);
            }
        }

        echo formatXml($data)."\n";
        $pageMapper->add($dokuPageId, $pageTitle, $pageTitle);
    }

}
echo '</sphinx:docset>';

ini_set('display_errors', $deStatus);