xref: /plugin/sphinxsearch-was/xmlall.php (revision 133:aa595765bbfd)
<?php
/**
 * XML feed export
 *
 * Walks the page list returned by getPagesList() and prints one XML document
 * per page section (or one per whole page when the page yields no sections)
 * inside a <sphinx:docset> envelope. Every printed document is also
 * registered with PageMapper.
 *
 * @author     Ivinco <opensource@ivinco.com>
 */

// Turn off on-screen error output while the feed is written so PHP messages
// cannot end up inside the XML; the previous setting is restored at the end.
$deStatus = ini_get('display_errors');
ini_set('display_errors', 0);

/* Initialization */

if (!defined('DOKU_INC')) define('DOKU_INC', dirname(__FILE__) . '/../../../');
if (!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');

// Must be defined before inc/init.php is loaded.
// NOTE(review): presumably tells DokuWiki not to start a session - confirm.
define('NOSESSION', true);

require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
require_once(DOKU_INC . 'inc/events.php');
require_once(DOKU_INC . 'inc/parserutils.php');
require_once(DOKU_INC . 'inc/auth.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/search.php');
require_once(DOKU_INC . 'inc/parser/parser.php');

require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN . 'sphinxsearch/functions.php');

// Locate the DokuWiki data directory: try the configured savedir as given,
// then relative to DOKU_INC; give up if neither exists.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) die('invalid DokuWiki savedir');
}

// Make sure the plugin's own sub-directory exists inside the data directory.
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!@file_exists($fullSphinxPath)) {
    mkdir($fullSphinxPath);
}

$pagesList = getPagesList();

// Feed prologue: XML declaration, docset opening tag and the schema block.
echo '<?xml version="1.0" encoding="utf-8"?>
<sphinx:docset>

<sphinx:schema>
<sphinx:field name="title"/>
<sphinx:field name="body"/>
<sphinx:field name="namespace"/>
<sphinx:field name="pagename"/>
<sphinx:field name="level"/>
<sphinx:field name="modified"/>
<sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>
';

$pageMapper = new PageMapper();

// The hidepages setting does not change while the feed is generated, so the
// regular expression is assembled once instead of once per page.
$hidePattern = empty($conf['hidepages']) ? '' : '/' . $conf['hidepages'] . '/';

foreach ($pagesList as $row) {
    $dokuPageId = isset($row['id']) ? $row['id'] : '';
    if (empty($dokuPageId)) {
        continue;
    }

    // Skip pages that do not exist. resolve_pageid() receives the id in a
    // by-reference argument, so it is given a copy of the current id. The
    // previous code passed a variable that was never assigned, which means
    // the existence flag did not describe the page being exported.
    $page = $dokuPageId;
    $exists = false;
    resolve_pageid('', $page, $exists);
    if (!$exists) {
        continue;
    }

    // Check the hidepages pattern to exclude hidden pages.
    if ($hidePattern !== '' && preg_match($hidePattern, ':' . $dokuPageId)) {
        continue;
    }

    // Get meta data and the per-heading sections of the page.
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);
    $modified = isset($metadata['date']['modified']) ? $metadata['date']['modified'] : null;

    // Collect the documents for this page first; each entry carries the data
    // handed to formatXml() and the argument list for PageMapper::add().
    $documents = array();

    if (!empty($sections)) {
        // One document per non-empty section.
        foreach ($sections as $hid => $section) {
            if (empty($section['section'])) {
                continue;
            }

            $data = array();
            // Document id: unsigned CRC32 of the page id followed by the section key.
            $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = $section['level'];
            $data['modified'] = $modified;
            $data['title'] = strip_tags($section['title_text']);
            $data['title_to_index'] = $section['title_to_index'];
            $data['body'] = $section['section'];

            $documents[] = array(
                'data' => $data,
                'mapperArgs' => array($dokuPageId, $data['title'], $section['title'], $hid),
            );
        }
    } else {
        // No sections: export the raw page text as a single level-1 document,
        // unless the page file is empty.
        $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;
        $pageBody = io_readFile(wikiFN($dokuPageId));

        if (!empty($pageBody)) {
            $data = array();
            // Document id: unsigned CRC32 of the page id alone.
            $data['id'] = sprintf('%u', crc32($dokuPageId));
            $data['namespace'] = getCategories($dokuPageId);
            $data['pagename'] = getPagename($dokuPageId);
            $data['level'] = 1;
            $data['modified'] = $modified;
            $data['title'] = strip_tags((string) $pageTitle);
            $data['title_to_index'] = $pageTitle;
            $data['body'] = $pageBody;

            $documents[] = array(
                'data' => $data,
                'mapperArgs' => array($dokuPageId, $pageTitle, $pageTitle),
            );
        }
    }

    foreach ($documents as $document) {
        $data = $document['data'];

        // Convert the indexed text to UTF-8. When the source encoding cannot
        // be detected the text is kept unchanged instead of handing an
        // invalid encoding to mb_convert_encoding().
        foreach (array('title_to_index', 'body') as $field) {
            $text = (string) $data[$field];
            $encoding = mb_detect_encoding($text, "auto");
            if ($encoding !== false) {
                $text = mb_convert_encoding($text, "UTF-8", $encoding);
            }
            $data[$field] = $text;
        }

        echo formatXml($data) . "\n";

        // Register the document with the mapper using the same three or four
        // arguments as collected above.
        call_user_func_array(array($pageMapper, 'add'), $document['mapperArgs']);
    }
}
echo '</sphinx:docset>';

// Restore the display_errors setting saved at the top of the script.
ini_set('display_errors', $deStatus);