<?php
/**
 * Streams the wiki content to stdout as a Sphinx xmlpipe2 document set.
 *
 * Flow: bootstrap DokuWiki, discard anything the bootstrap printed, emit the
 * <sphinx:docset> header and schema, then emit one record per non-empty page
 * section (or a single record for a page without sections) and register every
 * emitted record with PageMapper. Diagnostics are written to stderr only, so
 * stdout stays a clean XML stream even when something goes wrong.
 *
 * PageMapper, getPagesList(), getDocumentsByHeadings(), getCategories(),
 * getPagename(), get_clean_text() and formatXml() are not defined in this
 * file; they are expected to come from the files required below.
 */

// 1. Buffer everything the bootstrap may print so it can be thrown away later.
//    The current nesting depth is remembered so that only buffers opened from
//    this point on are unwound in step 6.
$initialBufferLevel = ob_get_level();
ob_start();

// 2. Keep PHP from printing errors into the stream.
error_reporting(0);
ini_set('display_errors', '0');

// Writes one line to stderr, which never mixes with the XML on stdout.
$writeStderr = static function (string $message) {
    file_put_contents('php://stderr', $message . "\n");
};

// 3. Locate the DokuWiki root: three directory levels above this file
//    (this script is expected to live in lib/plugins/<plugin>/).
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');
}

// With error display switched off a failing require would end the script
// without any hint, so verify the core entry point explicitly.
if (!is_file(DOKU_INC . 'inc/init.php')) {
    $writeStderr('Error: DokuWiki core not found, ' . DOKU_INC . 'inc/init.php is missing.');
    exit(1);
}

// 4. Load the DokuWiki core.
require_once DOKU_INC . 'inc/init.php';
require_once DOKU_INC . 'inc/common.php';
require_once DOKU_INC . 'inc/search.php';
require_once DOKU_INC . 'inc/parserutils.php';

// 5. Load the plugin files through absolute paths so they resolve regardless
//    of the include path or the current working directory.
require_once __DIR__ . '/PageMapper.php';
require_once __DIR__ . '/functions.php';

// 6. Swallow every PHP error from here on, then drop all output buffered since
//    step 1. The handler is installed first so that a notice raised while
//    unwinding cannot reach stdout. Unwinding is unconditional: leaving the
//    buffer open would hold the whole document set in memory until exit.
set_error_handler(static function (): bool {
    return true;
});
while (ob_get_level() > $initialBufferLevel && ob_end_clean()) {
    // Keep unwinding; the loop stops early if a buffer refuses to be removed.
}

// 7. Start the clean XML stream.
echo '<?xml version="1.0" encoding="utf-8"?>' . PHP_EOL;
echo '<sphinx:docset>' . PHP_EOL;
echo '<sphinx:schema>
    <sphinx:field name="title"/>
    <sphinx:field name="body"/>
    <sphinx:field name="namespace"/>
    <sphinx:field name="pagename"/>
    <sphinx:field name="level"/>
    <sphinx:field name="modified"/>
    <sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>' . PHP_EOL;

try {
    $pageMapper = new PageMapper();
    $pagesList = getPagesList();

    if (empty($pagesList)) {
        $writeStderr("Error: getPagesList() returned 0 pages. Check DOKU_INC or permissions.");
        $writeStderr("Data dir is: " . ($conf['datadir'] ?? ''));
    } else {
        foreach ($pagesList as $row) {
            $dokuPageId = $row['id'] ?? null;

            // Progress trace, one line per listed page.
            $writeStderr("Processing: $dokuPageId ... ");

            // Skip rows without an id before resolving any path for them.
            if (!$dokuPageId) {
                continue;
            }

            $pageFile = wikiFN($dokuPageId);
            if (!file_exists($pageFile)) {
                continue;
            }

            $metadata = p_get_metadata($dokuPageId);
            $sections = getDocumentsByHeadings($dokuPageId, $metadata);

            // Values shared by every record of this page, computed once per page.
            // NOTE(review): assumes getCategories()/getPagename() are plain
            // lookups without side effects - confirm in functions.php.
            $namespace = getCategories($dokuPageId);
            $pagename = getPagename($dokuPageId);
            $modified = (int)($metadata['date']['modified'] ?? time());

            if (!empty($sections)) {
                // One record per section that actually has content.
                foreach ($sections as $hid => $section) {
                    if (empty($section['section'])) {
                        continue;
                    }

                    $record = [
                        // '%u' prints the checksum as unsigned; a checksum of 0
                        // is replaced by 1 (Sphinx document ids must be non-zero).
                        'id'             => sprintf('%u', crc32($dokuPageId . $hid) ?: 1),
                        'namespace'      => $namespace,
                        'pagename'       => $pagename,
                        'level'          => (int)$section['level'],
                        'modified'       => $modified,
                        'title'          => strip_tags($section['title_text']),
                        'title_to_index' => $section['title_to_index'],
                        'body'           => get_clean_text($section['section']),
                    ];

                    echo formatXml($record) . PHP_EOL;
                    $pageMapper->add($dokuPageId, $record['title'], $section['title'], $hid);
                }

                continue;
            }

            // No sections found: index the raw page text as one level-1 record.
            $pageTitle = $metadata['title'] ?? $dokuPageId;
            $record = [
                'id'             => sprintf('%u', crc32($dokuPageId) ?: 1),
                'namespace'      => $namespace,
                'pagename'       => $pagename,
                'level'          => 1,
                'modified'       => $modified,
                'title'          => strip_tags($pageTitle),
                'title_to_index' => $pageTitle,
                'body'           => get_clean_text(io_readFile($pageFile)),
            ];

            // Compare against '' instead of using empty(), which would also
            // discard a page whose whole text is "0".
            if (trim((string)$record['body']) !== '') {
                echo formatXml($record) . PHP_EOL;
                $pageMapper->add($dokuPageId, $record['title_to_index'], $record['title_to_index']);
            }
        }
    }
} catch (Throwable $e) {
    // Report the failure on stderr so it cannot corrupt the XML; the closing
    // tag below is still emitted, which keeps the stream well-formed.
    $writeStderr($e->getMessage());
}

echo '</sphinx:docset>' . PHP_EOL;