<?php
// 1. Start buffering to catch stray output produced while the environment loads.
//    Remember the nesting level first so exactly the buffers opened from here on
//    can be discarded later, whether or not anything was written into them.
$obLevelBeforeBootstrap = ob_get_level();
ob_start();

// 2. Force PHP silence for the stream: notices/warnings printed to stdout would
//    corrupt the XML document this script emits.
error_reporting(0);
ini_set('display_errors', '0');

// 3. Setup DokuWiki Environment
if (!defined('DOKU_INC')) {
    // Three directory levels up from this plugin directory
    // (e.g. /lib/plugins/<plugin>/) is the DokuWiki root.
    $dokuRoot = realpath(__DIR__ . '/../../../');
    if ($dokuRoot === false) {
        // realpath() returns false when the path cannot be resolved; report it
        // on stderr instead of continuing with a bogus "/" root.
        file_put_contents('php://stderr', 'Error: cannot resolve the DokuWiki root from ' . __DIR__ . "\n");
        exit(1);
    }
    define('DOKU_INC', $dokuRoot . '/');
}

// With errors silenced a failing require_once would terminate the script without
// any diagnostic, so verify the entry point up front.
if (!is_file(DOKU_INC . 'inc/init.php')) {
    file_put_contents('php://stderr', 'Error: ' . DOKU_INC . "inc/init.php not found. Check DOKU_INC.\n");
    exit(1);
}

// 4. Load DokuWiki Core
require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
require_once(DOKU_INC . 'inc/search.php');
require_once(DOKU_INC . 'inc/parserutils.php');

// 5. Load plugin files via absolute paths relative to this script, so they are
//    found regardless of the current working directory or include_path.
require_once(__DIR__ . '/PageMapper.php');
require_once(__DIR__ . '/functions.php');

// 6. Discard everything captured during bootstrap and stop buffering.
//    The buffers are closed even when they are empty; otherwise the entire XML
//    feed would be held in memory until the script terminates.
while (ob_get_level() > $obLevelBeforeBootstrap) {
    ob_end_clean();
}

// Muzzle any previously installed error handler: returning true marks every
// non-fatal error as handled, so nothing is printed into the XML stream.
set_error_handler(static function () {
    return true;
});

// 7. Emit the xmlpipe2 preamble: the XML declaration, the opening docset root
//    element, and the schema declaring the fields and the "level" attribute.
//    Each entry is written followed by PHP_EOL.
$sphinxPreamble = [
    '<?xml version="1.0" encoding="utf-8"?>',
    '<sphinx:docset>',
    '<sphinx:schema>
    <sphinx:field name="title"/>
    <sphinx:field name="body"/>
    <sphinx:field name="namespace"/>
    <sphinx:field name="pagename"/>
    <sphinx:field name="level"/>
    <sphinx:field name="modified"/>
    <sphinx:attr name="level" type="int" bits="8" default="1"/>
</sphinx:schema>',
];
foreach ($sphinxPreamble as $sphinxPreamblePart) {
    echo $sphinxPreamblePart, PHP_EOL;
}
// Leave the global scope as it was before this step.
unset($sphinxPreamble, $sphinxPreamblePart);

// Stream one <sphinx:document> per page section (or one per page when the page
// yields no sections), register each emitted document with the PageMapper, and
// finally close the docset. Diagnostics go to stderr so they never mix with the
// XML written to stdout.
$logToStderr = static function ($message) {
    file_put_contents('php://stderr', $message . "\n");
};

try {
    $pageMapper = new PageMapper();
    $pagesList = getPagesList();

    if (empty($pagesList)) {
        $logToStderr('Error: getPagesList() returned 0 pages. Check DOKU_INC or permissions.');
        $logToStderr('Data dir is: ' . ($conf['datadir'] ?? '(unknown)'));
    }

    foreach ($pagesList ?: [] as $row) {
        // Validate the id before it is used for logging or path lookup. Compare
        // against '' rather than relying on truthiness so a page named "0" is kept.
        $dokuPageId = (string) ($row['id'] ?? '');
        if ($dokuPageId === '') {
            continue;
        }

        $logToStderr("Processing: $dokuPageId ... ");

        // A failure while processing one page must not abort the remaining pages.
        try {
            $page_file = wikiFN($dokuPageId);
            if (!file_exists($page_file)) {
                continue;
            }

            $metadata = p_get_metadata($dokuPageId);
            $sections = getDocumentsByHeadings($dokuPageId, $metadata);

            // These values depend only on the page, not on the section, so they
            // are computed once per page.
            // NOTE(review): assumes getCategories()/getPagename() return the same
            // value for repeated calls with the same id - confirm in functions.php.
            $namespace = getCategories($dokuPageId);
            $pagename = getPagename($dokuPageId);
            $modified = (int) ($metadata['date']['modified'] ?? time());

            if (!empty($sections)) {
                foreach ($sections as $hid => $section) {
                    if (empty($section['section'])) {
                        continue;
                    }

                    $data = [
                        // "?: 1" keeps the document id non-zero when crc32() yields 0.
                        'id'             => sprintf('%u', crc32($dokuPageId . $hid) ?: 1),
                        'namespace'      => $namespace,
                        'pagename'       => $pagename,
                        'level'          => (int) $section['level'],
                        'modified'       => $modified,
                        'title'          => strip_tags($section['title_text']),
                        'title_to_index' => $section['title_to_index'],
                        'body'           => get_clean_text($section['section']),
                    ];
                    echo formatXml($data) . PHP_EOL;
                    $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
                }
                continue;
            }

            // No sections were found: index the raw page text as a single document.
            $pageTitle = $metadata['title'] ?? $dokuPageId;
            $data = [
                'id'             => sprintf('%u', crc32($dokuPageId) ?: 1),
                'namespace'      => $namespace,
                'pagename'       => $pagename,
                'level'          => 1,
                'modified'       => $modified,
                'title'          => strip_tags($pageTitle),
                'title_to_index' => $pageTitle,
                'body'           => get_clean_text(io_readFile($page_file)),
            ];

            // Skip pages without text. Compare against '' instead of using empty()
            // so a body consisting of "0" is still indexed.
            if (trim((string) $data['body']) !== '') {
                echo formatXml($data) . PHP_EOL;
                $pageMapper->add($dokuPageId, $data['title_to_index'], $data['title_to_index']);
            }
        } catch (Throwable $e) {
            $logToStderr("Error: skipping $dokuPageId: " . $e->getMessage());
        }
    }
} catch (Throwable $e) {
    // Log fatal errors to stderr so they don't corrupt the XML but you can still see them
    $logToStderr($e->getMessage());
}

// Always close the root element so the consumer receives a well-formed document.
echo '</sphinx:docset>' . PHP_EOL;
