1<?php
2/**
3 * SphinxSearch Wrapper for DokuWiki
4 * Advanced Renderer: Slices DokuWiki blocks at the line level and re-renders snippets
5 * to preserve GeSHi highlighting, file tabs, and media elements.
6 */
7
/**
 * Wrapper around SphinxClient used by the DokuWiki search integration.
 *
 * Builds extended-syntax Sphinx queries, runs them, maps the matches back to
 * wiki pages (skipping pages the current user may not read) and produces HTML
 * snippets by re-rendering a slice of the matching DokuWiki instruction.
 *
 * Relies on DokuWiki globals (rawWiki, p_get_instructions, p_render, hsc,
 * auth_quickaclcheck, AUTH_READ) and on the PageMapper class, all of which are
 * defined outside this file.
 */
class SphinxSearch
{
    /** Extra matches requested per query; presumably headroom for hits dropped by the ACL check. */
    private const ACL_OVERFETCH = 50;
    /** Number of code lines shown before the first matching line. */
    private const CODE_CONTEXT_BEFORE = 3;
    /** Total number of code lines kept in a sliced code/file snippet. */
    private const CODE_SLICE_LINES = 15;
    /** Number of characters kept by the plain-text fallback excerpt. */
    private const FALLBACK_EXCERPT_CHARS = 250;

    private SphinxClient $_sphinx;
    private array $_result = [];
    private int $_offset = 0;
    /** Stored by setSnippetSize(); not read anywhere in this class. */
    private int $_snippetSize = 512;
    private int $_resultsPerPage = 10;
    private int $_titlePriority = 1;
    private int $_bodyPriority = 1;
    private int $_namespacePriority = 1;
    private int $_pagenamePriority = 1;
    private string $_query = '';
    private string $_index = '';
    private string $_host = '';
    private int $_port = 9312;

    /**
     * @param string $host  Sphinx searchd host
     * @param int    $port  Sphinx searchd port
     * @param string $index Name of the index passed to every Query() call
     */
    public function __construct(string $host, int $port, string $index)
    {
        $this->_host = $host;
        $this->_port = $port;
        $this->_index = $index;
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($this->_host, $this->_port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);
    }

    /** Stores the snippet size; non-positive values are ignored. */
    public function setSnippetSize(int $size): void
    {
        if ($size > 0) {
            $this->_snippetSize = $size;
        }
    }

    /** Intentionally a no-op: markup is handled by the DokuWiki parser. */
    public function setIndexTags(string $tags): void
    {
        /* Handled by DokuWiki Parser */
    }

    /**
     * Runs the previously configured query.
     *
     * @param int $start          Offset of the first match to fetch
     * @param int $resultsPerPage Page size used later by getPages()
     * @return bool true when at least one match was returned
     */
    public function search(int $start, int $resultsPerPage = 10): bool
    {
        $this->_resultsPerPage = $resultsPerPage;
        $this->_sphinx->SetFieldWeights([
            'namespace' => $this->_namespacePriority,
            'pagename'  => $this->_pagenamePriority,
            'title'     => $this->_titlePriority,
            'body'      => $this->_bodyPriority,
        ]);
        // SetLimits() rejects a negative offset or a non-positive limit, so clamp both.
        $this->_sphinx->SetLimits(max(0, $start), max(1, $resultsPerPage + self::ACL_OVERFETCH));
        $res = $this->_sphinx->Query($this->_query, $this->_index);
        $this->_result = is_array($res) ? $res : [];

        return !empty($this->_result['matches']);
    }

    /**
     * Turns the last search result into displayable page entries.
     *
     * Pages the current user cannot read are skipped. getOffset() afterwards
     * reports how many matches were consumed by this call (readable or not).
     *
     * @param string $keywords User query; '*' characters are stripped before snippet generation
     * @return array|false Entries keyed by page id, or false when the last search had no matches
     */
    public function getPages(string $keywords): array|false
    {
        // Count consumed matches per call instead of accumulating across calls.
        $this->_offset = 0;
        if (empty($this->_result['matches'])) {
            return false;
        }

        $cleanKeywords = $this->removeStars($keywords);
        $results = [];

        foreach ($this->getPagesIds() as $id => $data) {
            $this->_offset++;
            if (auth_quickaclcheck($data['page']) < AUTH_READ) {
                continue;
            }
            $results[$id] = [
                'page'             => $data['page'],
                'bodyExcerpt'      => $this->_generateDokuWikiSnippet($data['page'], $cleanKeywords),
                'titleTextExcerpt' => !empty($data['title_text']) ? $data['title_text'] : $data['page'],
                'hid'              => $data['hid'] ?? '',
            ];
            if (count($results) >= $this->_resultsPerPage) {
                break;
            }
        }

        return $results;
    }

    /**
     * Builds an HTML snippet for a page by re-rendering the first instruction
     * that contains one of the query words.
     *
     * Code/file blocks are cut down to a window of lines around the match and
     * rendered again through DokuWiki; cdata is re-rendered as is. A media
     * instruction is used only when no text block matches. When nothing
     * matches, the start of the page is returned as plain text.
     *
     * @param string $pageId Page whose raw wiki text is loaded
     * @param string $query  Whitespace-separated search words
     * @return string HTML snippet, or '' when the page has no text
     */
    private function _generateDokuWikiSnippet(string $pageId, string $query): string
    {
        $rawText = rawWiki($pageId);
        if (empty($rawText)) {
            return '';
        }

        $instructions = p_get_instructions($rawText);
        $words = preg_split('/\s+/', $query, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $info = [];

        $match = $this->_findMatchingInstruction($instructions, $words);
        if ($match === null) {
            // strip_tags() leaves entities such as &amp; in place; decode them
            // first so hsc() does not encode them a second time.
            $plain = html_entity_decode(
                strip_tags((string) p_render('xhtml', $instructions, $info)),
                ENT_QUOTES | ENT_HTML5,
                'UTF-8'
            );

            return '<p>... ' . hsc(mb_substr($plain, 0, self::FALLBACK_EXCERPT_CHARS)) . ' ...</p>';
        }

        $type = $match[0];
        if ($type === 'internalmedia' || $type === 'externalmedia') {
            // Media is rendered whole and returned without keyword highlighting.
            return (string) p_render('xhtml', [$match], $info);
        }

        $snippetWiki = ($type === 'code' || $type === 'file')
            ? $this->_sliceCodeBlock($match, $words)
            : (string) ($match[1][0] ?? '');

        $rendered = (string) p_render('xhtml', p_get_instructions($snippetWiki), $info);

        return $this->_highlightHtml($rendered, $words);
    }

    /**
     * Returns the first code/file/cdata instruction containing a query word;
     * failing that, the first media instruction whose src or title does.
     *
     * @param array    $instructions DokuWiki instruction list
     * @param string[] $words        Query words
     */
    private function _findMatchingInstruction(array $instructions, array $words): ?array
    {
        if ($words === []) {
            return null;
        }

        $mediaMatch = null;
        foreach ($instructions as $instr) {
            $type = $instr[0] ?? '';
            $args = is_array($instr[1] ?? null) ? $instr[1] : [];
            $isMedia = $type === 'internalmedia' || $type === 'externalmedia';

            if ($isMedia) {
                if ($mediaMatch !== null) {
                    continue;
                }
                // NOTE(review): assumes media args are [src, title, ...] - confirm against the DokuWiki handler.
                $content = implode(' ', array_filter([$args[0] ?? null, $args[1] ?? null], 'is_string'));
            } elseif (in_array($type, ['code', 'file', 'cdata'], true)) {
                $content = is_string($args[0] ?? null) ? $args[0] : '';
            } else {
                continue;
            }

            if ($content === '' || !$this->_containsAnyWord($content, $words)) {
                continue;
            }
            if (!$isMedia) {
                return $instr;
            }
            $mediaMatch = $instr;
        }

        return $mediaMatch;
    }

    /** Case-insensitive, multibyte-aware test whether $text contains any of $words. */
    private function _containsAnyWord(string $text, array $words): bool
    {
        foreach ($words as $word) {
            if (mb_stripos($text, $word) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Rebuilds a <code>/<file> block containing only a window of lines around
     * the first line that matches a query word (or the top of the block when
     * no single line matches).
     *
     * @param array    $instr A 'code' or 'file' instruction: [type, [text, language, filename, ...]]
     * @param string[] $words Query words
     * @return string Wiki markup for the sliced block
     */
    private function _sliceCodeBlock(array $instr, array $words): string
    {
        [$type, $args] = $instr;
        $lines = explode("\n", (string) ($args[0] ?? ''));
        $lang = (string) ($args[1] ?? '');
        $file = $type === 'file' ? (string) ($args[2] ?? '') : '';

        $matchIdx = 0;
        foreach ($lines as $idx => $line) {
            if ($this->_containsAnyWord($line, $words)) {
                $matchIdx = $idx;
                break;
            }
        }

        // Window: CODE_CONTEXT_BEFORE lines of lead-in, CODE_SLICE_LINES lines in total.
        $start = max(0, $matchIdx - self::CODE_CONTEXT_BEFORE);
        $slice = array_slice($lines, $start, self::CODE_SLICE_LINES);
        $code = ($start > 0 ? "... \n" : '')
            . implode("\n", $slice)
            . ($start + self::CODE_SLICE_LINES < count($lines) ? "\n ..." : '');

        // Without a language the filename would be parsed as the language;
        // DokuWiki uses '-' as the "no language" placeholder.
        if ($file !== '' && $lang === '') {
            $lang = '-';
        }
        $tag = $type === 'file' ? 'file' : 'code';
        $header = trim("$lang $file");

        return $header === ''
            ? "<$tag>\n$code\n</$tag>"
            : "<$tag $header>\n$code\n</$tag>";
    }

    /**
     * Wraps every occurrence of the given words in <strong> tags.
     *
     * Works on already-escaped or rendered HTML: all words are matched in one
     * pass, and tags and character entities are consumed as opaque tokens, so
     * neither markup, entities, nor previously inserted <strong> tags can be
     * altered.
     *
     * @param string   $html  Escaped text or rendered HTML
     * @param string[] $words Raw (unescaped) words; words shorter than 2 characters are ignored
     */
    private function _highlightHtml(string $html, array $words): string
    {
        $alternatives = [];
        foreach ($words as $word) {
            if (mb_strlen($word) < 2) {
                continue;
            }
            $alternatives[] = preg_quote(hsc($word), '/');
        }
        if ($alternatives === []) {
            return $html;
        }

        $alternatives = array_values(array_unique($alternatives));
        // Longest first so a longer term wins over a shorter one that is its prefix.
        usort($alternatives, static fn (string $a, string $b): int => mb_strlen($b) <=> mb_strlen($a));

        $pattern = '/(' . implode('|', $alternatives) . ')'
            . '|<[^>]*>|&(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);/iu';

        $highlighted = preg_replace_callback(
            $pattern,
            static fn (array $m): string => ($m[1] ?? '') !== '' ? '<strong>' . $m[1] . '</strong>' : $m[0],
            $html
        );

        // preg_* returns null on failure (e.g. invalid UTF-8); fall back to the unhighlighted input.
        return $highlighted ?? $html;
    }

    /**
     * Escapes each text and highlights the query words in it.
     *
     * @param array  $data  Plain-text strings
     * @param string $query User query; '*' characters are stripped first
     * @return string[] HTML-safe strings with matches wrapped in <strong>
     */
    public function getExcerpt(array $data, string $query): array
    {
        $words = preg_split('/\s+/', $this->removeStars($query), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $res = [];
        foreach ($data as $text) {
            $res[] = $this->_highlightHtml(hsc($text), $words);
        }

        return $res;
    }

    /** Maps the match ids of the last search to page data via PageMapper::getByCrc(). */
    public function getPagesIds(): array
    {
        return (new PageMapper())->getByCrc(array_keys($this->_result['matches'] ?? []));
    }

    /** Removes every '*' from the query and trims surrounding whitespace. */
    public function removeStars(string $query): string
    {
        return trim(str_replace('*', '', $query));
    }

    /**
     * Wraps each word in '*' wildcards, except words that start with '-',
     * are shorter than 3 characters, or contain a double quote.
     */
    public function starQuery(string $query): string
    {
        $words = preg_split('/\s+/', $this->removeStars($query), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $starred = [];
        foreach ($words as $w) {
            $keepAsIs = str_starts_with($w, '-') || mb_strlen($w) < 3 || str_contains($w, '"');
            $starred[] = $keepAsIs ? $w : "*$w*";
        }

        return implode(' ', $starred);
    }

    /** Number of matches consumed by the last getPages() call. */
    public function getOffset(): int
    {
        return $this->_offset;
    }

    /** Last error message reported by the Sphinx client. */
    public function getError(): string
    {
        return (string) $this->_sphinx->GetLastError();
    }

    /** Value of 'total_found' from the last search result, 0 if absent. */
    public function getTotalFound(): int
    {
        return (int) ($this->_result['total_found'] ?? 0);
    }

    /** Sets the field weight applied to 'namespace' on the next search(). */
    public function setNamespacePriority(int $p): void
    {
        $this->_namespacePriority = $p;
    }

    /** Sets the field weight applied to 'pagename' on the next search(). */
    public function setPagenamePriority(int $p): void
    {
        $this->_pagenamePriority = $p;
    }

    /** Sets the field weight applied to 'title' on the next search(). */
    public function setTitlePriority(int $p): void
    {
        $this->_titlePriority = $p;
    }

    /** Sets the field weight applied to 'body' on the next search(). */
    public function setBodyPriority(int $p): void
    {
        $this->_bodyPriority = $p;
    }

    /**
     * Builds a query matching the keywords in body/title, or their starred
     * form in namespace/pagename.
     *
     * @param string $k Keywords (escaped before use)
     * @param string $c Unused; kept for signature compatibility
     */
    public function setSearchAllQuery(string $k, string $c): void
    {
        $esc = $this->_sphinx->EscapeString($k);
        $this->_query = "(@(body,title) $esc) | (@(namespace,pagename) " . $this->starQuery($esc) . ")";
    }

    /**
     * Same as setSearchAllQuery(), additionally restricted to a category
     * matched as a phrase against namespace/pagename.
     *
     * @param string $k Keywords (escaped before use)
     * @param string $c Category; a leading '-' excludes the category instead of requiring it
     */
    public function setSearchAllQueryWithCategoryFilter(string $k, string $c): void
    {
        $esc = $this->_sphinx->EscapeString($k);

        // Strip the negation marker BEFORE escaping: EscapeString() turns a
        // leading '-' into '\-', so cutting one character from the escaped
        // value would leave the dash inside the phrase.
        $negated = str_starts_with($c, '-');
        $cat = $this->_sphinx->EscapeString($negated ? substr($c, 1) : $c);
        $f = ($negated ? '-' : '') . '"' . $cat . '"';

        $this->_query = "(@(namespace,pagename) $f) & ((@(body,title) $esc) | (@(namespace,pagename) "
            . $this->starQuery($esc) . "))";
    }

    /** Restricts the currently configured query to the 'pagename' field. */
    public function setSearchOnlyPagename(): void
    {
        $this->_query = "(@pagename {$this->_query})";
    }
}
198