<?php
/**
 * SphinxSearch Wrapper for DokuWiki
 * Advanced Renderer: Slices DokuWiki blocks at the line level and re-renders snippets
 * to preserve GeSHi highlighting, file tabs, and media elements.
 */

class SphinxSearch
{
    /**
     * Extra matches requested from Sphinx on top of the page size. getPages() drops
     * pages the current user may not read, so the surplus is presumably there to
     * keep a page full after that filtering.
     */
    private const ACL_OVERFETCH = 50;

    /** Number of code lines kept in front of the first matching line. */
    private const SNIPPET_LINES_BEFORE = 3;

    /** Maximum number of code lines in a sliced code/file snippet. */
    private const SNIPPET_MAX_LINES = 15;

    /** Maximum number of characters of plain text used by the fallback snippet. */
    private const FALLBACK_CHARS = 250;

    private SphinxClient $_sphinx;
    private array $_result = [];
    private int $_offset = 0;
    // Stored by setSnippetSize(); no code in this class reads it yet.
    private int $_snippetSize = 512;
    private int $_resultsPerPage = 10;
    private int $_titlePriority = 1;
    private int $_bodyPriority = 1;
    private int $_namespacePriority = 1;
    private int $_pagenamePriority = 1;
    private string $_query = '';
    private string $_index = '';
    private string $_host = '';
    private int $_port = 9312;

    /**
     * Creates the Sphinx client and points it at the given server.
     *
     * @param string $host  Sphinx server host
     * @param int    $port  Sphinx server port
     * @param string $index name of the index queried by search()
     */
    public function __construct(string $host, int $port, string $index)
    {
        $this->_host = $host;
        $this->_port = $port;
        $this->_index = $index;
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($this->_host, $this->_port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);
    }

    /**
     * Stores the requested snippet size; non-positive values are ignored.
     */
    public function setSnippetSize(int $size): void
    {
        if ($size > 0) {
            $this->_snippetSize = $size;
        }
    }

    /**
     * Intentionally a no-op, kept so existing callers keep working.
     */
    public function setIndexTags(string $tags): void
    {
        /* Handled by DokuWiki Parser */
    }

    /**
     * Runs the previously configured query against the index.
     *
     * @param int $start          offset of the first match to fetch
     * @param int $resultsPerPage number of results getPages() should return
     * @return bool true when Sphinx returned at least one match
     */
    public function search(int $start, int $resultsPerPage = 10): bool
    {
        $this->_resultsPerPage = $resultsPerPage;
        $this->_sphinx->SetFieldWeights([
            'namespace' => $this->_namespacePriority,
            'pagename' => $this->_pagenamePriority,
            'title' => $this->_titlePriority,
            'body' => $this->_bodyPriority,
        ]);
        $this->_sphinx->SetLimits($start, $resultsPerPage + self::ACL_OVERFETCH);

        $res = $this->_sphinx->Query($this->_query, $this->_index);
        // Query() yields a falsy value on failure; normalise that to "no result".
        $this->_result = is_array($res) ? $res : [];

        return !empty($this->_result['matches']);
    }

    /**
     * Builds the result rows for the last search, skipping unreadable pages.
     *
     * Every inspected match (readable or not) increments the counter returned by
     * getOffset(); the counter is not reset between calls.
     *
     * @param string $keywords user query; '*' characters are stripped before use
     * @return array|false rows keyed by match id, or false when there are no matches
     */
    public function getPages(string $keywords): array|false
    {
        if (empty($this->_result['matches'])) {
            return false;
        }

        $cleanKeywords = $this->removeStars($keywords);
        $results = [];

        foreach ($this->getPagesIds() as $id => $data) {
            $this->_offset++;
            if (auth_quickaclcheck($data['page']) < AUTH_READ) {
                continue;
            }

            $results[$id] = [
                'page' => $data['page'],
                'bodyExcerpt' => $this->_generateDokuWikiSnippet($data['page'], $cleanKeywords),
                'titleTextExcerpt' => !empty($data['title_text']) ? $data['title_text'] : $data['page'],
                'hid' => $data['hid'] ?? '',
            ];

            if (count($results) >= $this->_resultsPerPage) {
                break;
            }
        }

        return $results;
    }

    /**
     * Mirrors DokuWiki blocks by slicing instructions and re-rendering them.
     * This preserves blue tabs, yellow borders, and syntax colors.
     *
     * The first code/file/cdata instruction containing a keyword wins. When none
     * matches, the first media element whose source or title contains a keyword
     * is rendered instead; failing that, a plain-text excerpt of the page start
     * is returned.
     *
     * @param string $pageId DokuWiki page id
     * @param string $query  whitespace-separated keywords
     * @return string XHTML snippet, or '' when the page has no text
     */
    private function _generateDokuWikiSnippet(string $pageId, string $query): string
    {
        $rawText = rawWiki($pageId);
        if (!is_string($rawText) || $rawText === '') {
            return '';
        }

        $instructions = p_get_instructions($rawText);
        $words = preg_split('/\s+/', $query, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $match = $this->_findMatchingInstruction($instructions, $words);
        $info = [];

        if ($match === null) {
            // Fallback for simple text matches. The rendered XHTML still carries
            // entities after strip_tags(), so decode before truncating and
            // escaping: otherwise '&amp;' would be cut in half or double-encoded.
            $plain = html_entity_decode(
                strip_tags((string) p_render('xhtml', $instructions, $info)),
                ENT_QUOTES | ENT_HTML5,
                'UTF-8'
            );

            return '<p>... ' . hsc(mb_substr($plain, 0, self::FALLBACK_CHARS)) . ' ...</p>';
        }

        $type = $match[0];

        // Media preservation: render the element as-is, without highlighting.
        if (in_array($type, ['internalmedia', 'externalmedia'], true)) {
            return (string) p_render('xhtml', [$match], $info);
        }

        $snippetWiki = ($type === 'code' || $type === 'file')
            ? $this->_sliceCodeBlock($match, $words)
            : (string) ($match[1][0] ?? '');

        // Re-render the slice using the DokuWiki engine, then mark the keywords.
        $rendered = (string) p_render('xhtml', p_get_instructions($snippetWiki), $info);

        return $this->_highlight($rendered, $words);
    }

    /**
     * Picks the instruction a snippet should be built from.
     *
     * @param array    $instructions DokuWiki instruction list
     * @param string[] $words        keywords
     * @return array|null first code/file/cdata instruction containing a keyword,
     *                    else the first matching media instruction, else null
     */
    private function _findMatchingInstruction(array $instructions, array $words): ?array
    {
        if ($words === []) {
            return null;
        }

        $mediaMatch = null;
        foreach ($instructions as $instr) {
            $type = $instr[0] ?? '';
            $args = is_array($instr[1] ?? null) ? $instr[1] : [];

            if (in_array($type, ['code', 'file', 'cdata'], true)) {
                $text = is_string($args[0] ?? null) ? $args[0] : '';
                if ($this->_containsAny($text, $words)) {
                    return $instr;
                }
            } elseif ($mediaMatch === null && in_array($type, ['internalmedia', 'externalmedia'], true)) {
                // Media is only a second choice, so remember it and keep looking
                // for a text or code match further down the page.
                $label = implode(' ', array_filter([$args[0] ?? null, $args[1] ?? null], 'is_string'));
                if ($this->_containsAny($label, $words)) {
                    $mediaMatch = $instr;
                }
            }
        }

        return $mediaMatch;
    }

    /**
     * Tells whether any keyword occurs in the text (case-insensitive).
     *
     * @param string[] $words
     */
    private function _containsAny(string $haystack, array $words): bool
    {
        if ($haystack === '') {
            return false;
        }
        foreach ($words as $word) {
            if (mb_stripos($haystack, $word) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Cuts a window of lines around the first matching line of a code/file
     * instruction and wraps it back into wiki markup.
     *
     * The window starts SNIPPET_LINES_BEFORE lines ahead of the match and holds at
     * most SNIPPET_MAX_LINES lines; "..." markers show where lines were dropped.
     *
     * @param array    $instruction a 'code' or 'file' instruction
     * @param string[] $words       keywords
     * @return string wiki markup (<code>/<file> block)
     */
    private function _sliceCodeBlock(array $instruction, array $words): string
    {
        $type = $instruction[0];
        $args = $instruction[1];
        $lang = is_string($args[1] ?? null) ? $args[1] : '';
        $file = ($type === 'file' && is_string($args[2] ?? null)) ? $args[2] : '';

        $lines = explode("\n", (string) ($args[0] ?? ''));
        $matchIdx = 0;
        foreach ($lines as $idx => $line) {
            if ($this->_containsAny($line, $words)) {
                $matchIdx = $idx;
                break;
            }
        }

        $start = max(0, $matchIdx - self::SNIPPET_LINES_BEFORE);
        $slice = array_slice($lines, $start, self::SNIPPET_MAX_LINES);
        $body = ($start > 0 ? "... \n" : '')
            . implode("\n", $slice)
            . ($start + self::SNIPPET_MAX_LINES < count($lines) ? "\n ..." : '');

        // A filename must be preceded by a language token; '-' is DokuWiki's
        // placeholder for "no language". Without it the filename would be parsed
        // as the language.
        $params = $file !== '' ? ($lang !== '' ? $lang : '-') . ' ' . $file : $lang;

        return "<$type $params>\n$body\n</$type>";
    }

    /**
     * Wraps every keyword occurrence in <strong>, leaving markup intact.
     *
     * All keywords are matched in a single pass. Tags and character entities are
     * consumed by the same pattern and copied through untouched, so a keyword
     * such as "amp" or "strong" can neither break '&amp;' nor hit the <strong>
     * tags inserted for another keyword.
     *
     * @param string   $html  already escaped/rendered HTML
     * @param string[] $words raw keywords; those shorter than 2 characters are skipped
     * @return string highlighted HTML ($html unchanged if the regex engine fails)
     */
    private function _highlight(string $html, array $words): string
    {
        $terms = [];
        foreach ($words as $word) {
            if (mb_strlen($word) >= 2) {
                $terms[] = hsc($word);
            }
        }
        $terms = array_values(array_unique($terms));
        if ($terms === []) {
            return $html;
        }

        // Longest first, so "foobar" is preferred over "foo" at the same position.
        usort($terms, static fn (string $a, string $b): int => mb_strlen($b) <=> mb_strlen($a));
        $alternation = implode('|', array_map(static fn (string $t): string => preg_quote($t, '/'), $terms));
        $pattern = '/(' . $alternation . ')|<[^>]*>|&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/iu';

        $result = preg_replace_callback(
            $pattern,
            static fn (array $m): string => ($m[1] ?? '') !== '' ? '<strong>' . $m[1] . '</strong>' : $m[0],
            $html
        );

        // preg_* returns null on failure (e.g. invalid UTF-8 with the /u flag).
        return $result ?? $html;
    }

    /**
     * Escapes each text and highlights the query keywords in it.
     *
     * @param array  $data  plain-text strings
     * @param string $query user query; '*' characters are stripped before use
     * @return string[] escaped strings with keywords wrapped in <strong>
     */
    public function getExcerpt(array $data, string $query): array
    {
        $words = preg_split('/\s+/', $this->removeStars($query), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $res = [];
        foreach ($data as $text) {
            $res[] = $this->_highlight(hsc($text), $words);
        }

        return $res;
    }

    /**
     * Maps the match ids of the last search to page data via PageMapper.
     */
    public function getPagesIds(): array
    {
        return (new PageMapper())->getByCrc(array_keys($this->_result['matches'] ?? []));
    }

    /**
     * Removes every '*' from the query and trims surrounding whitespace.
     */
    public function removeStars(string $query): string
    {
        return trim(str_replace('*', '', $query));
    }

    /**
     * Surrounds each keyword with '*' wildcards.
     *
     * Words that start with '-', are shorter than 3 characters or contain a
     * double quote are passed through unchanged.
     */
    public function starQuery(string $query): string
    {
        $words = preg_split('/\s+/', $this->removeStars($query), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $starred = [];
        foreach ($words as $w) {
            $keepAsIs = str_starts_with($w, '-') || mb_strlen($w) < 3 || str_contains($w, '"');
            $starred[] = $keepAsIs ? $w : "*$w*";
        }

        return implode(' ', $starred);
    }

    /** Number of matches inspected by getPages() so far. */
    public function getOffset(): int
    {
        return $this->_offset;
    }

    /** Last error message reported by the Sphinx client. */
    public function getError(): string
    {
        return $this->_sphinx->GetLastError();
    }

    /** 'total_found' of the last search result, or 0 when absent. */
    public function getTotalFound(): int
    {
        return (int) ($this->_result['total_found'] ?? 0);
    }

    public function setNamespacePriority(int $p): void
    {
        $this->_namespacePriority = $p;
    }

    public function setPagenamePriority(int $p): void
    {
        $this->_pagenamePriority = $p;
    }

    public function setTitlePriority(int $p): void
    {
        $this->_titlePriority = $p;
    }

    public function setBodyPriority(int $p): void
    {
        $this->_bodyPriority = $p;
    }

    /**
     * Builds the query matching keywords in body/title, or as wildcards in
     * namespace/pagename.
     *
     * @param string $k keywords
     * @param string $c unused; kept for signature compatibility with callers
     */
    public function setSearchAllQuery(string $k, string $c): void
    {
        $esc = $this->_sphinx->EscapeString($k);
        $this->_query = "(@(body,title) $esc) | (@(namespace,pagename) " . $this->starQuery($esc) . ")";
    }

    /**
     * Same as setSearchAllQuery(), additionally restricted to a category.
     *
     * @param string $k keywords
     * @param string $c category phrase; a leading '-' excludes the category
     */
    public function setSearchAllQueryWithCategoryFilter(string $k, string $c): void
    {
        $esc = $this->_sphinx->EscapeString($k);

        // Strip the negation marker BEFORE escaping: EscapeString() backslash-
        // escapes '-', so removing the first character afterwards would drop the
        // backslash and leave the '-' inside the phrase.
        $negated = str_starts_with($c, '-');
        $cat = $this->_sphinx->EscapeString($negated ? substr($c, 1) : $c);
        $f = ($negated ? '-' : '') . '"' . $cat . '"';

        $this->_query = "(@(namespace,pagename) $f) & ((@(body,title) $esc) | (@(namespace,pagename) "
            . $this->starQuery($esc) . "))";
    }

    /**
     * Restricts the currently configured query to the pagename field.
     */
    public function setSearchOnlyPagename(): void
    {
        $this->_query = "(@pagename {$this->_query})";
    }
}