<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for CollectionSearch
 *
 * Covers term based searches (addTerm() + execute()) and direct lookups (lookup()).
 * Each test creates its own collection using distinct index names, presumably so that
 * tests do not share index data with each other.
 */
class CollectionSearchTest extends \DokuWikiTest
{
    /**
     * An exact term matches only the identical token and reports its per-entity frequencies
     */
    public function testExactTerm()
    {
        // add some content to the indexes
        $collection = new MockFrequencyCollection('page', 'w', 'i', 'pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'dokuwiki', 'dokuwikis', 'doku', 'wiki']);
        $collection->addEntity('page2', ['dokuwiki', 'other', 'words']);
        $collection->unlock();

        // add search term
        $search = new CollectionSearch($collection);
        $term = $search->addTerm('dokuwiki');

        // execute search
        $search->execute();

        // inspect the term updates first:

        // exact search should only match one token
        $this->assertEquals(['dokuwiki'], $term->getTokens());
        // that token is 8 chars and should be the first in the index
        $this->assertEquals([0], $term->getTokenIDsByGroup(8));
        // the dokuwiki token is two times on page1 and 1 time on page2
        $this->assertEquals(['page1' => 2, 'page2' => 1], $term->getEntityFrequencies());

        // entity IDs should be available from the search
        $this->assertEquals([0 => 'page1', 1 => 'page2'], $search->getEntities());
    }

    /**
     * Terms with leading, trailing or surrounding wildcards match all fitting tokens
     * and sum up the frequencies of those tokens per entity
     */
    public function testWildcardSearch()
    {
        // page1 has: dokuwiki(x2), dokuwikis, doku, wiki
        // page2 has: dokuwiki, other, words
        $collection = new MockFrequencyCollection('wc_page', 'wc_w', 'wc_i', 'wc_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'dokuwiki', 'dokuwikis', 'doku', 'wiki']);
        $collection->addEntity('page2', ['dokuwiki', 'other', 'words']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $endWild = $search->addTerm('doku*');
        $startWild = $search->addTerm('*wiki');
        $bothWild = $search->addTerm('*kuwi*');
        $search->execute();

        // doku* should match: doku(4), dokuwiki(8), dokuwikis(9)
        // tokens are sorted before comparing, the test does not rely on the returned order
        $endTokens = $endWild->getTokens();
        sort($endTokens);
        $this->assertEquals(['doku', 'dokuwiki', 'dokuwikis'], $endTokens);
        // page1 has doku(1) + dokuwiki(2) + dokuwikis(1) = 4, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 4, 'page2' => 1], $endWild->getEntityFrequencies());

        // *wiki should match: dokuwiki(8), wiki(4)
        $startTokens = $startWild->getTokens();
        sort($startTokens);
        $this->assertEquals(['dokuwiki', 'wiki'], $startTokens);
        // page1 has dokuwiki(2) + wiki(1) = 3, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 3, 'page2' => 1], $startWild->getEntityFrequencies());

        // *kuwi* should match: dokuwiki(8), dokuwikis(9)
        $bothTokens = $bothWild->getTokens();
        sort($bothTokens);
        $this->assertEquals(['dokuwiki', 'dokuwikis'], $bothTokens);
        // page1 has dokuwiki(2) + dokuwikis(1) = 3, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 3, 'page2' => 1], $bothWild->getEntityFrequencies());
    }

    /**
     * Index a real text file via the Tokenizer and search it
     */
    public function testTokenizedPageSearch()
    {
        // fail with a clear message when the fixture is missing or unreadable instead of
        // handing `false` to the Tokenizer and failing in a confusing way further down
        $fixture = __DIR__ . '/../data/searchtest.txt';
        $this->assertFileExists($fixture);
        $text = file_get_contents($fixture);
        $this->assertNotFalse($text, 'fixture file could not be read');

        $tokens = Tokenizer::getWords($text);

        $collection = new MockFrequencyCollection('tp_page', 'tp_w', 'tp_i', 'tp_pageword');
        $collection->lock();
        $collection->addEntity('search:test', $tokens);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $exact = $search->addTerm('dokuwiki');
        $wild = $search->addTerm('plugin*');
        $search->execute();

        // "dokuwiki" appears 4 times in the text (case-insensitive tokenization)
        $this->assertEquals(['dokuwiki'], $exact->getTokens());
        $this->assertEquals(['search:test' => 4], $exact->getEntityFrequencies());

        // "plugin*" should match "plugins" (7 chars) and "plugin" would be too if present
        $wildTokens = $wild->getTokens();
        $this->assertContains('plugins', $wildTokens);
        $this->assertNotEmpty($wild->getEntityFrequencies());
        $this->assertArrayHasKey('search:test', $wild->getEntityFrequencies());
    }

    /**
     * A term that matches no token yields no tokens, no frequencies and no entities
     */
    public function testNoMatchReturnsEmptyFrequencies()
    {
        $collection = new MockFrequencyCollection('nm_page', 'nm_w', 'nm_i', 'nm_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['alpha', 'beta', 'gamma']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('zzzznotfound');
        $search->execute();

        $this->assertEmpty($term->getTokens());
        $this->assertEmpty($term->getEntityFrequencies());
        $this->assertEmpty($search->getEntities());
    }

    // --- lookup() tests ---

    /**
     * Exact lookup on a non-split LookupCollection
     */
    public function testLookupExact()
    {
        $collection = new MockLookupCollection('le_entity', 'le_token', 'le_freq', 'le_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $result = $search->lookup('wiki:syntax');

        // the result is keyed by the looked up value
        $this->assertCount(1, $result);
        $this->assertArrayHasKey('wiki:syntax', $result);
        $pages = $result['wiki:syntax'];
        sort($pages);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages);
    }

    /**
     * Wildcard lookup on a non-split LookupCollection
     */
    public function testLookupWildcard()
    {
        $collection = new MockLookupCollection('lw_entity', 'lw_token', 'lw_freq', 'lw_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax', 'other:page']);
        $collection->unlock();

        $search = new CollectionSearch($collection);

        // end wildcard: wiki:* matches wiki:syntax and wiki:welcome
        // wiki:start has both tokens, so it appears twice; wiki:other has wiki:syntax once
        $result = $search->lookup('wiki:*');
        $pages = $result['wiki:*'];
        sort($pages);
        $this->assertEquals(['wiki:other', 'wiki:start', 'wiki:start'], $pages);

        // start wildcard: *syntax matches only wiki:syntax
        $search2 = new CollectionSearch($collection);
        $result2 = $search2->lookup('*syntax');
        $pages2 = $result2['*syntax'];
        sort($pages2);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages2);
    }

    /**
     * Callback lookup on a non-split LookupCollection
     */
    public function testLookupCallback()
    {
        $collection = new MockLookupCollection('lc_entity', 'lc_token', 'lc_freq', 'lc_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['Apple', 'Banana']);
        $collection->addEntity('wiki:other', ['Cherry', 'Apple Pie']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        // case-insensitive substring match
        // the callback parameters are deliberately not named $search to avoid shadowing the object above
        $result = $search->lookup('apple', static fn($needle, $token) => stripos($token, $needle) !== false);

        $pages = $result['apple'];
        sort($pages);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages);
    }

    /**
     * lookup() on a DirectCollection (title-style 1:1 mapping)
     */
    public function testLookupOnDirectCollection()
    {
        $collection = new MockDirectCollection('ld_entity', 'ld_token');
        $collection->lock();
        $collection->addEntity('wiki:start', ['Welcome to DokuWiki']);
        $collection->addEntity('wiki:syntax', ['Formatting Syntax']);
        $collection->addEntity('wiki:other', ['Other Page']);
        $collection->unlock();

        $search = new CollectionSearch($collection);

        // exact match
        $result = $search->lookup('Welcome to DokuWiki');
        $this->assertEquals(['wiki:start'], $result['Welcome to DokuWiki']);

        // wildcard match
        $search2 = new CollectionSearch($collection);
        $result2 = $search2->lookup('*Syntax');
        $this->assertEquals(['wiki:syntax'], $result2['*Syntax']);

        // callback match (case-insensitive substring)
        $search3 = new CollectionSearch($collection);
        $result3 = $search3->lookup('wiki', static fn($needle, $token) => stripos($token, $needle) !== false);
        $this->assertEquals(['wiki:start'], $result3['wiki']);
    }

    /**
     * lookup() with multiple values
     */
    public function testLookupMultipleValues()
    {
        $collection = new MockLookupCollection('lm_entity', 'lm_token', 'lm_freq', 'lm_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $result = $search->lookup(['wiki:syntax', 'wiki:welcome', 'nonexistent']);

        $syntax = $result['wiki:syntax'];
        sort($syntax);
        $this->assertEquals(['wiki:other', 'wiki:start'], $syntax);
        $this->assertEquals(['wiki:start'], $result['wiki:welcome']);
        // values without any match are still present in the result, with an empty list
        $this->assertEquals([], $result['nonexistent']);
    }

    /**
     * lookup() on a split FrequencyCollection
     */
    public function testLookupOnSplitCollection()
    {
        $collection = new MockFrequencyCollection('ls_page', 'ls_w', 'ls_i', 'ls_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'wiki', 'doku']);
        $collection->addEntity('page2', ['dokuwiki', 'other']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $result = $search->lookup('dokuwiki');

        $pages = $result['dokuwiki'];
        sort($pages);
        $this->assertEquals(['page1', 'page2'], $pages);
    }

    /**
     * Searching an empty collection returns no results
     */
    public function testSearchEmptyCollection()
    {
        // nothing is ever added to this collection
        $collection = new MockFrequencyCollection('empty_page', 'empty_w', 'empty_i', 'empty_pw');

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('anything');
        $search->execute();
        $this->assertEquals([], $term->getEntityFrequencies());
    }

    /**
     * Lookup on an empty collection returns empty arrays
     */
    public function testLookupEmptyCollection()
    {
        // nothing is ever added to this collection
        $collection = new MockFrequencyCollection('empty2_page', 'empty2_w', 'empty2_i', 'empty2_pw');

        $search = new CollectionSearch($collection);
        $result = $search->lookup('anything');
        $this->assertEquals([], $result['anything']);
    }
}