<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for CollectionSearch
 *
 * Each test builds its own mock collection (using index names that are not
 * shared with any other test in this class), registers one or more terms on a
 * CollectionSearch, executes it and then inspects the returned term objects.
 */
class CollectionSearchTest extends \DokuWikiTest
{
    /**
     * An exact term matches exactly one token and reports per-page frequencies
     */
    public function testExactTerm()
    {
        // add some content to the indexes
        $collection = new MockFrequencyCollection('page', 'w', 'i', 'pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'dokuwiki', 'dokuwikis', 'doku', 'wiki']);
        $collection->addEntity('page2', ['dokuwiki', 'other', 'words']);
        $collection->unlock();

        // add search term
        $search = new CollectionSearch($collection);
        $term = $search->addTerm('dokuwiki');

        // execute search
        $search->execute();

        // exact search should only match one token
        $this->assertEquals(['dokuwiki'], $term->getTokens());
        // the dokuwiki token occurs twice on page1 and once on page2
        $this->assertEquals(['page1' => 2, 'page2' => 1], $term->getEntityFrequencies());
        // full detail (token => frequency) is available per page
        $this->assertEquals(['dokuwiki' => 2], $term->getMatches()['page1']);
        $this->assertEquals(['dokuwiki' => 1], $term->getMatches()['page2']);
    }

    /**
     * End, start and two-sided wildcards each match the expected tokens and
     * sum the frequencies of all matched tokens per page
     */
    public function testWildcardSearch()
    {
        // page1 has: dokuwiki(x2), dokuwikis, doku, wiki
        // page2 has: dokuwiki, other, words
        $collection = new MockFrequencyCollection('wc_page', 'wc_w', 'wc_i', 'wc_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'dokuwiki', 'dokuwikis', 'doku', 'wiki']);
        $collection->addEntity('page2', ['dokuwiki', 'other', 'words']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $endWild = $search->addTerm('doku*');
        $startWild = $search->addTerm('*wiki');
        $bothWild = $search->addTerm('*kuwi*');
        $search->execute();

        // The numbers in parentheses below are token lengths.
        // Token lists are sorted before comparing, so the order in which the
        // search reports them does not matter.

        // doku* should match: doku(4), dokuwiki(8), dokuwikis(9)
        $endTokens = $endWild->getTokens();
        sort($endTokens);
        $this->assertEquals(['doku', 'dokuwiki', 'dokuwikis'], $endTokens);
        // page1 has doku(1) + dokuwiki(2) + dokuwikis(1) = 4, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 4, 'page2' => 1], $endWild->getEntityFrequencies());

        // *wiki should match: dokuwiki(8), wiki(4)
        $startTokens = $startWild->getTokens();
        sort($startTokens);
        $this->assertEquals(['dokuwiki', 'wiki'], $startTokens);
        // page1 has dokuwiki(2) + wiki(1) = 3, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 3, 'page2' => 1], $startWild->getEntityFrequencies());

        // *kuwi* should match: dokuwiki(8), dokuwikis(9)
        $bothTokens = $bothWild->getTokens();
        sort($bothTokens);
        $this->assertEquals(['dokuwiki', 'dokuwikis'], $bothTokens);
        // page1 has dokuwiki(2) + dokuwikis(1) = 3, page2 has dokuwiki(1) = 1
        $this->assertEquals(['page1' => 3, 'page2' => 1], $bothWild->getEntityFrequencies());
    }

    /**
     * Index a real text file via the Tokenizer and search it
     */
    public function testTokenizedPageSearch()
    {
        $fixture = __DIR__ . '/../data/searchtest.txt';
        $text = file_get_contents($fixture);
        // file_get_contents() returns false on failure; stop here with a clear
        // message instead of handing false to the tokenizer
        $this->assertNotFalse($text, 'could not read fixture ' . $fixture);
        $tokens = Tokenizer::getWords($text);

        $collection = new MockFrequencyCollection('tp_page', 'tp_w', 'tp_i', 'tp_pageword');
        $collection->lock();
        $collection->addEntity('search:test', $tokens);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $exact = $search->addTerm('dokuwiki');
        $wild = $search->addTerm('plugin*');
        $search->execute();

        // "dokuwiki" appears 4 times in the text (case-insensitive tokenization)
        $this->assertEquals(['dokuwiki'], $exact->getTokens());
        $this->assertEquals(['search:test' => 4], $exact->getEntityFrequencies());

        // "plugin*" should match "plugins" (7 chars) and "plugin" would be too if present
        $wildTokens = $wild->getTokens();
        $this->assertContains('plugins', $wildTokens);
        $this->assertNotEmpty($wild->getEntityFrequencies());
        $this->assertArrayHasKey('search:test', $wild->getEntityFrequencies());
    }

    /**
     * A term that matches nothing yields empty tokens, frequencies and matches
     */
    public function testNoMatchReturnsEmptyFrequencies()
    {
        $collection = new MockFrequencyCollection('nm_page', 'nm_w', 'nm_i', 'nm_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['alpha', 'beta', 'gamma']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('zzzznotfound');
        $search->execute();

        $this->assertEmpty($term->getTokens());
        $this->assertEmpty($term->getEntityFrequencies());
        $this->assertEmpty($term->getMatches());
    }

    // --- metadata-style search tests (using addTerm/execute without length restrictions) ---

    /**
     * Exact search on a non-split LookupCollection
     */
    public function testMetadataExact()
    {
        $collection = new MockLookupCollection('le_entity', 'le_token', 'le_freq', 'le_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('wiki:syntax');
        $search->execute();

        // both pages carry the wiki:syntax token; sort to compare independent of result order
        $pages = array_keys($term->getEntityFrequencies());
        sort($pages);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages);
    }

    /**
     * Wildcard search on a non-split LookupCollection
     */
    public function testMetadataWildcard()
    {
        $collection = new MockLookupCollection('lw_entity', 'lw_token', 'lw_freq', 'lw_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax', 'other:page']);
        $collection->unlock();

        // end wildcard: wiki:* matches wiki:syntax and wiki:welcome
        $search = new CollectionSearch($collection);
        $term = $search->addTerm('wiki:*');
        $search->execute();

        $pages = array_keys($term->getEntityFrequencies());
        sort($pages);
        // wiki:start carries both matching tokens, wiki:other carries wiki:syntax;
        // only the set of matching pages is asserted here, not the frequencies
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages);

        // start wildcard: *syntax matches only the wiki:syntax token, which both pages carry
        $search2 = new CollectionSearch($collection);
        $term2 = $search2->addTerm('*syntax');
        $search2->execute();

        $pages2 = array_keys($term2->getEntityFrequencies());
        sort($pages2);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages2);
    }

    /**
     * Case-insensitive search on a non-split LookupCollection
     */
    public function testMetadataCaseInsensitive()
    {
        $collection = new MockLookupCollection('lc_entity', 'lc_token', 'lc_freq', 'lc_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['Apple', 'Banana']);
        $collection->addEntity('wiki:other', ['Cherry', 'Apple Pie']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $search->caseInsensitive();
        $term = $search->addTerm('*apple*');
        $search->execute();

        // lower-case "*apple*" has to find "Apple" (wiki:start) and "Apple Pie" (wiki:other)
        $pages = array_keys($term->getEntityFrequencies());
        sort($pages);
        $this->assertEquals(['wiki:other', 'wiki:start'], $pages);
    }

    /**
     * Search on a DirectCollection (title-style 1:1 mapping)
     */
    public function testSearchOnDirectCollection()
    {
        $collection = new MockDirectCollection('ld_entity', 'ld_token');
        $collection->lock();
        $collection->addEntity('wiki:start', ['Welcome to DokuWiki']);
        $collection->addEntity('wiki:syntax', ['Formatting Syntax']);
        $collection->addEntity('wiki:other', ['Other Page']);
        $collection->unlock();

        // exact match
        $search = new CollectionSearch($collection);
        $term = $search->addTerm('Welcome to DokuWiki');
        $search->execute();
        $this->assertEquals(['wiki:start'], array_keys($term->getEntityFrequencies()));

        // wildcard match
        $search2 = new CollectionSearch($collection);
        $term2 = $search2->addTerm('*Syntax');
        $search2->execute();
        $this->assertEquals(['wiki:syntax'], array_keys($term2->getEntityFrequencies()));

        // case-insensitive substring match: only "Welcome to DokuWiki" contains "wiki"
        $search3 = new CollectionSearch($collection);
        $search3->caseInsensitive();
        $term3 = $search3->addTerm('*wiki*');
        $search3->execute();
        $this->assertEquals(['wiki:start'], array_keys($term3->getEntityFrequencies()));
    }

    /**
     * Multiple terms in a single search
     */
    public function testMultipleTerms()
    {
        $collection = new MockLookupCollection('lm_entity', 'lm_token', 'lm_freq', 'lm_reverse');
        $collection->lock();
        $collection->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $collection->addEntity('wiki:other', ['wiki:syntax']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $term1 = $search->addTerm('wiki:syntax');
        $term2 = $search->addTerm('wiki:welcome');
        $term3 = $search->addTerm('nonexistent');
        $search->execute();

        // each term object reports its own result after the single execute() call
        $syntax = array_keys($term1->getEntityFrequencies());
        sort($syntax);
        $this->assertEquals(['wiki:other', 'wiki:start'], $syntax);
        $this->assertEquals(['wiki:start'], array_keys($term2->getEntityFrequencies()));
        $this->assertEquals([], array_keys($term3->getEntityFrequencies()));
    }

    /**
     * Search on a split FrequencyCollection
     */
    public function testSearchOnSplitCollection()
    {
        $collection = new MockFrequencyCollection('ls_page', 'ls_w', 'ls_i', 'ls_pageword');
        $collection->lock();
        $collection->addEntity('page1', ['dokuwiki', 'wiki', 'doku']);
        $collection->addEntity('page2', ['dokuwiki', 'other']);
        $collection->unlock();

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('dokuwiki');
        $search->execute();

        $pages = array_keys($term->getEntityFrequencies());
        sort($pages);
        $this->assertEquals(['page1', 'page2'], $pages);
    }

    /**
     * Searching an empty collection returns no results
     *
     * No entity is ever added to the collection before the search runs.
     */
    public function testSearchEmptyCollection()
    {
        $collection = new MockFrequencyCollection('empty_page', 'empty_w', 'empty_i', 'empty_pw');

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('anything');
        $search->execute();
        $this->assertEquals([], $term->getEntityFrequencies());
    }

    /**
     * Search on an empty collection returns empty frequencies
     *
     * NOTE(review): apart from the index names this repeats
     * testSearchEmptyCollection() statement for statement. Consider removing
     * one of the two or pointing this one at a different collection type.
     */
    public function testSearchEmptyCollection2()
    {
        $collection = new MockFrequencyCollection('empty2_page', 'empty2_w', 'empty2_i', 'empty2_pw');

        $search = new CollectionSearch($collection);
        $term = $search->addTerm('anything');
        $search->execute();
        $this->assertEquals([], $term->getEntityFrequencies());
    }
}