<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\FulltextCollection;
use dokuwiki\Search\Collection\FulltextCollectionSearch;
use dokuwiki\Search\Collection\Term;
use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\Query\QueryParser;
use dokuwiki\Search\Tokenizer;

/**
 * Unit tests for the search Term object.
 *
 * The tests cover wildcard parsing, regex quoting of the base term, rejection
 * of terms that are too short, the per-length token registry and the
 * per-entity frequency bookkeeping.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * A term without wildcards keeps original, base and quoted form identical.
     */
    public function testBasicExact(): void
    {
        $term = new Term('dokuwiki');

        $this->assertEquals('dokuwiki', $term->getOriginal());
        $this->assertEquals('dokuwiki', $term->getBase());
        $this->assertEquals('dokuwiki', $term->getQuoted());
        $this->assertEquals(8, $term->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $term->getWildcard());
    }

    /**
     * A leading asterisk is removed from the base and becomes `.*` in the quoted form.
     */
    public function testBasicLeftWildcard(): void
    {
        $term = new Term('*wiki');

        $this->assertEquals('*wiki', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START, $term->getWildcard());
    }

    /**
     * A trailing asterisk is removed from the base and becomes `.*` in the quoted form.
     */
    public function testBasicRightWildcard(): void
    {
        $term = new Term('wiki*');

        $this->assertEquals('wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Asterisks on both sides yield the sum of both wildcard constants.
     */
    public function testBasicBothWildcard(): void
    {
        $term = new Term('*wiki*');

        $this->assertEquals('*wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START + Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * An empty string is rejected with a "short" SearchException.
     */
    public function testEmptyTerm(): void
    {
        $this->expectException(SearchException::class);
        $this->expectExceptionMessageMatches('/short/i');
        new Term('');
    }

    /**
     * Tokens are registered per length; IDs can be read back per length and
     * all token strings are returned as one flat list.
     */
    public function testTokenAdding(): void
    {
        $term = new Term('*wiki*');
        $term->addTokens(8, [0 => 'dokuwiki']);
        $term->addTokens(5, [0 => 'wikis', 134 => 'awiki']);

        $this->assertEquals(['dokuwiki', 'wikis', 'awiki'], $term->getTokens());

        $this->assertEquals([0], $term->getTokenIDsByLength(8));
        $this->assertEquals([0, 134], $term->getTokenIDsByLength(5));
        $this->assertEquals([], $term->getTokenIDsByLength(3));
    }

    /**
     * Frequencies for the same entity are summed, and resolveEntities()
     * re-keys the frequency list using the given ID => name map.
     */
    public function testFrequencyAdding(): void
    {
        $term = new Term('dokuwiki');

        $term->addEntityFrequency(7, 7);
        $term->addEntityFrequency(7, 7);
        $term->addEntityFrequency(8, 1);

        $this->assertEquals([7 => 14, 8 => 1], $term->getEntityFrequencies());

        $map = [
            7 => 'page1',
            8 => 'page2',
        ];
        $term->resolveEntities($map);

        $this->assertEquals(['page1' => 14, 'page2' => 1], $term->getEntityFrequencies());
    }

    /**
     * Numeric terms should be accepted even if they are shorter than the
     * configured minimum word length.
     */
    public function testNumericTerm(): void
    {
        $term = new Term('42');

        $this->assertEquals('42', $term->getOriginal());
        $this->assertEquals('42', $term->getBase());
        $this->assertEquals(2, $term->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $term->getWildcard());
    }

    /**
     * Regex meta characters in the base are escaped in the quoted form.
     */
    public function testSpecialCharactersQuoting(): void
    {
        $term = new Term('test.doc');

        $this->assertEquals('test.doc', $term->getOriginal());
        $this->assertEquals('test.doc', $term->getBase());
        // the literal dot must be escaped
        $this->assertEquals('test\\.doc', $term->getQuoted());
    }

    /**
     * A literal dot directly before a trailing wildcard is escaped, while the
     * wildcard itself is still translated to `.*`.
     */
    public function testSpecialCharactersWithWildcard(): void
    {
        $term = new Term('test.*');

        $this->assertEquals('test.*', $term->getOriginal());
        $this->assertEquals('test.', $term->getBase());
        // escaped dot followed by the translated wildcard
        $this->assertEquals('test\\..*', $term->getQuoted());
        $this->assertEquals(Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Surrounding wildcards are stripped from the base term.
     *
     * NOTE(review): only asterisks are exercised here; input containing
     * whitespace is not covered by this test.
     */
    public function testWildcardTrimming(): void
    {
        $term = new Term('*wiki*');

        $this->assertEquals('*wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki.*', $term->getQuoted());
        $this->assertEquals(Term::WILDCARD_START + Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * A non-numeric term one character below the minimum word length is rejected.
     *
     * Skipped when the minimum length is 1 or less, because no shorter
     * non-empty term can be built in that case.
     */
    public function testTooShortTerm(): void
    {
        $minLength = Tokenizer::getMinWordLength();

        if ($minLength <= 1) {
            $this->markTestSkipped('Minimum word length is too small for this test');
        }

        $this->expectException(SearchException::class);
        $this->expectExceptionMessageMatches('/short/i');
        // one character less than the minimum
        new Term(str_repeat('a', $minLength - 1));
    }

    /**
     * A term consisting only of wildcards is rejected with a "short" SearchException.
     */
    public function testOnlyWildcards(): void
    {
        $this->expectException(SearchException::class);
        $this->expectExceptionMessageMatches('/short/i');
        new Term('***');
    }

    /**
     * Tokens of several different lengths are kept apart per length but are
     * all returned by getTokens().
     */
    public function testMultipleLengthTokens(): void
    {
        $term = new Term('*wiki*');

        // every token is registered under its real character length
        $term->addTokens(4, [10 => 'wiki']);
        $term->addTokens(5, [11 => 'mwiki']);
        $term->addTokens(6, [21 => 'pmwiki']);
        $term->addTokens(8, [20 => 'dokuwiki']);
        $term->addTokens(9, [30 => 'mediawiki']);

        // all tokens are returned, regardless of length
        $allTokens = $term->getTokens();
        $this->assertCount(5, $allTokens);
        $this->assertContains('wiki', $allTokens);
        $this->assertContains('mwiki', $allTokens);
        $this->assertContains('pmwiki', $allTokens);
        $this->assertContains('dokuwiki', $allTokens);
        $this->assertContains('mediawiki', $allTokens);

        // IDs are only returned for the requested length
        $this->assertEquals([10], $term->getTokenIDsByLength(4));
        $this->assertEquals([11], $term->getTokenIDsByLength(5));
        $this->assertEquals([21], $term->getTokenIDsByLength(6));
        $this->assertEquals([20], $term->getTokenIDsByLength(8));
        $this->assertEquals([30], $term->getTokenIDsByLength(9));
        // no token of length 7 was registered
        $this->assertEquals([], $term->getTokenIDsByLength(7));
    }

    /**
     * Simulates a wildcard term matching several tokens on the same entity:
     * the individual frequencies add up per entity.
     */
    public function testFrequencyAggregationAcrossTokens(): void
    {
        $term = new Term('*wiki*');

        // entity 1 is reported three times, once per matching token
        $term->addEntityFrequency(1, 5);
        $term->addEntityFrequency(1, 3);
        $term->addEntityFrequency(1, 2);

        // entity 2 is reported once
        $term->addEntityFrequency(2, 7);

        $frequencies = $term->getEntityFrequencies();
        $this->assertEquals(10, $frequencies[1]); // 5 + 3 + 2
        $this->assertEquals(7, $frequencies[2]);
    }

    /**
     * Asking for a length without registered tokens returns an empty list,
     * both before and after other lengths have been registered.
     */
    public function testEmptyTokensByLength(): void
    {
        $term = new Term('dokuwiki');

        // nothing registered yet
        $this->assertEquals([], $term->getTokenIDsByLength(8));

        // a different length is registered, length 8 is still empty
        $term->addTokens(4, [10 => 'wiki']);
        $this->assertEquals([], $term->getTokenIDsByLength(8));
    }

    /**
     * A frequency of zero is stored like any other value.
     */
    public function testZeroFrequency(): void
    {
        $term = new Term('dokuwiki');

        $term->addEntityFrequency(1, 5);
        $term->addEntityFrequency(2, 0);
        $term->addEntityFrequency(3, 3);

        $frequencies = $term->getEntityFrequencies();
        $this->assertEquals(5, $frequencies[1]);
        $this->assertEquals(0, $frequencies[2]);
        $this->assertEquals(3, $frequencies[3]);
    }

    /**
     * Resolving entities replaces the numeric IDs by the mapped names.
     *
     * NOTE(review): despite the method name, the map below covers every
     * recorded entity. How resolveEntities() treats an entity that is missing
     * from the map is not exercised here.
     */
    public function testResolveEntitiesPartialMap(): void
    {
        $term = new Term('dokuwiki');

        $term->addEntityFrequency(1, 5);
        $term->addEntityFrequency(2, 3);

        $map = [
            1 => 'page1',
            2 => 'page2',
        ];
        $term->resolveEntities($map);

        $frequencies = $term->getEntityFrequencies();
        $this->assertEquals(5, $frequencies['page1']);
        $this->assertEquals(3, $frequencies['page2']);
        $this->assertCount(2, $frequencies);
    }

    /**
     * The term's letter case is preserved in both original and base.
     */
    public function testCaseSensitiveBase(): void
    {
        $term = new Term('DokuWiki');

        $this->assertEquals('DokuWiki', $term->getOriginal());
        $this->assertEquals('DokuWiki', $term->getBase());
    }

    /**
     * Several different regex meta characters in one term are all escaped.
     *
     * Only the presence of the escaped characters is checked, not the full
     * quoted string.
     */
    public function testComplexRegexCharacters(): void
    {
        $term = new Term('test[0-9]+.txt');

        $this->assertEquals('test[0-9]+.txt', $term->getOriginal());
        $this->assertEquals('test[0-9]+.txt', $term->getBase());

        $quoted = $term->getQuoted();
        $this->assertStringContainsString('\\[', $quoted);
        $this->assertStringContainsString('\\]', $quoted);
        $this->assertStringContainsString('\\+', $quoted);
        $this->assertStringContainsString('\\.', $quoted);
    }
}