<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\Term;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for dokuwiki\Search\Collection\Term.
 *
 * Two areas are covered: how a search term string is broken down into its
 * original, base, quoted, length and wildcard properties, and how matches
 * recorded through addMatch() are reported by the various getters.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * Run the five property checks shared by the basic wildcard tests.
     *
     * @param Term $term the term under test
     * @param string $original expected getOriginal() value
     * @param string $base expected getBase() value
     * @param string $quoted expected getQuoted() value
     * @param int $length expected getLength() value
     * @param mixed $wildcard expected getWildcard() value (a Term::WILDCARD_* constant or a sum of them)
     */
    private function assertTermProperties(
        Term $term,
        string $original,
        string $base,
        string $quoted,
        int $length,
        $wildcard
    ): void {
        $this->assertEquals($original, $term->getOriginal());
        $this->assertEquals($base, $term->getBase());
        $this->assertEquals($quoted, $term->getQuoted());
        $this->assertEquals($length, $term->getLength());
        $this->assertEquals($wildcard, $term->getWildcard());
    }

    /**
     * A plain word is its own base and quoted form and carries no wildcard.
     */
    public function testBasicExact()
    {
        $this->assertTermProperties(
            new Term('dokuwiki'),
            'dokuwiki',
            'dokuwiki',
            'dokuwiki',
            8,
            Term::WILDCARD_NONE
        );
    }

    /**
     * A leading asterisk is removed from the base and shows up as '.*' in
     * front of the quoted form.
     */
    public function testBasicLeftWildcard()
    {
        $this->assertTermProperties(
            new Term('*wiki'),
            '*wiki',
            'wiki',
            '.*wiki',
            4,
            Term::WILDCARD_START
        );
    }

    /**
     * A trailing asterisk is removed from the base and shows up as '.*' at
     * the end of the quoted form.
     */
    public function testBasicRightWildcard()
    {
        $this->assertTermProperties(
            new Term('wiki*'),
            'wiki*',
            'wiki',
            'wiki.*',
            4,
            Term::WILDCARD_END
        );
    }

    /**
     * Asterisks on both ends are reported as the sum of the start and end
     * wildcard constants.
     */
    public function testBasicBothWildcard()
    {
        $this->assertTermProperties(
            new Term('*wiki*'),
            '*wiki*',
            'wiki',
            '.*wiki.*',
            4,
            Term::WILDCARD_START + Term::WILDCARD_END
        );
    }

    /**
     * An empty string yields an empty original, an empty base and length 0.
     */
    public function testEmptyTerm()
    {
        $blank = new Term('');

        $this->assertEquals('', $blank->getOriginal());
        $this->assertEquals('', $blank->getBase());
        $this->assertEquals(0, $blank->getLength());
    }

    /**
     * Recording the same token twice for one entity is expected to add the
     * frequencies up (7 + 7 = 14) while the token is listed only once.
     */
    public function testAddMatch()
    {
        $collector = new Term('dokuwiki');

        $recorded = [
            ['page1', 'dokuwiki', 7],
            ['page1', 'dokuwiki', 7],
            ['page2', 'dokuwiki', 1],
        ];
        foreach ($recorded as [$entity, $token, $frequency]) {
            $collector->addMatch($entity, $token, $frequency);
        }

        $expectedFrequencies = ['page1' => 14, 'page2' => 1];
        $expectedEntityTokens = ['page1' => ['dokuwiki'], 'page2' => ['dokuwiki']];

        $this->assertEquals($expectedFrequencies, $collector->getEntityFrequencies());
        $this->assertEquals(['dokuwiki'], $collector->getTokens());
        $this->assertEquals($expectedEntityTokens, $collector->getEntityTokens());
    }

    /**
     * A two-character numeric term keeps its value, reports length 2 and has
     * no wildcard.
     *
     * NOTE(review): the previous comment here referred to a minimum word
     * length; nothing in this file shows such a limit - confirm against Term.
     */
    public function testNumericTerm()
    {
        $number = new Term('42');

        $this->assertEquals('42', $number->getOriginal());
        $this->assertEquals('42', $number->getBase());
        $this->assertEquals(2, $number->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $number->getWildcard());
    }

    /**
     * A dot inside the term stays in the original and base but is expected
     * to be backslash-escaped in the quoted form.
     */
    public function testSpecialCharactersQuoting()
    {
        $fileName = new Term('test.doc');

        $this->assertEquals('test.doc', $fileName->getOriginal());
        $this->assertEquals('test.doc', $fileName->getBase());
        // only the quoted variant carries the escaped dot
        $this->assertEquals('test\\.doc', $fileName->getQuoted());
    }

    /**
     * With a dot directly before a trailing asterisk, the dot remains part
     * of the base and is escaped, while the asterisk turns into '.*'.
     */
    public function testSpecialCharactersWithWildcard()
    {
        $prefixSearch = new Term('test.*');

        $this->assertEquals('test.*', $prefixSearch->getOriginal());
        $this->assertEquals('test.', $prefixSearch->getBase());
        // escaped literal dot first, then the expanded wildcard
        $this->assertEquals('test\\..*', $prefixSearch->getQuoted());
        $this->assertEquals(Term::WILDCARD_END, $prefixSearch->getWildcard());
    }

    /**
     * Wildcards on both ends are stripped from the base.
     *
     * NOTE(review): this uses the same input as testBasicBothWildcard and
     * does not exercise whitespace handling, although the previous comment
     * suggested it would.
     */
    public function testWildcardTrimming()
    {
        $wrapped = new Term('*wiki*');
        $bothEnds = Term::WILDCARD_START + Term::WILDCARD_END;

        $this->assertEquals('*wiki*', $wrapped->getOriginal());
        $this->assertEquals('wiki', $wrapped->getBase());
        $this->assertEquals('.*wiki.*', $wrapped->getQuoted());
        $this->assertEquals($bothEnds, $wrapped->getWildcard());
    }

    /**
     * A single-character term is accepted unchanged. According to the
     * original author's note, length filtering is left to the caller.
     */
    public function testShortTerm()
    {
        $single = new Term('a');

        $this->assertEquals('a', $single->getBase());
        $this->assertEquals(1, $single->getLength());
    }

    /**
     * A term made of asterisks only is accepted and ends up with an empty
     * base of length 0.
     */
    public function testOnlyWildcards()
    {
        $stars = new Term('***');

        $this->assertEquals('', $stars->getBase());
        $this->assertEquals(0, $stars->getLength());
    }

    /**
     * Several different tokens matching on the same entity: frequencies are
     * expected to be summed per entity, while tokens stay available
     * globally, per entity and in the detailed match list.
     */
    public function testFrequencyAggregationAcrossTokens()
    {
        $infix = new Term('*wiki*');

        $recorded = [
            ['page1', 'wiki', 5],
            ['page1', 'dokuwiki', 3],
            ['page1', 'wikitext', 2],
            ['page2', 'wikipedia', 7],
        ];
        foreach ($recorded as [$entity, $token, $frequency]) {
            $infix->addMatch($entity, $token, $frequency);
        }

        // per-entity totals: page1 is 5 + 3 + 2
        $totals = $infix->getEntityFrequencies();
        $this->assertEquals(10, $totals['page1']);
        $this->assertEquals(7, $totals['page2']);

        // every distinct token across all entities; sorted to ignore order
        $allTokens = $infix->getTokens();
        sort($allTokens);
        $this->assertEquals(['dokuwiki', 'wiki', 'wikipedia', 'wikitext'], $allTokens);

        // tokens grouped by the entity they were found on
        $tokensByEntity = $infix->getEntityTokens();
        $this->assertCount(3, $tokensByEntity['page1']);
        $this->assertEquals(['wikipedia'], $tokensByEntity['page2']);

        // full entity => token => frequency detail
        $detail = $infix->getMatches();
        $this->assertEquals(['wiki' => 5, 'dokuwiki' => 3, 'wikitext' => 2], $detail['page1']);
        $this->assertEquals(['wikipedia' => 7], $detail['page2']);
    }

    /**
     * A match recorded with frequency 0 is expected to be reported as 0 for
     * its entity, next to the non-zero entities.
     */
    public function testZeroFrequency()
    {
        $collector = new Term('dokuwiki');

        $recorded = [
            ['page1', 'dokuwiki', 5],
            ['page2', 'dokuwiki', 0],
            ['page3', 'dokuwiki', 3],
        ];
        foreach ($recorded as [$entity, $token, $frequency]) {
            $collector->addMatch($entity, $token, $frequency);
        }

        $totals = $collector->getEntityFrequencies();
        $this->assertEquals(5, $totals['page1']);
        $this->assertEquals(0, $totals['page2']);
        $this->assertEquals(3, $totals['page3']);
    }

    /**
     * Without any recorded match, all result getters return empty arrays.
     */
    public function testEmptyResults()
    {
        $untouched = new Term('dokuwiki');
        $nothing = [];

        $this->assertEquals($nothing, $untouched->getMatches());
        $this->assertEquals($nothing, $untouched->getEntityFrequencies());
        $this->assertEquals($nothing, $untouched->getEntityTokens());
        $this->assertEquals($nothing, $untouched->getTokens());
    }

    /**
     * Upper and lower case letters are kept as given in original and base.
     */
    public function testCaseSensitiveBase()
    {
        $mixedCase = new Term('DokuWiki');

        $this->assertEquals('DokuWiki', $mixedCase->getOriginal());
        $this->assertEquals('DokuWiki', $mixedCase->getBase());
    }

    /**
     * A term with several regex metacharacters keeps them verbatim in
     * original and base; the quoted form is checked for backslash-escaped
     * '[', ']', '+' and '.'.
     */
    public function testComplexRegexCharacters()
    {
        $pattern = new Term('test[0-9]+.txt');

        $this->assertEquals('test[0-9]+.txt', $pattern->getOriginal());
        $this->assertEquals('test[0-9]+.txt', $pattern->getBase());

        $quoted = $pattern->getQuoted();
        foreach (['\\[', '\\]', '\\+', '\\.'] as $escapedChar) {
            $this->assertStringContainsString($escapedChar, $quoted);
        }
    }
}