<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\Term;
use dokuwiki\Search\Tokenizer;

/**
 * Tests for the search Term value object.
 *
 * Covers three areas of the Term API as exercised below:
 *  - splitting a raw term into original/base/quoted forms and a wildcard flag,
 *  - escaping of regex meta characters in the quoted form,
 *  - recording matches via addMatch() and reading them back aggregated by
 *    entity, by token, or in full detail.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * A plain term without wildcards is returned unchanged by all accessors.
     */
    public function testBasicExact()
    {
        $term = new Term('dokuwiki');

        $this->assertEquals('dokuwiki', $term->getOriginal());
        $this->assertEquals('dokuwiki', $term->getBase());
        $this->assertEquals('dokuwiki', $term->getQuoted());
        $this->assertEquals(8, $term->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $term->getWildcard());
    }

    /**
     * A leading "*" is stripped from the base and becomes ".*" in the quoted form.
     */
    public function testBasicLeftWildcard()
    {
        $term = new Term('*wiki');

        $this->assertEquals('*wiki', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START, $term->getWildcard());
    }

    /**
     * A trailing "*" is stripped from the base and becomes ".*" in the quoted form.
     */
    public function testBasicRightWildcard()
    {
        $term = new Term('wiki*');

        $this->assertEquals('wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Wildcards on both sides combine the START and END flags.
     */
    public function testBasicBothWildcard()
    {
        $term = new Term('*wiki*');

        $this->assertEquals('*wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki.*', $term->getQuoted());
        $this->assertEquals(4, $term->getLength());
        $this->assertEquals(Term::WILDCARD_START + Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * An empty string is accepted and yields an empty base of length 0.
     */
    public function testEmptyTerm()
    {
        $term = new Term('');
        $this->assertEquals('', $term->getOriginal());
        $this->assertEquals('', $term->getBase());
        $this->assertEquals(0, $term->getLength());
    }

    /**
     * Repeated matches of the same token on one entity add up their frequencies,
     * while the token itself is only listed once.
     */
    public function testAddMatch()
    {
        $term = new Term('dokuwiki');

        $term->addMatch('page1', 'dokuwiki', 7);
        $term->addMatch('page1', 'dokuwiki', 7);
        $term->addMatch('page2', 'dokuwiki', 1);

        $this->assertEquals(['page1' => 14, 'page2' => 1], $term->getEntityFrequencies());
        $this->assertEquals(['dokuwiki'], $term->getTokens());
        $this->assertEquals(['page1' => ['dokuwiki'], 'page2' => ['dokuwiki']], $term->getEntityTokens());
    }

    /**
     * A short, purely numeric term is accepted unchanged.
     */
    public function testNumericTerm()
    {
        $term = new Term('42');

        $this->assertEquals('42', $term->getOriginal());
        $this->assertEquals('42', $term->getBase());
        $this->assertEquals(2, $term->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $term->getWildcard());
    }

    /**
     * Regex meta characters in the term are escaped in the quoted form.
     */
    public function testSpecialCharactersQuoting()
    {
        $term = new Term('test.doc');

        $this->assertEquals('test.doc', $term->getOriginal());
        $this->assertEquals('test.doc', $term->getBase());
        // The literal dot must be escaped so it does not act as "any character"
        $this->assertEquals('test\\.doc', $term->getQuoted());
    }

    /**
     * Escaping and wildcard expansion work together: a literal dot followed by
     * a trailing wildcard yields an escaped dot followed by ".*".
     */
    public function testSpecialCharactersWithWildcard()
    {
        $term = new Term('test.*');

        $this->assertEquals('test.*', $term->getOriginal());
        $this->assertEquals('test.', $term->getBase());
        // "\." is the escaped literal dot, the following ".*" is the expanded wildcard
        $this->assertEquals('test\\..*', $term->getQuoted());
        $this->assertEquals(Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Wildcards on both ends are trimmed from the base.
     *
     * NOTE(review): this was originally described as checking that only
     * wildcards (not spaces) are trimmed, but the input contains no whitespace,
     * so whitespace handling is not actually exercised here.
     */
    public function testWildcardTrimming()
    {
        $term = new Term('*wiki*');

        $this->assertEquals('*wiki*', $term->getOriginal());
        $this->assertEquals('wiki', $term->getBase());
        $this->assertEquals('.*wiki.*', $term->getQuoted());
        $this->assertEquals(Term::WILDCARD_START + Term::WILDCARD_END, $term->getWildcard());
    }

    /**
     * Single character terms are accepted.
     */
    public function testShortTerm()
    {
        // Short terms are now accepted - length filtering is the caller's responsibility
        $term = new Term('a');
        $this->assertEquals('a', $term->getBase());
        $this->assertEquals(1, $term->getLength());
    }

    /**
     * A term consisting only of wildcards is accepted but has an empty base.
     */
    public function testOnlyWildcards()
    {
        $term = new Term('***');
        $this->assertEquals('', $term->getBase());
        $this->assertEquals(0, $term->getLength());
    }

    /**
     * Several different tokens matching on the same entity are summed into one
     * entity frequency, while the per-token detail stays available.
     */
    public function testFrequencyAggregationAcrossTokens()
    {
        // Simulate a wildcard search where the term matches multiple tokens on the same entity
        $term = new Term('*wiki*');

        $term->addMatch('page1', 'wiki', 5);
        $term->addMatch('page1', 'dokuwiki', 3);
        $term->addMatch('page1', 'wikitext', 2);
        $term->addMatch('page2', 'wikipedia', 7);

        $frequencies = $term->getEntityFrequencies();
        $this->assertEquals(10, $frequencies['page1']); // 5 + 3 + 2
        $this->assertEquals(7, $frequencies['page2']);

        // getTokens returns all unique tokens; sorted here so the check is order independent
        $tokens = $term->getTokens();
        sort($tokens);
        $this->assertEquals(['dokuwiki', 'wiki', 'wikipedia', 'wikitext'], $tokens);

        // getEntityTokens returns tokens per entity
        $entityTokens = $term->getEntityTokens();
        $this->assertCount(3, $entityTokens['page1']);
        $this->assertEquals(['wikipedia'], $entityTokens['page2']);

        // getMatches returns full detail: entity => [token => frequency]
        $matches = $term->getMatches();
        $this->assertEquals(['wiki' => 5, 'dokuwiki' => 3, 'wikitext' => 2], $matches['page1']);
        $this->assertEquals(['wikipedia' => 7], $matches['page2']);
    }

    /**
     * A match recorded with frequency 0 is kept and reported as 0.
     */
    public function testZeroFrequency()
    {
        $term = new Term('dokuwiki');

        $term->addMatch('page1', 'dokuwiki', 5);
        $term->addMatch('page2', 'dokuwiki', 0);
        $term->addMatch('page3', 'dokuwiki', 3);

        $frequencies = $term->getEntityFrequencies();
        $this->assertEquals(5, $frequencies['page1']);
        $this->assertEquals(0, $frequencies['page2']);
        $this->assertEquals(3, $frequencies['page3']);
    }

    /**
     * Without any recorded match all result accessors return empty arrays.
     */
    public function testEmptyResults()
    {
        $term = new Term('dokuwiki');

        $this->assertEquals([], $term->getMatches());
        $this->assertEquals([], $term->getEntityFrequencies());
        $this->assertEquals([], $term->getEntityTokens());
        $this->assertEquals([], $term->getTokens());
    }

    /**
     * The letter case of the term is preserved in original and base.
     */
    public function testCaseSensitiveBase()
    {
        $term = new Term('DokuWiki');

        $this->assertEquals('DokuWiki', $term->getOriginal());
        $this->assertEquals('DokuWiki', $term->getBase());
    }

    /**
     * Several different regex meta characters in one term are all escaped.
     */
    public function testComplexRegexCharacters()
    {
        $term = new Term('test[0-9]+.txt');

        $this->assertEquals('test[0-9]+.txt', $term->getOriginal());
        $this->assertEquals('test[0-9]+.txt', $term->getBase());

        // Only the presence of each escaped character is checked, not the exact quoted string
        $quoted = $term->getQuoted();
        $this->assertStringContainsString('\\[', $quoted);
        $this->assertStringContainsString('\\]', $quoted);
        $this->assertStringContainsString('\\+', $quoted);
        $this->assertStringContainsString('\\.', $quoted);
    }
}