<?php

namespace dokuwiki\test\Search;

use dokuwiki\Search\Collection\PageFulltextCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Indexer;

/**
 * Tests the index integrity checking
 *
 * Each test starts from an index directory without any *.idx files. The
 * corruption tests index one page, append a bogus line to a single index
 * file and expect the integrity check to raise an IndexIntegrityException.
 */
class IntegrityTest extends \DokuWikiTest
{
    /**
     * Clear the index directory before each test
     *
     * Removes all *.idx files from the configured index directory and
     * releases all index locks.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;

        // glob() returns false on error; fall back to an empty list so that
        // foreach is never handed a non-array
        $files = glob($conf['indexdir'] . '/*.idx') ?: [];
        foreach ($files as $file) {
            // best effort cleanup, a file that cannot be removed is ignored
            @unlink($file);
        }
        \dokuwiki\Search\Index\Lock::releaseAll();
    }

    /**
     * Index a page so we have data to check
     */
    protected function indexTestPage(): void
    {
        saveWikiText('integritytest', 'Hello world testing integrity check.', 'Test');
        $indexer = new Indexer();
        $indexer->addPage('integritytest');
    }

    /**
     * Append raw data to an existing index file to simulate corruption
     *
     * Fails the current test when the file does not exist yet or cannot be
     * written, so a broken precondition is not mistaken for a passing or
     * failing integrity check.
     *
     * @param string $name file name inside the index directory, e.g. 'title.idx'
     * @param string $data raw data appended to the end of the file
     */
    protected function corruptIndexFile(string $name, string $data): void
    {
        global $conf;

        $file = $conf['indexdir'] . '/' . $name;
        // FILE_APPEND would silently create a missing file, so require it first
        $this->assertFileExists($file);
        $written = file_put_contents($file, $data, FILE_APPEND);
        $this->assertNotFalse($written, 'Failed to append to ' . $file);
    }

    /**
     * A healthy index should not throw
     */
    public function testHealthyIndex(): void
    {
        $this->indexTestPage();

        $indexer = new Indexer();
        $indexer->checkIntegrity();
        $this->assertFalse($indexer->isIndexEmpty());
    }

    /**
     * An empty index should not throw
     */
    public function testEmptyIndex(): void
    {
        $indexer = new Indexer();
        $indexer->checkIntegrity();
        $this->assertTrue($indexer->isIndexEmpty());
    }

    /**
     * Corrupted fulltext index (token/frequency mismatch) should throw
     */
    public function testCorruptedFulltextTokenFrequency(): void
    {
        $this->indexTestPage();

        // Append an extra line to a token index to create a mismatch
        $collection = new PageFulltextCollection();
        $max = $collection->getTokenIndexMaximum();
        $this->assertGreaterThan(0, $max);

        $this->corruptIndexFile('w' . $max . '.idx', "corruptedentry\n");

        // set the expectation only now, so an exception during setup is an error
        $this->expectException(IndexIntegrityException::class);
        (new PageFulltextCollection())->checkIntegrity();
    }

    /**
     * Corrupted fulltext index (entity/reverse mismatch) should throw
     */
    public function testCorruptedFulltextEntityReverse(): void
    {
        $this->indexTestPage();

        $this->corruptIndexFile('pageword.idx', "0\n");

        $this->expectException(IndexIntegrityException::class);
        (new PageFulltextCollection())->checkIntegrity();
    }

    /**
     * Corrupted title index (entity/token mismatch) should throw
     */
    public function testCorruptedTitleIndex(): void
    {
        $this->indexTestPage();

        $this->corruptIndexFile('title.idx', "extra title\n");

        $this->expectException(IndexIntegrityException::class);
        (new PageTitleCollection())->checkIntegrity();
    }

    /**
     * Indexer.checkIntegrity aggregates all collection checks
     *
     * Corrupts only the title index and expects the Indexer level check to
     * report it.
     */
    public function testIndexerCheckIntegrityDetectsCorruption(): void
    {
        $this->indexTestPage();

        // Corrupt title index
        $this->corruptIndexFile('title.idx', "extra title\n");

        $this->expectException(IndexIntegrityException::class);
        (new Indexer())->checkIntegrity();
    }
}