<?php

namespace dokuwiki\plugin\statistics\test;

use DokuWikiTest;
use dokuwiki\plugin\statistics\SearchEngines;

/**
 * Tests for the SearchEngines class
 *
 * Every data provider yields rows of the same shape:
 * [referer URL, expected isSearchEngine() result, expected engine key,
 *  expected display name, expected cleaned query].
 *
 * @group plugin_statistics
 * @group plugins
 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Data provider for testing known search engines
     *
     * Static because it does not need test-case state; static providers are
     * accepted by old PHPUnit versions and required by newer ones.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test',
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework',
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query',
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin',
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test',
            ],

            // Yahoo
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search',
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query',
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test',
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search',
            ],

            // Baidu
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search',
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word',
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test',
            ],

            // Ask
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search',
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask',
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results',
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search',
            ],

            // Ecosia
            'ecosia.org' => [
                'https://www.ecosia.org/search?method=index&q=eco+friendly+search',
                true,
                'ecosia',
                'Ecosia',
                'eco friendly search',
            ],

            // Qwant
            'qwant.com' => [
                'https://www.qwant.com/?q=dokuwiki&t=web',
                true,
                'qwant',
                'Qwant',
                'dokuwiki',
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search',
            ],

            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search',
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search',
            ],

            // AVG
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'avg',
                'AVG Safe Search',
                'avg search',
            ],

            // Brave
            'brave.com' => [
                'https://search.brave.com/search?q=brave+search',
                true,
                'brave',
                'Brave Search',
                'brave search',
            ],
        ];
    }

    /**
     * Data provider for testing generic search engines
     *
     * These hosts are not expected to be in the known-engine list; the
     * expected engine key and name are derived from the host name.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search',
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query',
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term',
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test',
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword',
            ],
        ];
    }

    /**
     * Data provider for testing non-search engine referers
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null,
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null,
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null,
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null,
            ],
        ];
    }

    /**
     * Data provider for testing query cleaning
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test',
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search',
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces',
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed',
            ],
        ];
    }

    /**
     * Data provider for testing fragment-based queries
     *
     * The query is carried in the URL fragment (after "#") instead of the
     * query string.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query',
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test',
            ],
        ];
    }

    /**
     * Shared assertions for all data-driven referer tests
     *
     * Builds a SearchEngines instance for the referer and compares its
     * detection flag, engine key and query with the expectations. When an
     * engine is expected, the display name returned by the static
     * SearchEngines::getName() lookup is checked as well.
     *
     * @param string $referer the referer URL to analyze
     * @param bool $expectedIsSearchEngine expected result of isSearchEngine()
     * @param string|null $expectedEngine expected result of getEngine()
     * @param string|null $expectedName expected display name for the engine
     * @param string|null $expectedQuery expected result of getQuery()
     */
    private function assertAnalysis(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertEquals($expectedIsSearchEngine, $searchEngine->isSearchEngine());
        $this->assertEquals($expectedEngine, $searchEngine->getEngine());
        $this->assertEquals($expectedQuery, $searchEngine->getQuery());

        // there is no name to look up when no engine is expected
        if ($expectedEngine !== null) {
            $this->assertEquals($expectedName, SearchEngines::getName($expectedEngine));
        }
    }

    /**
     * Test known search engines
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertAnalysis($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test generic search engines
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertAnalysis($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test non-search engine referers
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertAnalysis($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test query cleaning functionality
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertAnalysis($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test fragment-based queries
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertAnalysis($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test static getName method with unknown engine
     */
    public function testGetNameUnknownEngine(): void
    {
        $unknownEngine = 'unknown_engine';
        $this->assertEquals('Unknown_engine', SearchEngines::getName($unknownEngine));
    }

    /**
     * Test static getUrl method
     */
    public function testGetUrl(): void
    {
        $this->assertEquals('https://www.google.com', SearchEngines::getUrl('google'));
        $this->assertEquals('https://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test case insensitive domain matching
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $referer = 'https://WWW.GOOGLE.COM/search?q=case+test';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('case test', $searchEngine->getQuery());
    }

    /**
     * Test URL encoding in queries
     */
    public function testUrlEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=url%20encoded%20query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('url encoded query', $searchEngine->getQuery());
    }

    /**
     * Test plus encoding in queries
     */
    public function testPlusEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=plus+encoded+query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('plus encoded query', $searchEngine->getQuery());
    }

    /**
     * Test empty constructor behavior
     */
    public function testEmptyReferer(): void
    {
        $searchEngine = new SearchEngines('');

        $this->assertFalse($searchEngine->isSearchEngine());
        $this->assertNull($searchEngine->getEngine());
        $this->assertNull($searchEngine->getQuery());
    }
}