<?php

namespace dokuwiki\plugin\statistics\test;

use DokuWikiTest;
use dokuwiki\plugin\statistics\SearchEngines;

/**
 * Tests for the SearchEngines class
 *
 * Every data provider in this class yields rows of the same shape:
 * [referer URL, expected isSearchEngine() result, expected engine key,
 * expected display name, expected query]. The last three entries are null
 * for referers that must not be recognized as a search engine.
 *
 * @group plugin_statistics
 * @group plugins
 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Data provider: referers of explicitly supported search engines
     *
     * Static so that it also runs on PHPUnit versions that no longer accept
     * instance-method data providers.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test',
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework',
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query',
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin',
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test',
            ],

            // Yahoo
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search',
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query',
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test',
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search',
            ],

            // Baidu
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search',
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word',
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test',
            ],

            // Ask
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search',
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask',
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results',
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search',
            ],

            // Ecosia
            'ecosia.org' => [
                'https://www.ecosia.org/search?method=index&q=eco+friendly+search',
                true,
                'ecosia',
                'Ecosia',
                'eco friendly search',
            ],

            // Qwant
            'qwant.com' => [
                'https://www.qwant.com/?q=dokuwiki&t=web',
                true,
                'qwant',
                'Qwant',
                'dokuwiki',
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search',
            ],

            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search',
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search',
            ],

            // Google AVG
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'google_avg',
                'Google',
                'avg search',
            ],
        ];
    }

    /**
     * Data provider: referers of unlisted hosts that carry a common search parameter
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search',
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query',
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term',
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test',
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword',
            ],
        ];
    }

    /**
     * Data provider: referers that must not be detected as search engines
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null,
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null,
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null,
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null,
            ],
        ];
    }

    /**
     * Data provider: queries whose expected result differs from the raw parameter value
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test',
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search',
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces',
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed',
            ],
        ];
    }

    /**
     * Data provider: referers that carry the query in the URL fragment
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query',
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test',
            ],
        ];
    }

    /**
     * Shared assertions for all provider-driven tests
     *
     * Builds a SearchEngines instance for the referer and compares detection
     * flag, engine key and query against the expectations. The display name is
     * only checked when an engine is expected, because rows for non-search
     * referers carry null for both engine and name.
     *
     * @param string $referer the referer URL under test
     * @param bool $expectedIsSearchEngine expected result of isSearchEngine()
     * @param string|null $expectedEngine expected result of getEngine()
     * @param string|null $expectedName expected result of SearchEngines::getName() for that engine
     * @param string|null $expectedQuery expected result of getQuery()
     */
    private function assertReferer(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertEquals($expectedIsSearchEngine, $searchEngine->isSearchEngine());
        $this->assertEquals($expectedEngine, $searchEngine->getEngine());
        $this->assertEquals($expectedQuery, $searchEngine->getQuery());

        if ($expectedEngine !== null) {
            $this->assertEquals($expectedName, SearchEngines::getName($expectedEngine));
        }
    }

    /**
     * Test known search engines
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test generic search engines
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test non-search engine referers
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test query cleaning functionality
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test fragment-based queries
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test static getName method with unknown engine
     */
    public function testGetNameUnknownEngine(): void
    {
        $unknownEngine = 'unknown_engine';
        $this->assertEquals('Unknown_engine', SearchEngines::getName($unknownEngine));
    }

    /**
     * Test static getUrl method
     */
    public function testGetUrl(): void
    {
        $this->assertEquals('http://www.google.com', SearchEngines::getUrl('google'));
        $this->assertEquals('http://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test case insensitive domain matching
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $referer = 'https://WWW.GOOGLE.COM/search?q=case+test';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('case test', $searchEngine->getQuery());
    }

    /**
     * Test URL encoding in queries
     */
    public function testUrlEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=url%20encoded%20query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('url encoded query', $searchEngine->getQuery());
    }

    /**
     * Test plus encoding in queries
     */
    public function testPlusEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=plus+encoded+query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertEquals('google', $searchEngine->getEngine());
        $this->assertEquals('plus encoded query', $searchEngine->getQuery());
    }

    /**
     * Test empty constructor behavior
     */
    public function testEmptyReferer(): void
    {
        $searchEngine = new SearchEngines('');

        $this->assertFalse($searchEngine->isSearchEngine());
        $this->assertNull($searchEngine->getEngine());
        $this->assertNull($searchEngine->getQuery());
    }
}