xref: /plugin/statistics/_test/SearchEnginesTest.php (revision 1fd51258e31fef4dcc2da00c993da44787daea96)
1<?php
2
3namespace dokuwiki\plugin\statistics\test;
4
5use DokuWikiTest;
6use dokuwiki\plugin\statistics\SearchEngines;
7
/**
 * Tests for the SearchEngines class
 *
 * The provider-driven tests hand a referer URL to SearchEngines and compare
 * isSearchEngine(), getEngine(), getQuery() and, whenever an engine is
 * expected, the static getName() lookup against the expected values.
 *
 * Every data set has the shape
 * [referer, expectedIsSearchEngine, expectedEngine, expectedName, expectedQuery].
 *
 * The data providers are static: PHPUnit 10 deprecates non-static providers,
 * while static providers are accepted by older PHPUnit versions as well.
 *
 * @group plugin_statistics
 * @group plugins
 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Data provider for testing known search engines
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test',
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework',
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query',
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin',
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test',
            ],

            // Yahoo
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search',
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query',
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test',
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search',
            ],

            // Baidu
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search',
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word',
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test',
            ],

            // Ask
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search',
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask',
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results',
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search',
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search',
            ],
            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search',
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search',
            ],

            // Google AVG
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'google_avg',
                'Google',
                'avg search',
            ],
        ];
    }

    /**
     * Data provider for testing generic search engines
     *
     * These hosts are not among the known engines above; the expected engine
     * and name are the values the data sets associate with each host.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search',
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query',
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term',
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test',
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword',
            ],
        ];
    }

    /**
     * Data provider for testing non-search engine referers
     *
     * All data sets expect isSearchEngine() to be false and carry null for
     * engine, name and query.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null,
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null,
            ],
            'search engine without query' => [
                'https://www.google.com/',
                false,
                null,
                null,
                null,
            ],
            'search engine with empty query' => [
                'https://www.google.com/search?q=',
                false,
                null,
                null,
                null,
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null,
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null,
            ],
        ];
    }

    /**
     * Data provider for testing query cleaning
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test',
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search',
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces',
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed',
            ],
        ];
    }

    /**
     * Data provider for testing fragment-based queries
     *
     * The search terms are placed after "#" instead of "?" in these referers.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query',
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test',
            ],
        ];
    }

    /**
     * Analyze a referer and compare the outcome with the expected values
     *
     * Shared by the provider-driven tests so that every data set is checked
     * the same way, including the display name whenever an engine is expected.
     *
     * @param string $referer the referer URL handed to SearchEngines
     * @param bool $expectedIsSearchEngine expected result of isSearchEngine()
     * @param string|null $expectedEngine expected result of getEngine()
     * @param string|null $expectedName expected SearchEngines::getName() result for the expected engine
     * @param string|null $expectedQuery expected result of getQuery()
     */
    private function assertRefererAnalysis(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        // strict: the flag is also checked with assertTrue()/assertFalse() elsewhere in this class
        $this->assertSame($expectedIsSearchEngine, $searchEngine->isSearchEngine(), 'isSearchEngine() mismatch');
        $this->assertEquals($expectedEngine, $searchEngine->getEngine(), 'getEngine() mismatch');
        $this->assertEquals($expectedQuery, $searchEngine->getQuery(), 'getQuery() mismatch');

        // a name can only be looked up when the data set expects an engine
        if ($expectedEngine !== null) {
            $this->assertEquals($expectedName, SearchEngines::getName($expectedEngine), 'getName() mismatch');
        }
    }

    /**
     * Test known search engines
     *
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test generic search engines
     *
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test non-search engine referers
     *
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test query cleaning functionality
     *
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test fragment-based queries
     *
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test static getName method with unknown engine
     *
     * The expected name is the engine key with its first character upper-cased.
     */
    public function testGetNameUnknownEngine(): void
    {
        $this->assertEquals('Unknown_engine', SearchEngines::getName('unknown_engine'));
    }

    /**
     * Test static getUrl method
     *
     * Known engines are expected to yield their URL, unknown ones null.
     */
    public function testGetUrl(): void
    {
        $this->assertEquals('http://www.google.com', SearchEngines::getUrl('google'));
        $this->assertEquals('http://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test DokuWiki internal search detection
     */
    public function testDokuWikiInternalSearch(): void
    {
        // Fallback for environments in which DOKU_URL has not been defined yet
        if (!defined('DOKU_URL')) {
            define('DOKU_URL', 'https://wiki.example.com/');
        }

        // Build the referer from DOKU_URL instead of hard-coding the host: when the
        // constant is already defined the define() above is skipped, so a literal
        // URL would not be guaranteed to point at this wiki.
        $referer = rtrim(DOKU_URL, '/') . '/doku.php?do=search&q=internal+search';

        $this->assertRefererAnalysis(
            $referer,
            true,
            'dokuwiki',
            'DokuWiki Internal Search',
            'internal search'
        );
    }

    /**
     * Test case insensitive domain matching
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $this->assertRefererAnalysis(
            'https://WWW.GOOGLE.COM/search?q=case+test',
            true,
            'google',
            'Google',
            'case test'
        );
    }

    /**
     * Test URL encoding in queries
     *
     * "%20" in the referer is expected to come back as a space.
     */
    public function testUrlEncodedQueries(): void
    {
        $this->assertRefererAnalysis(
            'https://www.google.com/search?q=url%20encoded%20query',
            true,
            'google',
            'Google',
            'url encoded query'
        );
    }

    /**
     * Test plus encoding in queries
     *
     * "+" in the referer is expected to come back as a space.
     */
    public function testPlusEncodedQueries(): void
    {
        $this->assertRefererAnalysis(
            'https://www.google.com/search?q=plus+encoded+query',
            true,
            'google',
            'Google',
            'plus encoded query'
        );
    }

    /**
     * Test empty constructor behavior
     *
     * Kept as explicit strict assertions: engine and query must be exactly null here.
     */
    public function testEmptyReferer(): void
    {
        $searchEngine = new SearchEngines('');

        $this->assertFalse($searchEngine->isSearchEngine());
        $this->assertNull($searchEngine->getEngine());
        $this->assertNull($searchEngine->getQuery());
    }
}
528