xref: /plugin/statistics/_test/SearchEnginesTest.php (revision 6c5dec65792f8dd0cb5ec53c3da4a034a393f571)
1<?php
2
3namespace dokuwiki\plugin\statistics\test;
4
5use DokuWikiTest;
6use dokuwiki\plugin\statistics\SearchEngines;
7
8/**
9 * Tests for the SearchEngines class
10 *
11 * @group plugin_statistics
12 * @group plugins
13 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Data sets for referers that belong to explicitly known search engines
     *
     * Each data set is [referer, is search engine, engine key, engine name, query].
     *
     * @return array[]
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test',
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework',
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query',
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin',
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test',
            ],

            // Yahoo
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search',
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query',
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test',
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search',
            ],

            // Baidu
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search',
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word',
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test',
            ],

            // Ask
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search',
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask',
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results',
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search',
            ],

            // Ecosia
            'ecosia.org' => [
                'https://www.ecosia.org/search?method=index&q=eco+friendly+search',
                true,
                'ecosia',
                'Ecosia',
                'eco friendly search',
            ],

            // Qwant
            'qwant.com' => [
                'https://www.qwant.com/?q=dokuwiki&t=web',
                true,
                'qwant',
                'Qwant',
                'dokuwiki',
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search',
            ],
            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search',
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search',
            ],

            // Google AVG
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'google_avg',
                'Google',
                'avg search',
            ],
        ];
    }

    /**
     * Data sets for referers of unlisted sites that still carry a typical search parameter
     *
     * Each data set is [referer, is search engine, engine key, engine name, query].
     *
     * @return array[]
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search',
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query',
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term',
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test',
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword',
            ],
        ];
    }

    /**
     * Data sets for referers that must not be recognized as search engines
     *
     * Each data set is [referer, is search engine, engine key, engine name, query].
     *
     * @return array[]
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null,
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null,
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null,
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null,
            ],
        ];
    }

    /**
     * Data sets for queries that need cleaning before they are reported
     *
     * Each data set is [referer, is search engine, engine key, engine name, query].
     *
     * @return array[]
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test',
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search',
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces',
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed',
            ],
        ];
    }

    /**
     * Data sets for referers that carry the query in the URL fragment
     *
     * Each data set is [referer, is search engine, engine key, engine name, query].
     *
     * @return array[]
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query',
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test',
            ],
        ];
    }

    /**
     * Analyze a referer and compare every part of the result with the expectation
     *
     * Strict comparisons are used on purpose: a loose comparison would accept
     * an empty string or false where null is expected.
     *
     * @param string $referer the referer URL to analyze
     * @param bool $expectedIsSearchEngine expected result of isSearchEngine()
     * @param string|null $expectedEngine expected result of getEngine()
     * @param string|null $expectedName expected display name for $expectedEngine, only checked when an engine is expected
     * @param string|null $expectedQuery expected result of getQuery()
     */
    private function assertReferer(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertSame(
            $expectedIsSearchEngine,
            $searchEngine->isSearchEngine(),
            'search engine detection for ' . $referer
        );
        $this->assertSame($expectedEngine, $searchEngine->getEngine(), 'engine key for ' . $referer);
        $this->assertSame($expectedQuery, $searchEngine->getQuery(), 'query for ' . $referer);

        // the display name is looked up by engine key, so it only exists when an engine is expected
        if ($expectedEngine !== null) {
            $this->assertSame(
                $expectedName,
                SearchEngines::getName($expectedEngine),
                'display name for ' . $expectedEngine
            );
        }
    }

    /**
     * Test known search engines
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test generic search engines
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test non-search engine referers
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test query cleaning functionality
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test fragment-based queries
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertReferer($referer, $expectedIsSearchEngine, $expectedEngine, $expectedName, $expectedQuery);
    }

    /**
     * Test static getName method with unknown engine
     */
    public function testGetNameUnknownEngine(): void
    {
        $this->assertSame('Unknown_engine', SearchEngines::getName('unknown_engine'));
    }

    /**
     * Test static getUrl method
     */
    public function testGetUrl(): void
    {
        $this->assertSame('http://www.google.com', SearchEngines::getUrl('google'));
        $this->assertSame('http://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test DokuWiki internal search detection
     */
    public function testDokuWikiInternalSearch(): void
    {
        // Provide DOKU_URL only when nothing has defined it yet.
        // NOTE(review): if the constant already exists with a different value, the
        // hard-coded referer below may not match it - confirm against the test bootstrap.
        if (!defined('DOKU_URL')) {
            define('DOKU_URL', 'https://wiki.example.com/');
        }

        $this->assertReferer(
            'https://wiki.example.com/doku.php?do=search&q=internal+search',
            true,
            'dokuwiki',
            'DokuWiki Internal Search',
            'internal search'
        );
    }

    /**
     * Test case insensitive domain matching
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $this->assertReferer('https://WWW.GOOGLE.COM/search?q=case+test', true, 'google', 'Google', 'case test');
    }

    /**
     * Test URL encoding in queries
     */
    public function testUrlEncodedQueries(): void
    {
        $this->assertReferer(
            'https://www.google.com/search?q=url%20encoded%20query',
            true,
            'google',
            'Google',
            'url encoded query'
        );
    }

    /**
     * Test plus encoding in queries
     */
    public function testPlusEncodedQueries(): void
    {
        $this->assertReferer(
            'https://www.google.com/search?q=plus+encoded+query',
            true,
            'google',
            'Google',
            'plus encoded query'
        );
    }

    /**
     * Test empty constructor behavior
     */
    public function testEmptyReferer(): void
    {
        $this->assertReferer('', false, null, null, null);
    }
}
533