1<?php
2
3namespace dokuwiki\plugin\statistics\test;
4
5use DokuWikiTest;
6use dokuwiki\plugin\statistics\SearchEngines;
7
8/**
9 * Tests for the SearchEngines class
10 *
11 * @group plugin_statistics
12 * @group plugins
13 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Referers of search engines that are expected to be recognised under a fixed engine key.
     *
     * Every dataset has the shape
     * [referer, expected isSearchEngine(), expected engine key, expected display name, expected query].
     *
     * The provider is static so it can be used by PHPUnit versions that require static data providers.
     *
     * @return array<string, array{0: string, 1: bool, 2: string|null, 3: string|null, 4: string|null}>
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test',
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework',
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query',
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin',
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test',
            ],

            // Yahoo (query is passed in the "p" parameter)
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search',
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query',
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test',
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search',
            ],

            // Baidu (three different query parameters are covered: wd, word, kw)
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search',
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word',
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test',
            ],

            // Ask (including the search-results.com domain, which has its own engine key)
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search',
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask',
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results',
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search',
            ],

            // Ecosia
            'ecosia.org' => [
                'https://www.ecosia.org/search?method=index&q=eco+friendly+search',
                true,
                'ecosia',
                'Ecosia',
                'eco friendly search',
            ],

            // Qwant
            'qwant.com' => [
                'https://www.qwant.com/?q=dokuwiki&t=web',
                true,
                'qwant',
                'Qwant',
                'dokuwiki',
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search',
            ],
            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search',
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search',
            ],

            // AVG
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'avg',
                'AVG Safe Search',
                'avg search',
            ],

            // Brave
            'brave.com' => [
                'https://search.brave.com/search?q=brave+search',
                true,
                'brave',
                'Brave Search',
                'brave search',
            ],
        ];
    }

    /**
     * Referers from hosts without a dedicated engine key that still carry a search-like parameter.
     *
     * The expected engine key and display name in each dataset are derived from the referer's host.
     * Dataset shape: [referer, expected isSearchEngine(), expected engine key, expected name, expected query].
     *
     * @return array<string, array{0: string, 1: bool, 2: string|null, 3: string|null, 4: string|null}>
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search',
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query',
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term',
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test',
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword',
            ],
        ];
    }

    /**
     * Referers that must not be classified as search engines.
     *
     * Engine key, display name and query are all expected to be null for these datasets.
     * Dataset shape: [referer, expected isSearchEngine(), expected engine key, expected name, expected query].
     *
     * @return array<string, array{0: string, 1: bool, 2: string|null, 3: string|null, 4: string|null}>
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null,
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null,
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null,
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null,
            ],
        ];
    }

    /**
     * Referers whose raw query needs normalising before it matches the expected query.
     *
     * Covers "cache:" and "related:" prefixed terms, repeated spaces and surrounding whitespace.
     * Dataset shape: [referer, expected isSearchEngine(), expected engine key, expected name, expected query].
     *
     * @return array<string, array{0: string, 1: bool, 2: string|null, 3: string|null, 4: string|null}>
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test',
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search',
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces',
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed',
            ],
        ];
    }

    /**
     * Referers that carry the query in the URL fragment (after "#") instead of the query string.
     *
     * Dataset shape: [referer, expected isSearchEngine(), expected engine key, expected name, expected query].
     *
     * @return array<string, array{0: string, 1: bool, 2: string|null, 3: string|null, 4: string|null}>
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query',
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test',
            ],
        ];
    }

    /**
     * Shared assertions for all data-driven referer tests.
     *
     * Builds a SearchEngines instance for the referer and compares its detection flag, engine key
     * and query with the expectations. Comparisons are strict (assertSame) so that, for example,
     * an empty string is not accepted where null is expected. The display name is only checked
     * when an engine is expected, because SearchEngines::getName() needs an engine key to look up.
     *
     * @param string $referer The referer URL under test
     * @param bool $expectedIsSearchEngine Expected result of isSearchEngine()
     * @param string|null $expectedEngine Expected result of getEngine()
     * @param string|null $expectedName Expected result of SearchEngines::getName() for the engine
     * @param string|null $expectedQuery Expected result of getQuery()
     */
    private function assertRefererAnalysis(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertSame(
            $expectedIsSearchEngine,
            $searchEngine->isSearchEngine(),
            'Unexpected isSearchEngine() result for ' . $referer
        );
        $this->assertSame(
            $expectedEngine,
            $searchEngine->getEngine(),
            'Unexpected engine for ' . $referer
        );
        $this->assertSame(
            $expectedQuery,
            $searchEngine->getQuery(),
            'Unexpected query for ' . $referer
        );

        if ($expectedEngine !== null) {
            $this->assertSame(
                $expectedName,
                SearchEngines::getName($expectedEngine),
                'Unexpected display name for engine ' . $expectedEngine
            );
        }
    }

    /**
     * Test known search engines
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test generic search engines
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test non-search engine referers
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test query cleaning functionality
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test fragment-based queries
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertRefererAnalysis(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test static getName method with unknown engine
     *
     * The expectation is the engine key with its first letter upper-cased.
     */
    public function testGetNameUnknownEngine(): void
    {
        $unknownEngine = 'unknown_engine';
        $this->assertSame('Unknown_engine', SearchEngines::getName($unknownEngine));
    }

    /**
     * Test static getUrl method
     *
     * Known engine keys are expected to yield their home page URL, unknown keys null.
     */
    public function testGetUrl(): void
    {
        $this->assertSame('https://www.google.com', SearchEngines::getUrl('google'));
        $this->assertSame('https://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test case insensitive domain matching
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $referer = 'https://WWW.GOOGLE.COM/search?q=case+test';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('case test', $searchEngine->getQuery());
    }

    /**
     * Test URL encoding in queries (spaces encoded as %20)
     */
    public function testUrlEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=url%20encoded%20query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('url encoded query', $searchEngine->getQuery());
    }

    /**
     * Test plus encoding in queries (spaces encoded as +)
     */
    public function testPlusEncodedQueries(): void
    {
        $referer = 'https://www.google.com/search?q=plus+encoded+query';
        $searchEngine = new SearchEngines($referer);

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('plus encoded query', $searchEngine->getQuery());
    }

    /**
     * Test behavior when the constructor receives an empty referer
     */
    public function testEmptyReferer(): void
    {
        $searchEngine = new SearchEngines('');

        $this->assertFalse($searchEngine->isSearchEngine());
        $this->assertNull($searchEngine->getEngine());
        $this->assertNull($searchEngine->getQuery());
    }
}
523