xref: /plugin/statistics/_test/SearchEnginesTest.php (revision 45f4cdff9eeee357a9c7da871bb438b139ad9748)
1<?php
2
3namespace dokuwiki\plugin\statistics\test;
4
5use DokuWikiTest;
6use dokuwiki\plugin\statistics\SearchEngines;
7
/**
 * Tests for the SearchEngines class
 *
 * Every data provider in this class yields rows with the same five columns:
 *
 *   0. the referer URL handed to the SearchEngines constructor
 *   1. the expected result of isSearchEngine()
 *   2. the expected engine key returned by getEngine() (null when not detected)
 *   3. the expected display name returned by SearchEngines::getName() for that key
 *   4. the expected cleaned query returned by getQuery() (null when not detected)
 *
 * The providers are static so that they work on PHPUnit 8/9 as well as on
 * PHPUnit 10+, where non-static data providers are deprecated.
 *
 * @group plugin_statistics
 * @group plugins
 */
class SearchEnginesTest extends DokuWikiTest
{
    /**
     * Data provider for testing known search engines
     *
     * Covers the explicitly named engines, including country specific domains
     * and alternative query parameter names for the same engine.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function knownSearchEnginesProvider(): array
    {
        return [
            // Google variants
            'google.com' => [
                'https://www.google.com/search?q=dokuwiki+test',
                true,
                'google',
                'Google',
                'dokuwiki test'
            ],
            'google.co.uk' => [
                'https://www.google.co.uk/search?q=php+framework',
                true,
                'google',
                'Google',
                'php framework'
            ],
            'google.de' => [
                'https://www.google.de/search?q=test+query',
                true,
                'google',
                'Google',
                'test query'
            ],

            // Bing
            'bing.com' => [
                'https://www.bing.com/search?q=dokuwiki+plugin',
                true,
                'bing',
                'Bing',
                'dokuwiki plugin'
            ],
            'bing.co.uk' => [
                'https://www.bing.co.uk/search?q=search+test',
                true,
                'bing',
                'Bing',
                'search test'
            ],

            // Yahoo
            'yahoo.com' => [
                'https://search.yahoo.com/search?p=test+search',
                true,
                'yahoo',
                'Yahoo!',
                'test search'
            ],

            // Yandex
            'yandex.ru' => [
                'https://yandex.ru/search/?query=test+query',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'test query'
            ],
            'yandex.com' => [
                'https://yandex.com/search/?query=another+test',
                true,
                'yandex',
                'Яндекс (Yandex)',
                'another test'
            ],

            // Naver
            'naver.com' => [
                'https://search.naver.com/search.naver?query=korean+search',
                true,
                'naver',
                '네이버 (Naver)',
                'korean search'
            ],

            // Baidu (accepts several query parameter names)
            'baidu.com' => [
                'https://www.baidu.com/s?wd=chinese+search',
                true,
                'baidu',
                '百度 (Baidu)',
                'chinese search'
            ],
            'baidu.com word param' => [
                'https://www.baidu.com/s?word=test+word',
                true,
                'baidu',
                '百度 (Baidu)',
                'test word'
            ],
            'baidu.com kw param' => [
                'https://www.baidu.com/s?kw=keyword+test',
                true,
                'baidu',
                '百度 (Baidu)',
                'keyword test'
            ],

            // Ask (search-results.com has its own engine key but shares the display name)
            'ask.com' => [
                'https://www.ask.com/web?q=ask+search',
                true,
                'ask',
                'Ask',
                'ask search'
            ],
            'ask.com ask param' => [
                'https://www.ask.com/web?ask=test+ask',
                true,
                'ask',
                'Ask',
                'test ask'
            ],
            'search-results.com' => [
                'https://www.search-results.com/web?q=search+results',
                true,
                'ask_search_results',
                'Ask',
                'search results'
            ],

            // DuckDuckGo
            'duckduckgo.com' => [
                'https://duckduckgo.com/?q=privacy+search',
                true,
                'duckduckgo',
                'DuckDuckGo',
                'privacy search'
            ],

            // Ecosia
            'ecosia.org' => [
                'https://www.ecosia.org/search?method=index&q=eco+friendly+search',
                true,
                'ecosia',
                'Ecosia',
                'eco friendly search'
            ],

            // Qwant
            'qwant.com' => [
                'https://www.qwant.com/?q=dokuwiki&t=web',
                true,
                'qwant',
                'Qwant',
                'dokuwiki'
            ],

            // AOL
            'aol.com' => [
                'https://search.aol.com/aol/search?query=aol+search',
                true,
                'aol',
                'AOL Search',
                'aol search'
            ],

            'aol.co.uk' => [
                'https://search.aol.co.uk/aol/search?q=uk+search',
                true,
                'aol',
                'AOL Search',
                'uk search'
            ],

            // Babylon
            'babylon.com' => [
                'https://search.babylon.com/?q=babylon+search',
                true,
                'babylon',
                'Babylon',
                'babylon search'
            ],

            // Google AVG (own engine key, reported under the Google display name)
            'avg.com' => [
                'https://search.avg.com/search?q=avg+search',
                true,
                'google_avg',
                'Google',
                'avg search'
            ],
        ];
    }

    /**
     * Data provider for testing generic search engines
     *
     * These hosts are not named engines; each row uses a different common
     * query parameter name. The expected engine key is a label taken from the
     * host name and the expected display name is that key with its first
     * letter upper-cased.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function genericSearchEnginesProvider(): array
    {
        return [
            'generic with q param' => [
                'https://search.example.com/?q=generic+search',
                true,
                'example',
                'Example',
                'generic search'
            ],
            'generic with query param' => [
                'https://find.testsite.org/search?query=test+query',
                true,
                'testsite',
                'Testsite',
                'test query'
            ],
            'generic with search param' => [
                'https://www.searchengine.net/?search=search+term',
                true,
                'searchengine',
                'Searchengine',
                'search term'
            ],
            'generic with keywords param' => [
                'https://lookup.site.com/?keywords=keyword+test',
                true,
                'site',
                'Site',
                'keyword test'
            ],
            'generic with keyword param' => [
                'https://engine.co.uk/?keyword=single+keyword',
                true,
                'engine',
                'Engine',
                'single keyword'
            ],
        ];
    }

    /**
     * Data provider for testing non-search engine referers
     *
     * Every row expects isSearchEngine() to be false and no engine, name or
     * query to be reported.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function nonSearchEngineProvider(): array
    {
        return [
            'regular website' => [
                'https://www.example.com/page',
                false,
                null,
                null,
                null
            ],
            'social media' => [
                'https://www.facebook.com/share',
                false,
                null,
                null,
                null
            ],
            'search engine without query' => [
                'https://www.google.com/',
                false,
                null,
                null,
                null
            ],
            'search engine with empty query' => [
                'https://www.google.com/search?q=',
                false,
                null,
                null,
                null
            ],
            'invalid URL' => [
                'not-a-url',
                false,
                null,
                null,
                null
            ],
            'URL without host' => [
                '/local/path',
                false,
                null,
                null,
                null
            ],
        ];
    }

    /**
     * Data provider for testing query cleaning
     *
     * The expected queries have "cache:" / "related:" operator terms removed,
     * runs of whitespace collapsed and surrounding whitespace trimmed.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function queryCleaningProvider(): array
    {
        return [
            'cache query removed' => [
                'https://www.google.com/search?q=cache:example.com+test',
                true,
                'google',
                'Google',
                'test'
            ],
            'related query removed' => [
                'https://www.google.com/search?q=related:example.com+search',
                true,
                'google',
                'Google',
                'search'
            ],
            'multiple spaces compacted' => [
                'https://www.google.com/search?q=test++multiple+++spaces',
                true,
                'google',
                'Google',
                'test multiple spaces'
            ],
            'whitespace trimmed' => [
                'https://www.google.com/search?q=++trimmed++',
                true,
                'google',
                'Google',
                'trimmed'
            ],
        ];
    }

    /**
     * Data provider for testing fragment-based queries
     *
     * The query is carried in the URL fragment (after "#") instead of the
     * query string.
     *
     * @return array<string, array{0: string, 1: bool, 2: ?string, 3: ?string, 4: ?string}>
     */
    public static function fragmentQueryProvider(): array
    {
        return [
            'fragment query' => [
                'https://www.google.com/search#q=fragment+query',
                true,
                'google',
                'Google',
                'fragment query'
            ],
            'fragment with multiple params' => [
                'https://www.bing.com/search#q=fragment+test&other=param',
                true,
                'bing',
                'Bing',
                'fragment test'
            ],
        ];
    }

    /**
     * Analyze a referer and compare the outcome with one provider row
     *
     * Shared by all provider driven tests that expect a positive detection, so
     * that each of them checks the same four things: detection flag, engine
     * key, cleaned query and display name.
     *
     * @param string $referer the referer URL to analyze
     * @param bool $expectedIsSearchEngine expected result of isSearchEngine()
     * @param string|null $expectedEngine expected engine key
     * @param string|null $expectedName expected display name for the engine key
     * @param string|null $expectedQuery expected cleaned query
     */
    private function assertDetection(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertSame($expectedIsSearchEngine, $searchEngine->isSearchEngine());
        $this->assertSame($expectedEngine, $searchEngine->getEngine());
        $this->assertSame($expectedQuery, $searchEngine->getQuery());

        // the display name can only be looked up when an engine key is expected
        if ($expectedEngine !== null) {
            $this->assertSame($expectedName, SearchEngines::getName($expectedEngine));
        }
    }

    /**
     * Test known search engines
     * @dataProvider knownSearchEnginesProvider
     */
    public function testKnownSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertDetection(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test generic search engines
     * @dataProvider genericSearchEnginesProvider
     */
    public function testGenericSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertDetection(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test non-search engine referers
     *
     * $expectedName is part of the shared row layout but is always null here;
     * no display name is looked up because no engine key is expected.
     *
     * @dataProvider nonSearchEngineProvider
     */
    public function testNonSearchEngines(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $searchEngine = new SearchEngines($referer);

        $this->assertSame($expectedIsSearchEngine, $searchEngine->isSearchEngine());

        // NOTE(review): assertEquals compares loosely, so an empty string would
        // also satisfy these null expectations. Tighten to assertSame once it is
        // confirmed that getEngine()/getQuery() return null for all of these rows.
        $this->assertEquals($expectedEngine, $searchEngine->getEngine());
        $this->assertEquals($expectedQuery, $searchEngine->getQuery());
    }

    /**
     * Test query cleaning functionality
     * @dataProvider queryCleaningProvider
     */
    public function testQueryCleaning(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertDetection(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test fragment-based queries
     * @dataProvider fragmentQueryProvider
     */
    public function testFragmentQueries(
        string $referer,
        bool $expectedIsSearchEngine,
        ?string $expectedEngine,
        ?string $expectedName,
        ?string $expectedQuery
    ): void {
        $this->assertDetection(
            $referer,
            $expectedIsSearchEngine,
            $expectedEngine,
            $expectedName,
            $expectedQuery
        );
    }

    /**
     * Test static getName method with unknown engine
     *
     * An engine key without a configured display name is expected to come
     * back with its first letter upper-cased.
     */
    public function testGetNameUnknownEngine(): void
    {
        $this->assertSame('Unknown_engine', SearchEngines::getName('unknown_engine'));
    }

    /**
     * Test static getUrl method
     *
     * Known engine keys yield their home page URL, unknown keys yield null.
     */
    public function testGetUrl(): void
    {
        $this->assertSame('http://www.google.com', SearchEngines::getUrl('google'));
        $this->assertSame('http://www.bing.com', SearchEngines::getUrl('bing'));
        $this->assertNull(SearchEngines::getUrl('unknown_engine'));
    }

    /**
     * Test DokuWiki internal search detection
     */
    public function testDokuWikiInternalSearch(): void
    {
        // Fallback only: constants cannot be redefined, so this takes effect
        // solely when DOKU_URL has not been defined before this test runs.
        // NOTE(review): if the test environment defines DOKU_URL differently,
        // the referer below has to match that value instead - confirm.
        if (!defined('DOKU_URL')) {
            define('DOKU_URL', 'https://wiki.example.com/');
        }

        $searchEngine = new SearchEngines(
            'https://wiki.example.com/doku.php?do=search&q=internal+search'
        );

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('dokuwiki', $searchEngine->getEngine());
        $this->assertSame('internal search', $searchEngine->getQuery());
        $this->assertSame('DokuWiki Internal Search', SearchEngines::getName('dokuwiki'));
    }

    /**
     * Test case insensitive domain matching
     *
     * An upper-case host name must still be recognized as the same engine.
     */
    public function testCaseInsensitiveDomainMatching(): void
    {
        $searchEngine = new SearchEngines('https://WWW.GOOGLE.COM/search?q=case+test');

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('case test', $searchEngine->getQuery());
    }

    /**
     * Test URL encoding in queries
     *
     * Percent-encoded spaces (%20) are expected to be decoded to spaces.
     */
    public function testUrlEncodedQueries(): void
    {
        $searchEngine = new SearchEngines('https://www.google.com/search?q=url%20encoded%20query');

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('url encoded query', $searchEngine->getQuery());
    }

    /**
     * Test plus encoding in queries
     *
     * Plus signs are expected to be decoded to spaces.
     */
    public function testPlusEncodedQueries(): void
    {
        $searchEngine = new SearchEngines('https://www.google.com/search?q=plus+encoded+query');

        $this->assertTrue($searchEngine->isSearchEngine());
        $this->assertSame('google', $searchEngine->getEngine());
        $this->assertSame('plus encoded query', $searchEngine->getQuery());
    }

    /**
     * Test empty constructor behavior
     *
     * An empty referer must not be detected and must report neither engine
     * nor query.
     */
    public function testEmptyReferer(): void
    {
        $searchEngine = new SearchEngines('');

        $this->assertFalse($searchEngine->isSearchEngine());
        $this->assertNull($searchEngine->getEngine());
        $this->assertNull($searchEngine->getQuery());
    }
}
547