xref: /plugin/statistics/Logger.php (revision d40a62919b0661e8ac2124588f3f26da5dd7bab6)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15
16class Logger
17{
18    /** @var helper_plugin_statistics The statistics helper plugin instance */
19    protected helper_plugin_statistics $hlp;
20
21    /** @var SQLiteDB The SQLite database instance */
22    protected SQLiteDB $db;
23
24    /** @var string The full user agent string */
25    protected string $uaAgent;
26
27    /** @var string The type of user agent (browser, robot, feedreader) */
28    protected string $uaType = 'browser';
29
30    /** @var string The browser/client name */
31    protected string $uaName;
32
33    /** @var string The browser/client version */
34    protected string $uaVersion;
35
36    /** @var string The operating system/platform */
37    protected string $uaPlatform;
38
39    /** @var string The unique user identifier */
40    protected string $uid;
41
42    /** @var DokuHTTPClient|null The HTTP client instance for testing */
43    protected ?DokuHTTPClient $httpClient = null;
44
45
46    /**
47     * Constructor
48     *
49     * Parses browser info and set internal vars
50     */
51    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
52    {
53        global $INPUT;
54
55        $this->hlp = $hlp;
56        $this->db = $this->hlp->getDB();
57        $this->httpClient = $httpClient;
58
59        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
60
61        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
62        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
63        $dd->discardBotInformation();
64        $dd->parse();
65
66        if ($dd->isFeedReader()) {
67            $this->uaType = 'feedreader';
68        } else if ($dd->isBot()) {
69            $this->uaType = 'robot';
70
71            // for now ignore bots
72            throw new \RuntimeException('Bot detected, not logging');
73        }
74
75        $this->uaAgent = $ua;
76        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
77        $this->uaVersion = $dd->getClient('version') ?: '0';
78        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
79        $this->uid = $this->getUID();
80
81
82        $this->logLastseen();
83    }
84
85    /**
86     * Should be called before logging
87     *
88     * This starts a transaction, so all logging is done in one go
89     */
90    public function begin(): void
91    {
92        $this->hlp->getDB()->getPdo()->beginTransaction();
93    }
94
95    /**
96     * Should be called after logging
97     *
98     * This commits the transaction started in begin()
99     */
100    public function end(): void
101    {
102        $this->hlp->getDB()->getPdo()->commit();
103    }
104
105    /**
106     * Get the unique user ID
107     *
108     * @return string The unique user identifier
109     */
110    protected function getUID(): string
111    {
112        global $INPUT;
113
114        $uid = $INPUT->str('uid');
115        if (!$uid) $uid = get_doku_pref('plgstats', false);
116        if (!$uid) $uid = session_id();
117        set_doku_pref('plgstats', $uid);
118        return $uid;
119    }
120
121    /**
122     * Return the user's session ID
123     *
124     * This is usually our own managed session, not a PHP session (only in fallback)
125     *
126     * @return string The session identifier
127     */
128    protected function getSession(): string
129    {
130        global $INPUT;
131
132        $ses = $INPUT->str('ses');
133        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
134        if (!$ses) $ses = session_id();
135        set_doku_pref('plgstatsses', $ses);
136        return $ses;
137    }
138
139    /**
140     * Log that we've seen the user (authenticated only)
141     */
142    public function logLastseen(): void
143    {
144        global $INPUT;
145
146        if (empty($INPUT->server->str('REMOTE_USER'))) return;
147
148        $this->db->exec(
149            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
150            $INPUT->server->str('REMOTE_USER'),
151        );
152    }
153
154    /**
155     * Log actions by groups
156     *
157     * @param int $pid Id of access data row (foreign key)
158     * @param string $type The type of access to log ('view','edit')
159     * @param array $groups The groups to log
160     */
161    public function logGroups(int $pid, string $type, array $groups): void
162    {
163        if (empty($groups) || !$pid) return;
164
165        $toLog = (array)$this->hlp->getConf('loggroups');
166
167        // if specific groups are configured, limit logging to them only
168        $groups = !empty(array_filter($toLog)) ? array_intersect($groups, $toLog) : $groups;
169        if (!$groups) return;
170
171        $placeholders = join(',', array_fill(0, count($groups), '(?, ?, ?)'));
172        $params = [];
173        $sql = "INSERT INTO groups (`pid`, `type`, `group`) VALUES $placeholders";
174        foreach ($groups as $group) {
175            $params[] = $pid;
176            $params[] = $type;
177            $params[] = $group;
178        }
179        $sql = rtrim($sql, ',');
180        $this->db->exec($sql, $params);
181    }
182
183    /**
184     * Log email domain, skip logging if no domain is found
185     *
186     * @param int $pid Id of access data row (foreign key)
187     * @param string $type The type of access to log ('view','edit')
188     * @param string $mail The email to extract the domain from
189     */
190    public function logDomain(int $pid, string $type, string $mail): void
191    {
192        if (!$pid) return;
193
194        $pos = strrpos($mail, '@');
195        if (!$pos) return;
196        $domain = substr($mail, $pos + 1);
197        if (empty($domain)) return;
198
199        $sql = "INSERT INTO domain (`pid`, `type`, `domain`) VALUES (?, ?, ?)";
200        $this->db->exec($sql, [$pid, $type, $domain]);
201    }
202
203    /**
204     * Log external search queries
205     *
206     * Will not write anything if the referer isn't a search engine
207     *
208     * @param string $referer The HTTP referer URL
209     * @param string $type Reference to the type variable that will be modified
210     */
211    public function logExternalSearch(string $referer, string &$type): void
212    {
213        global $INPUT;
214
215        $searchEngine = new SearchEngines($referer);
216
217        if (!$searchEngine->isSearchEngine()) {
218            return; // not a search engine
219        }
220
221        $type = 'search';
222        $query = $searchEngine->getQuery();
223
224        // log it!
225        $words = [];
226        if ($query) {
227            $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
228        }
229        $this->logSearch($INPUT->str('p'), $searchEngine->getEngine(), $query, $words);
230    }
231
232    /**
233     * Log search data to the search related tables
234     *
235     * @param string $page The page being searched from
236     * @param string $engine The search engine name
237     * @param string|null $query The search query
238     * @param array|null $words Array of search words
239     */
240    public function logSearch(string $page, string $engine, ?string $query, ?array $words): void
241    {
242        $sid = $this->db->exec(
243            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
244            $page, $query ?? '', $engine
245        );
246        if (!$sid) return;
247
248        foreach ($words as $word) {
249            if (!$word) continue;
250            $this->db->exec(
251                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
252                $sid, $word
253            );
254        }
255    }
256
257    /**
258     * Log that the session was seen
259     *
260     * This is used to calculate the time people spend on the whole site
261     * during their session
262     *
263     * Viewcounts are used for bounce calculation
264     *
265     * @param int $addview set to 1 to count a view
266     */
267    public function logSession(int $addview = 0): void
268    {
269        // only log browser sessions
270        if ($this->uaType != 'browser') return;
271
272        $session = $this->getSession();
273        $this->db->exec(
274            'INSERT OR REPLACE INTO session (
275                session, dt, end, views, uid
276             ) VALUES (
277                ?,
278                CURRENT_TIMESTAMP,
279                CURRENT_TIMESTAMP,
280                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
281                ?
282             )',
283            $session, $session, $addview, $addview, $this->uid
284        );
285    }
286
287    /**
288     * Resolve IP to country/city and store in database
289     *
290     * @param string $ip The IP address to resolve
291     */
292    public function logIp(string $ip): void
293    {
294        // check if IP already known and up-to-date
295        $result = $this->db->queryValue(
296            "SELECT ip
297             FROM   iplocation
298             WHERE  ip = ?
299               AND  lastupd > date('now', '-30 days')",
300            $ip
301        );
302        if ($result) return;
303
304        $http = $this->httpClient ?: new DokuHTTPClient();
305        $http->timeout = 10;
306        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
307
308        if (!$json) return; // FIXME log error
309        try {
310            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
311        } catch (\JsonException $e) {
312            return; // FIXME log error
313        }
314        if (!isset($data['status']) || $data['status'] !== 'success') {
315            return; // FIXME log error
316        }
317
318        $host = gethostbyaddr($ip);
319        $this->db->exec(
320            'INSERT OR REPLACE INTO iplocation (
321                    ip, country, code, city, host, lastupd
322                 ) VALUES (
323                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
324                 )',
325            $ip, $data['country'], $data['countryCode'], $data['city'], $host
326        );
327    }
328
329    /**
330     * Log a click on an external link
331     *
332     * Called from log.php
333     */
334    public function logOutgoing(): void
335    {
336        global $INPUT;
337
338        if (!$INPUT->str('ol')) return;
339
340        $link = $INPUT->str('ol');
341        $link_md5 = md5($link);
342        $session = $this->getSession();
343        $page = $INPUT->str('p');
344
345        $this->db->exec(
346            'INSERT INTO outlinks (
347                dt, session, page, link_md5, link
348             ) VALUES (
349                CURRENT_TIMESTAMP, ?, ?, ?, ?
350             )',
351            $session, $page, $link_md5, $link
352        );
353    }
354
355    /**
356     * Log a page access
357     *
358     * Called from log.php
359     */
360    public function logAccess(): void
361    {
362        global $INPUT, $USERINFO;
363
364        if (!$INPUT->str('p')) return;
365
366        # FIXME check referer against blacklist and drop logging for bad boys
367
368        // handle referer
369        $referer = trim($INPUT->str('r'));
370        if ($referer) {
371            $ref = $referer;
372            $ref_md5 = md5($referer);
373            if (str_starts_with($referer, DOKU_URL)) {
374                $ref_type = 'internal';
375            } else {
376                $ref_type = 'external';
377                $this->logExternalSearch($referer, $ref_type);
378            }
379        } else {
380            $ref = '';
381            $ref_md5 = '';
382            $ref_type = '';
383        }
384
385        $page = $INPUT->str('p');
386        $ip = clientIP(true);
387        $sx = $INPUT->int('sx');
388        $sy = $INPUT->int('sy');
389        $vx = $INPUT->int('vx');
390        $vy = $INPUT->int('vy');
391        $js = $INPUT->int('js');
392        $user = $INPUT->server->str('REMOTE_USER');
393        $session = $this->getSession();
394
395        $accessId = $this->db->exec(
396            'INSERT INTO access (
397                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
398                screen_x, screen_y, view_x, view_y, js, user, session, uid
399             ) VALUES (
400                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
401                ?, ?, ?, ?, ?, ?, ?, ?
402             )',
403            $page, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
404            $ref, $ref_md5, $ref_type, $sx, $sy, $vx, $vy, $js, $user, $session, $this->uid
405        );
406
407        if ($ref_md5) {
408            $this->db->exec(
409                'INSERT OR IGNORE INTO refseen (
410                    ref_md5, dt
411                 ) VALUES (
412                    ?, CURRENT_TIMESTAMP
413                 )',
414                $ref_md5
415            );
416        }
417
418        // log group access
419        if (isset($USERINFO['grps'])) {
420            $this->logGroups($accessId, 'view', $USERINFO['grps']);
421        }
422        // log email domain
423        if (!empty($USERINFO['mail'])) {
424            $this->logDomain($accessId, 'view', $USERINFO['mail']);
425        }
426
427        // resolve the IP
428        $this->logIp(clientIP(true));
429    }
430
431    /**
432     * Log access to a media file
433     *
434     * Called from action.php
435     *
436     * @param string $media The media ID
437     * @param string $mime The media's mime type
438     * @param bool $inline Is this displayed inline?
439     * @param int $size Size of the media file
440     */
441    public function logMedia(string $media, string $mime, bool $inline, int $size): void
442    {
443        global $INPUT;
444
445        [$mime1, $mime2] = explode('/', strtolower($mime));
446        $inline = $inline ? 1 : 0;
447        $size = (int)$size;
448
449        $ip = clientIP(true);
450        $user = $INPUT->server->str('REMOTE_USER');
451        $session = $this->getSession();
452
453        $this->db->exec(
454            'INSERT INTO media (
455                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
456                size, mime1, mime2, inline
457             ) VALUES (
458                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
459                ?, ?, ?, ?
460             )',
461            $media, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
462            $user, $session, $this->uid, $size, $mime1, $mime2, $inline
463        );
464    }
465
466    /**
467     * Log page edits
468     *
469     * @param string $page The page that was edited
470     * @param string $type The type of edit (create, edit, etc.)
471     */
472    public function logEdit(string $page, string $type): void
473    {
474        global $INPUT, $USERINFO;
475
476        $ip = clientIP(true);
477        $user = $INPUT->server->str('REMOTE_USER');
478        $session = $this->getSession();
479
480        $editId = $this->db->exec(
481            'INSERT INTO edits (
482                dt, page, type, ip, user, session, uid
483             ) VALUES (
484                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
485             )',
486            $page, $type, $ip, $user, $session, $this->uid
487        );
488
489        // log group access
490        if (isset($USERINFO['grps'])) {
491            $this->logGroups($editId, 'edit', $USERINFO['grps']);
492        }
493
494        // log email domain
495        if (!empty($USERINFO['mail'])) {
496            $this->logDomain($editId, 'edit', $USERINFO['mail']);
497        }
498    }
499
500    /**
501     * Log login/logoffs and user creations
502     *
503     * @param string $type The type of login event (login, logout, create)
504     * @param string $user The username (optional, will use current user if empty)
505     */
506    public function logLogin(string $type, string $user = ''): void
507    {
508        global $INPUT;
509
510        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
511
512        $ip = clientIP(true);
513        $session = $this->getSession();
514
515        $this->db->exec(
516            'INSERT INTO logins (
517                dt, type, ip, user, session, uid
518             ) VALUES (
519                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
520             )',
521            $type, $ip, $user, $session, $this->uid
522        );
523    }
524
525    /**
526     * Log the current page count and size as today's history entry
527     */
528    public function logHistoryPages(): void
529    {
530        global $conf;
531
532        // use the popularity plugin's search method to find the wanted data
533        /** @var helper_plugin_popularity $pop */
534        $pop = plugin_load('helper', 'popularity');
535        $list = $this->initEmptySearchList();
536        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
537        $page_count = $list['file_count'];
538        $page_size = $list['file_size'];
539
540        $this->db->exec(
541            'INSERT OR REPLACE INTO history (
542                info, value, dt
543             ) VALUES (
544                ?, ?, CURRENT_TIMESTAMP
545             )',
546            'page_count', $page_count
547        );
548        $this->db->exec(
549            'INSERT OR REPLACE INTO history (
550                info, value, dt
551             ) VALUES (
552                ?, ?, CURRENT_TIMESTAMP
553             )',
554            'page_size', $page_size
555        );
556    }
557
558    /**
559     * Log the current media count and size as today's history entry
560     */
561    public function logHistoryMedia(): void
562    {
563        global $conf;
564
565        // use the popularity plugin's search method to find the wanted data
566        /** @var helper_plugin_popularity $pop */
567        $pop = plugin_load('helper', 'popularity');
568        $list = $this->initEmptySearchList();
569        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
570        $media_count = $list['file_count'];
571        $media_size = $list['file_size'];
572
573        $this->db->exec(
574            'INSERT OR REPLACE INTO history (
575                info, value, dt
576             ) VALUES (
577                ?, ?, CURRENT_TIMESTAMP
578             )',
579            'media_count', $media_count
580        );
581        $this->db->exec(
582            'INSERT OR REPLACE INTO history (
583                info, value, dt
584             ) VALUES (
585                ?, ?, CURRENT_TIMESTAMP
586             )',
587            'media_size', $media_size
588        );
589    }
590
591    /**
592     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
593     * @return array
594     */
595    protected function initEmptySearchList()
596    {
597        return array_fill_keys([
598            'file_count',
599            'file_size',
600            'file_max',
601            'file_min',
602            'dir_count',
603            'dir_nest',
604            'file_oldest'
605        ], 0);
606    }
607}
608