xref: /plugin/statistics/Logger.php (revision 2adee4c68974f633621c42f9308b229422ef36a9)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15class Logger
16{
17    /** @var helper_plugin_statistics The statistics helper plugin instance */
18    protected helper_plugin_statistics $hlp;
19
20    /** @var SQLiteDB The SQLite database instance */
21    protected SQLiteDB $db;
22
23    /** @var string The full user agent string */
24    protected string $uaAgent;
25
26    /** @var string The type of user agent (browser, robot, feedreader) */
27    protected string $uaType = 'browser';
28
29    /** @var string The browser/client name */
30    protected string $uaName;
31
32    /** @var string The browser/client version */
33    protected string $uaVersion;
34
35    /** @var string The operating system/platform */
36    protected string $uaPlatform;
37
38    /** @var string The unique user identifier */
39    protected string $uid;
40
41    /** @var DokuHTTPClient|null The HTTP client instance for testing */
42    protected ?DokuHTTPClient $httpClient = null;
43
44
45    /**
46     * Constructor
47     *
48     * Parses browser info and set internal vars
49     */
50    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
51    {
52        global $INPUT;
53
54        $this->hlp = $hlp;
55        $this->db = $this->hlp->getDB();
56        $this->httpClient = $httpClient;
57
58        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
59
60        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
61        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
62        $dd->discardBotInformation();
63        $dd->parse();
64
65        if ($dd->isFeedReader()) {
66            $this->uaType = 'feedreader';
67        } elseif ($dd->isBot()) {
68            $this->uaType = 'robot';
69            // for now ignore bots
70            throw new \RuntimeException('Bot detected, not logging');
71        }
72
73        $this->uaAgent = $ua;
74        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
75        $this->uaVersion = $dd->getClient('version') ?: '0';
76        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
77        $this->uid = $this->getUID();
78
79
80        $this->logLastseen();
81    }
82
83    /**
84     * Should be called before logging
85     *
86     * This starts a transaction, so all logging is done in one go
87     */
88    public function begin(): void
89    {
90        $this->hlp->getDB()->getPdo()->beginTransaction();
91    }
92
93    /**
94     * Should be called after logging
95     *
96     * This commits the transaction started in begin()
97     */
98    public function end(): void
99    {
100        $this->hlp->getDB()->getPdo()->commit();
101    }
102
103    /**
104     * Get the unique user ID
105     *
106     * @return string The unique user identifier
107     */
108    protected function getUID(): string
109    {
110        global $INPUT;
111
112        $uid = $INPUT->str('uid');
113        if (!$uid) $uid = get_doku_pref('plgstats', false);
114        if (!$uid) $uid = session_id();
115        set_doku_pref('plgstats', $uid);
116        return $uid;
117    }
118
119    /**
120     * Return the user's session ID
121     *
122     * This is usually our own managed session, not a PHP session (only in fallback)
123     *
124     * @return string The session identifier
125     */
126    protected function getSession(): string
127    {
128        global $INPUT;
129
130        $ses = $INPUT->str('ses');
131        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
132        if (!$ses) $ses = session_id();
133        set_doku_pref('plgstatsses', $ses);
134        return $ses;
135    }
136
137    /**
138     * Log that we've seen the user (authenticated only)
139     */
140    public function logLastseen(): void
141    {
142        global $INPUT;
143
144        if (empty($INPUT->server->str('REMOTE_USER'))) return;
145
146        $this->db->exec(
147            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
148            $INPUT->server->str('REMOTE_USER'),
149        );
150    }
151
152    /**
153     * Log actions by groups
154     *
155     * @param int $pid Id of access data row (foreign key)
156     * @param string $type The type of access to log ('view','edit')
157     * @param array $groups The groups to log
158     */
159    public function logGroups(int $pid, string $type, array $groups): void
160    {
161        if ($groups === [] || !$pid) return;
162
163        $toLog = (array)$this->hlp->getConf('loggroups');
164
165        // if specific groups are configured, limit logging to them only
166        $groups = empty(array_filter($toLog)) ? $groups : array_intersect($groups, $toLog);
167        if (!$groups) return;
168
169        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?, ?)'));
170        $params = [];
171        $sql = "INSERT INTO groups (`pid`, `type`, `group`) VALUES $placeholders";
172        foreach ($groups as $group) {
173            $params[] = $pid;
174            $params[] = $type;
175            $params[] = $group;
176        }
177        $sql = rtrim($sql, ',');
178        $this->db->exec($sql, $params);
179    }
180
181    /**
182     * Log email domain, skip logging if no domain is found
183     *
184     * @param int $pid Id of access data row (foreign key)
185     * @param string $type The type of access to log ('view','edit')
186     * @param string $mail The email to extract the domain from
187     */
188    public function logDomain(int $pid, string $type, string $mail): void
189    {
190        if (!$pid) return;
191
192        $pos = strrpos($mail, '@');
193        if (!$pos) return;
194        $domain = substr($mail, $pos + 1);
195        if (empty($domain)) return;
196
197        $sql = "INSERT INTO domain (`pid`, `type`, `domain`) VALUES (?, ?, ?)";
198        $this->db->exec($sql, [$pid, $type, $domain]);
199    }
200
201    /**
202     * Log external search queries
203     *
204     * Will not write anything if the referer isn't a search engine
205     *
206     * @param string $referer The HTTP referer URL
207     * @param string $type Reference to the type variable that will be modified
208     */
209    public function logExternalSearch(string $referer, string &$type): void
210    {
211        global $INPUT;
212
213        $searchEngine = new SearchEngines($referer);
214
215        if (!$searchEngine->isSearchEngine()) {
216            return; // not a search engine
217        }
218
219        $type = 'search';
220        $query = $searchEngine->getQuery();
221
222        // log it!
223        $words = [];
224        if ($query) {
225            $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
226        }
227        $this->logSearch($INPUT->str('p'), $searchEngine->getEngine(), $query, $words);
228    }
229
230    /**
231     * Log search data to the search related tables
232     *
233     * @param string $page The page being searched from
234     * @param string $engine The search engine name
235     * @param string|null $query The search query
236     * @param array|null $words Array of search words
237     */
238    public function logSearch(string $page, string $engine, ?string $query, ?array $words): void
239    {
240        $sid = $this->db->exec(
241            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
242            $page,
243            $query ?? '',
244            $engine
245        );
246        if (!$sid) return;
247
248        foreach ($words as $word) {
249            if (!$word) continue;
250            $this->db->exec(
251                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
252                $sid,
253                $word
254            );
255        }
256    }
257
258    /**
259     * Log that the session was seen
260     *
261     * This is used to calculate the time people spend on the whole site
262     * during their session
263     *
264     * Viewcounts are used for bounce calculation
265     *
266     * @param int $addview set to 1 to count a view
267     */
268    public function logSession(int $addview = 0): void
269    {
270        // only log browser sessions
271        if ($this->uaType != 'browser') return;
272
273        $session = $this->getSession();
274        $this->db->exec(
275            'INSERT OR REPLACE INTO session (
276                session, dt, end, views, uid
277             ) VALUES (
278                ?,
279                CURRENT_TIMESTAMP,
280                CURRENT_TIMESTAMP,
281                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
282                ?
283             )',
284            $session,
285            $session,
286            $addview,
287            $addview,
288            $this->uid
289        );
290    }
291
292    /**
293     * Resolve IP to country/city and store in database
294     *
295     * @param string $ip The IP address to resolve
296     */
297    public function logIp(string $ip): void
298    {
299        // check if IP already known and up-to-date
300        $result = $this->db->queryValue(
301            "SELECT ip
302             FROM   iplocation
303             WHERE  ip = ?
304               AND  lastupd > date('now', '-30 days')",
305            $ip
306        );
307        if ($result) return;
308
309        $http = $this->httpClient ?: new DokuHTTPClient();
310        $http->timeout = 10;
311        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
312
313        if (!$json) return; // FIXME log error
314        try {
315            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
316        } catch (\JsonException $e) {
317            return; // FIXME log error
318        }
319        if (!isset($data['status']) || $data['status'] !== 'success') {
320            return; // FIXME log error
321        }
322
323        $host = gethostbyaddr($ip);
324        $this->db->exec(
325            'INSERT OR REPLACE INTO iplocation (
326                    ip, country, code, city, host, lastupd
327                 ) VALUES (
328                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
329                 )',
330            $ip,
331            $data['country'],
332            $data['countryCode'],
333            $data['city'],
334            $host
335        );
336    }
337
338    /**
339     * Log a click on an external link
340     *
341     * Called from log.php
342     */
343    public function logOutgoing(): void
344    {
345        global $INPUT;
346
347        if (!$INPUT->str('ol')) return;
348
349        $link = $INPUT->str('ol');
350        $link_md5 = md5($link);
351        $session = $this->getSession();
352        $page = $INPUT->str('p');
353
354        $this->db->exec(
355            'INSERT INTO outlinks (
356                dt, session, page, link_md5, link
357             ) VALUES (
358                CURRENT_TIMESTAMP, ?, ?, ?, ?
359             )',
360            $session,
361            $page,
362            $link_md5,
363            $link
364        );
365    }
366
367    /**
368     * Log a page access
369     *
370     * Called from log.php
371     */
372    public function logAccess(): void
373    {
374        global $INPUT, $USERINFO;
375
376        if (!$INPUT->str('p')) return;
377
378        # FIXME check referer against blacklist and drop logging for bad boys
379
380        // handle referer
381        $referer = trim($INPUT->str('r'));
382        if ($referer) {
383            $ref = $referer;
384            $ref_md5 = md5($referer);
385            if (str_starts_with($referer, DOKU_URL)) {
386                $ref_type = 'internal';
387            } else {
388                $ref_type = 'external';
389                $this->logExternalSearch($referer, $ref_type);
390            }
391        } else {
392            $ref = '';
393            $ref_md5 = '';
394            $ref_type = '';
395        }
396
397        $page = $INPUT->str('p');
398        $ip = clientIP(true);
399        $sx = $INPUT->int('sx');
400        $sy = $INPUT->int('sy');
401        $vx = $INPUT->int('vx');
402        $vy = $INPUT->int('vy');
403        $js = $INPUT->int('js');
404        $user = $INPUT->server->str('REMOTE_USER');
405        $session = $this->getSession();
406
407        $accessId = $this->db->exec(
408            'INSERT INTO access (
409                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
410                screen_x, screen_y, view_x, view_y, js, user, session, uid
411             ) VALUES (
412                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
413                ?, ?, ?, ?, ?, ?, ?, ?
414             )',
415            $page,
416            $ip,
417            $this->uaAgent,
418            $this->uaName,
419            $this->uaType,
420            $this->uaVersion,
421            $this->uaPlatform,
422            $ref,
423            $ref_md5,
424            $ref_type,
425            $sx,
426            $sy,
427            $vx,
428            $vy,
429            $js,
430            $user,
431            $session,
432            $this->uid
433        );
434
435        if ($ref_md5) {
436            $this->db->exec(
437                'INSERT OR IGNORE INTO refseen (
438                    ref_md5, dt
439                 ) VALUES (
440                    ?, CURRENT_TIMESTAMP
441                 )',
442                $ref_md5
443            );
444        }
445
446        // log group access
447        if (isset($USERINFO['grps'])) {
448            $this->logGroups($accessId, 'view', $USERINFO['grps']);
449        }
450        // log email domain
451        if (!empty($USERINFO['mail'])) {
452            $this->logDomain($accessId, 'view', $USERINFO['mail']);
453        }
454
455        // resolve the IP
456        $this->logIp(clientIP(true));
457    }
458
459    /**
460     * Log access to a media file
461     *
462     * Called from action.php
463     *
464     * @param string $media The media ID
465     * @param string $mime The media's mime type
466     * @param bool $inline Is this displayed inline?
467     * @param int $size Size of the media file
468     */
469    public function logMedia(string $media, string $mime, bool $inline, int $size): void
470    {
471        global $INPUT;
472
473        [$mime1, $mime2] = explode('/', strtolower($mime));
474        $inline = $inline ? 1 : 0;
475
476        $ip = clientIP(true);
477        $user = $INPUT->server->str('REMOTE_USER');
478        $session = $this->getSession();
479
480        $this->db->exec(
481            'INSERT INTO media (
482                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
483                size, mime1, mime2, inline
484             ) VALUES (
485                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
486                ?, ?, ?, ?
487             )',
488            $media,
489            $ip,
490            $this->uaAgent,
491            $this->uaName,
492            $this->uaType,
493            $this->uaVersion,
494            $this->uaPlatform,
495            $user,
496            $session,
497            $this->uid,
498            $size,
499            $mime1,
500            $mime2,
501            $inline
502        );
503    }
504
505    /**
506     * Log page edits
507     *
508     * @param string $page The page that was edited
509     * @param string $type The type of edit (create, edit, etc.)
510     */
511    public function logEdit(string $page, string $type): void
512    {
513        global $INPUT, $USERINFO;
514
515        $ip = clientIP(true);
516        $user = $INPUT->server->str('REMOTE_USER');
517        $session = $this->getSession();
518
519        $editId = $this->db->exec(
520            'INSERT INTO edits (
521                dt, page, type, ip, user, session, uid
522             ) VALUES (
523                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
524             )',
525            $page,
526            $type,
527            $ip,
528            $user,
529            $session,
530            $this->uid
531        );
532
533        // log group access
534        if (isset($USERINFO['grps'])) {
535            $this->logGroups($editId, 'edit', $USERINFO['grps']);
536        }
537
538        // log email domain
539        if (!empty($USERINFO['mail'])) {
540            $this->logDomain($editId, 'edit', $USERINFO['mail']);
541        }
542    }
543
544    /**
545     * Log login/logoffs and user creations
546     *
547     * @param string $type The type of login event (login, logout, create)
548     * @param string $user The username (optional, will use current user if empty)
549     */
550    public function logLogin(string $type, string $user = ''): void
551    {
552        global $INPUT;
553
554        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
555
556        $ip = clientIP(true);
557        $session = $this->getSession();
558
559        $this->db->exec(
560            'INSERT INTO logins (
561                dt, type, ip, user, session, uid
562             ) VALUES (
563                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
564             )',
565            $type,
566            $ip,
567            $user,
568            $session,
569            $this->uid
570        );
571    }
572
573    /**
574     * Log the current page count and size as today's history entry
575     */
576    public function logHistoryPages(): void
577    {
578        global $conf;
579
580        // use the popularity plugin's search method to find the wanted data
581        /** @var helper_plugin_popularity $pop */
582        $pop = plugin_load('helper', 'popularity');
583        $list = $this->initEmptySearchList();
584        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
585        $page_count = $list['file_count'];
586        $page_size = $list['file_size'];
587
588        $this->db->exec(
589            'INSERT OR REPLACE INTO history (
590                info, value, dt
591             ) VALUES (
592                ?, ?, CURRENT_TIMESTAMP
593             )',
594            'page_count',
595            $page_count
596        );
597        $this->db->exec(
598            'INSERT OR REPLACE INTO history (
599                info, value, dt
600             ) VALUES (
601                ?, ?, CURRENT_TIMESTAMP
602             )',
603            'page_size',
604            $page_size
605        );
606    }
607
608    /**
609     * Log the current media count and size as today's history entry
610     */
611    public function logHistoryMedia(): void
612    {
613        global $conf;
614
615        // use the popularity plugin's search method to find the wanted data
616        /** @var helper_plugin_popularity $pop */
617        $pop = plugin_load('helper', 'popularity');
618        $list = $this->initEmptySearchList();
619        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
620        $media_count = $list['file_count'];
621        $media_size = $list['file_size'];
622
623        $this->db->exec(
624            'INSERT OR REPLACE INTO history (
625                info, value, dt
626             ) VALUES (
627                ?, ?, CURRENT_TIMESTAMP
628             )',
629            'media_count',
630            $media_count
631        );
632        $this->db->exec(
633            'INSERT OR REPLACE INTO history (
634                info, value, dt
635             ) VALUES (
636                ?, ?, CURRENT_TIMESTAMP
637             )',
638            'media_size',
639            $media_size
640        );
641    }
642
643    /**
644     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
645     * @return array
646     */
647    protected function initEmptySearchList()
648    {
649        return array_fill_keys([
650            'file_count',
651            'file_size',
652            'file_max',
653            'file_min',
654            'dir_count',
655            'dir_nest',
656            'file_oldest'
657        ], 0);
658    }
659}
660