xref: /plugin/statistics/Logger.php (revision 00f786d8543ecba8806a73e00cab7a6e9718571a)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15
/**
 * Writes statistics data (page/media access, sessions, searches, edits, logins, ...)
 * to the plugin's SQLite database.
 *
 * The user agent of the current request is analysed once on construction. Requests
 * identified as bots are rejected by throwing from the constructor.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;


    /**
     * Constructor
     *
     * Parses the user agent of the current request, initializes the internal
     * state and records the "last seen" timestamp for authenticated users.
     *
     * @param helper_plugin_statistics $hlp The statistics helper (provides DB and config)
     * @param DokuHTTPClient|null $httpClient Optional HTTP client used by logIp() (for testing)
     * @throws \RuntimeException when the user agent is identified as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            // for now ignore bots; no object is created, so there is no state to set
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        // use the same connection all log*() methods write to
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Sources are tried in order: the 'uid' request parameter, the 'plgstats'
     * DokuWiki preference, and finally the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        return (string)$uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Sources are tried in order: the 'ses' request parameter, the 'plgstatsses'
     * DokuWiki preference, and finally the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        return (string)$ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        // compare against the empty string; empty() would also drop a user named "0"
        if ($user === '') return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        $groups = array_values(array_intersect($groups, $toLog));
        if (!$groups) return;

        // one "(type, group)" tuple per group, all written with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }

        $this->db->exec("INSERT INTO groups (`type`, `group`) VALUES $placeholders", $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Passed by reference; set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * Writes one row for the query and one row per non-empty word. Nothing
     * word related is written when the query row could not be inserted.
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            // skip empty fragments only; "0" is a legitimate search word
            if ((string)$word === '') continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();

        // INSERT OR REPLACE writes a completely new row, so everything that has to
        // survive an update (start time, view counter) is read back from the old row.
        // Without this, dt would always equal end and the session duration would be 0.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Does nothing when the IP is not a valid address, when an entry newer than
     * 30 days already exists, or when the lookup service does not return a
     * successful result.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // the value ends up in a URL and in gethostbyaddr(), so only accept real addresses
        if (filter_var($ip, FILTER_VALIDATE_IP) === false) return;

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
             FROM   iplocation
             WHERE  ip = ?
               AND  lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data) || ($data['status'] ?? null) !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() returns the IP itself on lookup failure and false on
        // malformed input; fall back to the IP in both cases
        $host = gethostbyaddr($ip) ?: $ip;

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $ip,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link from the 'ol' and the originating page from the 'p'
     * request parameter. Does nothing when no link was passed.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Besides the access itself this records the referer (including a possible
     * external search), the groups of the current user and the location of the
     * client IP. Does nothing when no page was passed in the 'p' parameter.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // switches $ref_type to 'search' when the referer is a search engine
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // split "type/subtype"; a mime type without a slash has an empty subtype
        $mimeParts = explode('/', strtolower($mime));
        $mime1 = $mimeParts[0];
        $mime2 = $mimeParts[1] ?? '';
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * Also records the groups of the current user as 'edit' access.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory($conf['mediadir'], true, 'media');
    }

    /**
     * Count the files below a directory and store count and total size in the history table
     *
     * The values are written as "<prefix>_count" and "<prefix>_size".
     *
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option handed to the popularity plugin's callback
     * @param string $prefix Prefix of the history info keys ('page' or 'media')
     */
    private function logHistory(string $dir, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) return; // plugin not available, nothing we can count with

        $list = [];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        // $list starts out empty; default to 0 in case the callback never set these keys
        $values = [
            $prefix . '_count' => $list['file_count'] ?? 0,
            $prefix . '_size' => $list['file_size'] ?? 0,
        ];

        foreach ($values as $info => $value) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $info,
                $value
            );
        }
    }
}
553