xref: /plugin/statistics/Logger.php (revision 04928db47f79a2117caaf6d9ca4114fef94cb8f2)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use helper_plugin_popularity;
12use helper_plugin_statistics;
13
14class Logger
15{
16    /** @var helper_plugin_statistics The statistics helper plugin instance */
17    protected helper_plugin_statistics $hlp;
18
19    /** @var SQLiteDB The SQLite database instance */
20    protected SQLiteDB $db;
21
22    /** @var string The full user agent string */
23    protected string $uaAgent;
24
25    /** @var string The type of user agent (browser, robot, feedreader) */
26    protected string $uaType = 'browser';
27
28    /** @var string The browser/client name */
29    protected string $uaName;
30
31    /** @var string The browser/client version */
32    protected string $uaVersion;
33
34    /** @var string The operating system/platform */
35    protected string $uaPlatform;
36
37    /** @var string|null The user name, if available */
38    protected ?string $user = null;
39
40    /** @var string The unique user identifier */
41    protected string $uid;
42
43    /** @var string The session identifier */
44    protected string $session;
45
46    /** @var int|null The ID of the main access log entry if any */
47    protected ?int $hit = null;
48
49    /** @var DokuHTTPClient|null The HTTP client instance for testing */
50    protected ?DokuHTTPClient $httpClient = null;
51
52    // region lifecycle
53
54    /**
55     * Constructor
56     *
57     * Parses browser info and set internal vars
58     */
59    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
60    {
61        global $INPUT;
62
63        $this->hlp = $hlp;
64        $this->db = $this->hlp->getDB();
65        $this->httpClient = $httpClient;
66
67        // FIXME if we already have a session, we should not re-parse the user agent
68
69        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
70        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
71        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
72        $dd->discardBotInformation();
73        $dd->parse();
74
75        if ($dd->isFeedReader()) {
76            $this->uaType = 'feedreader';
77        } elseif ($dd->isBot()) {
78            $this->uaType = 'robot';
79            // for now ignore bots
80            throw new IgnoreException('Bot detected, not logging');
81        }
82
83        $this->uaAgent = $ua;
84        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
85        $this->uaVersion = $dd->getClient('version') ?: '0';
86        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
87        $this->uid = $this->getUID();
88        $this->session = $this->getSession();
89        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
90    }
91
92    /**
93     * Should be called before logging
94     *
95     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
96     */
97    public function begin(): void
98    {
99        $this->hlp->getDB()->getPdo()->beginTransaction();
100
101        $this->logUser();
102        $this->logGroups();
103        $this->logDomain();
104        $this->logSession();
105    }
106
107    /**
108     * Should be called after logging
109     *
110     * This commits the transaction started in begin()
111     */
112    public function end(): void
113    {
114        $this->hlp->getDB()->getPdo()->commit();
115    }
116
117    // endregion
118    // region data gathering
119
120    /**
121     * Get the unique user ID
122     *
123     * The user ID is stored in the user preferences and should stay there forever.
124     * @return string The unique user identifier
125     */
126    protected function getUID(): string
127    {
128        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
129            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
130            // @todo we may later make UID generation optional
131            throw new IgnoreException('No user ID found');
132        }
133
134        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
135    }
136
137    /**
138     * Return the user's session ID
139     *
140     * @return string The session identifier
141     */
142    protected function getSession(): string
143    {
144        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
145            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
146            throw new IgnoreException('No session ID found');
147        }
148
149        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
150    }
151
152    // endregion
153    // region automatic logging
154
155    /**
156     * Log the user was seen
157     */
158    protected function logUser(): void
159    {
160        if (!$this->user) return;
161
162        $this->db->exec(
163            'INSERT INTO users (user, dt)
164                  VALUES (?, CURRENT_TIMESTAMP)
165            ON CONFLICT (user) DO UPDATE SET
166                         dt = CURRENT_TIMESTAMP
167                   WHERE excluded.user = users.user
168            ',
169            $this->user
170        );
171
172    }
173
174    /**
175     * Log the session and user agent information
176     */
177    protected function logSession(): void
178    {
179        $this->db->exec(
180            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
181                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
182             ON CONFLICT (session) DO UPDATE SET
183                         end = CURRENT_TIMESTAMP
184                   WHERE excluded.session = sessions.session
185             ',
186            $this->session,
187            $this->uid,
188            $this->user,
189            $this->uaAgent,
190            $this->uaName,
191            $this->uaType,
192            $this->uaVersion,
193            $this->uaPlatform
194        );
195    }
196
197    /**
198     * Log all groups for the user
199     *
200     * @todo maybe this should be done only once per session?
201     */
202    protected function logGroups(): void
203    {
204        global $USERINFO;
205
206        if (!$this->user) return;
207        if (!isset($USERINFO['grps'])) return;
208        if (!is_array($USERINFO['grps'])) return;
209        $groups = $USERINFO['grps'];
210
211        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
212
213        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
214        $params = [];
215        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
216        foreach ($groups as $group) {
217            $params[] = $this->user;
218            $params[] = $group;
219        }
220        $this->db->exec($sql, $params);
221    }
222
223    /**
224     * Log email domain
225     *
226     * @todo maybe this should be done only once per session?
227     */
228    protected function logDomain(): void
229    {
230        global $USERINFO;
231        if (!$this->user) return;
232        if (!isset($USERINFO['mail'])) return;
233        $mail = $USERINFO['mail'];
234
235        $pos = strrpos($mail, '@');
236        if (!$pos) return;
237        $domain = substr($mail, $pos + 1);
238        if (empty($domain)) return;
239
240        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
241        $this->db->exec($sql, [$domain, $this->user]);
242    }
243
244    // endregion
245    // region internal loggers called by the dispatchers
246
247    /**
248     * Log the given referer URL
249     *
250     * @param $referer
251     * @return int|null The referer ID or null if no referer was given
252     */
253    public function logReferer($referer): ?int
254    {
255        if (!$referer) return null;
256
257        // FIXME we could check against a blacklist here
258
259        $se = new SearchEngines($referer);
260        $type = $se->isSearchEngine() ? 'search' : 'external';
261
262        $sql = 'INSERT OR IGNORE INTO referers (url, type, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
263        return $this->db->exec($sql, [$referer, $type]); // returns ID even if the insert was ignored
264    }
265
266    /**
267     * Resolve IP to country/city and store in database
268     *
269     * @return string The IP address as stored
270     */
271    public function logIp(): string
272    {
273        $ip = clientIP(true);
274        $hash = $ip; // @todo we could anonymize here
275
276        // check if IP already known and up-to-date
277        $result = $this->db->queryValue(
278            "SELECT ip
279             FROM   iplocation
280             WHERE  ip = ?
281               AND  lastupd > date('now', '-30 days')",
282            $hash
283        );
284        if ($result) return $hash; // already known and up-to-date
285
286        $http = $this->httpClient ?: new DokuHTTPClient();
287        $http->timeout = 7;
288        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
289
290        if (!$json) {
291            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
292            return $hash;
293        }
294        try {
295            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
296        } catch (\JsonException $e) {
297            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
298            return $hash;
299        }
300        if (!isset($data['status'])) {
301            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result' . $ip, $data);
302            return $hash;
303        };
304
305        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
306        // without location data, so we won't re-query it in the next 30 days.
307
308        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
309        $this->db->exec(
310            'INSERT OR REPLACE INTO iplocation (
311                    ip, country, code, city, host, lastupd
312                 ) VALUES (
313                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
314                 )',
315            $hash,
316            $data['country'] ?? '',
317            $data['countryCode'] ?? '',
318            $data['city'] ?? '',
319            $host
320        );
321
322        return $hash;
323    }
324
325    // endregion
326    // region log dispatchers
327
328    public function logPageView(): void
329    {
330        global $INPUT;
331
332        if (!$INPUT->str('p')) return;
333
334
335        $referer = $INPUT->filter('trim')->str('r');
336        $ip = $this->logIp(); // resolve the IP address
337
338        $data = [
339            'page' => $INPUT->filter('cleanID')->str('p'),
340            'ip' => $ip,
341            'ref_id' => $this->logReferer($referer),
342            'sx' => $INPUT->int('sx'),
343            'sy' => $INPUT->int('sy'),
344            'vx' => $INPUT->int('vx'),
345            'vy' => $INPUT->int('vy'),
346            'session' => $this->session,
347        ];
348
349        $this->db->exec('
350        INSERT INTO pageviews (
351            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
352        ) VALUES (
353            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
354        )
355        ',
356            $data
357        );
358    }
359
360    /**
361     * Log a click on an external link
362     *
363     * Called from log.php
364     */
365    public function logOutgoing(): void
366    {
367        global $INPUT;
368
369        if (!$INPUT->str('ol')) return;
370
371        $link = $INPUT->filter('trim')->str('ol');
372        $session = $this->session;
373        $page = $INPUT->filter('cleanID')->str('p');
374
375        $this->db->exec(
376            'INSERT INTO outlinks (
377                dt, session, page, link
378             ) VALUES (
379                CURRENT_TIMESTAMP, ?, ?, ?, ?
380             )',
381            $session,
382            $page,
383            $link
384        );
385    }
386
387    /**
388     * Log access to a media file
389     *
390     * Called from action.php
391     *
392     * @param string $media The media ID
393     * @param string $mime The media's mime type
394     * @param bool $inline Is this displayed inline?
395     * @param int $size Size of the media file
396     */
397    public function logMedia(string $media, string $mime, bool $inline, int $size): void
398    {
399        [$mime1, $mime2] = explode('/', strtolower($mime));
400        $inline = $inline ? 1 : 0;
401
402
403        $data = [
404            'media' => cleanID($media),
405            'ip' => $this->logIp(), // resolve the IP address
406            'session' => $this->session,
407            'size' => $size,
408            'mime1' => $mime1,
409            'mime2' => $mime2,
410            'inline' => $inline,
411        ];
412
413        $this->db->exec('
414                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
415                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
416            ',
417            $data
418        );
419    }
420
421    /**
422     * Log page edits
423     *
424     * called from action.php
425     *
426     * @param string $page The page that was edited
427     * @param string $type The type of edit (create, edit, etc.)
428     */
429    public function logEdit(string $page, string $type): void
430    {
431        $data = [
432            'page' => cleanID($page),
433            'type' => $type,
434            'ip' => $this->logIp(), // resolve the IP address
435            'session' => $this->session
436        ];
437
438        $editId = $this->db->exec(
439            'INSERT INTO edits (
440                dt, page, type, ip, session
441             ) VALUES (
442                CURRENT_TIMESTAMP, :page, :type, :ip, :session
443             )',
444            $data
445        );
446    }
447
448    /**
449     * Log login/logoffs and user creations
450     *
451     * @param string $type The type of login event (login, logout, create)
452     * @param string $user The username (optional, will use current user if empty)
453     * @fixme this is still broken, I need to figure out the session handling first
454     */
455    public function logLogin(string $type, string $user = ''): void
456    {
457        global $INPUT;
458
459        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
460
461        $ip = clientIP(true);
462        $session = $this->session;
463
464        $this->db->exec(
465            'INSERT INTO logins (
466                dt, type, ip, session
467             ) VALUES (
468                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
469             )',
470            $type,
471            $ip,
472            $user,
473            $session,
474            $this->uid
475        );
476    }
477
478    /**
479     * Log search data to the search related tables
480     *
481     * @param string $query The search query
482     * @param string[] $words The query split into words
483     */
484    public function logSearch(string $query, array $words): void
485    {
486        if (!$query) return;
487
488        $sid = $this->db->exec(
489            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
490            $this->logIp(), // resolve the IP address
491            $this->session,
492            $query,
493        );
494
495        foreach ($words as $word) {
496            if (!$word) continue;
497            $this->db->exec(
498                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
499                $sid,
500                $word
501            );
502        }
503    }
504
505    /**
506     * Log the current page count and size as today's history entry
507     */
508    public function logHistoryPages(): void
509    {
510        global $conf;
511
512        // use the popularity plugin's search method to find the wanted data
513        /** @var helper_plugin_popularity $pop */
514        $pop = plugin_load('helper', 'popularity');
515        $list = $this->initEmptySearchList();
516        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
517        $page_count = $list['file_count'];
518        $page_size = $list['file_size'];
519
520        $this->db->exec(
521            'INSERT OR REPLACE INTO history (
522                info, value, dt
523             ) VALUES (
524                ?, ?, CURRENT_TIMESTAMP
525             )',
526            'page_count',
527            $page_count
528        );
529        $this->db->exec(
530            'INSERT OR REPLACE INTO history (
531                info, value, dt
532             ) VALUES (
533                ?, ?, CURRENT_TIMESTAMP
534             )',
535            'page_size',
536            $page_size
537        );
538    }
539
540    /**
541     * Log the current media count and size as today's history entry
542     */
543    public function logHistoryMedia(): void
544    {
545        global $conf;
546
547        // use the popularity plugin's search method to find the wanted data
548        /** @var helper_plugin_popularity $pop */
549        $pop = plugin_load('helper', 'popularity');
550        $list = $this->initEmptySearchList();
551        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
552        $media_count = $list['file_count'];
553        $media_size = $list['file_size'];
554
555        $this->db->exec(
556            'INSERT OR REPLACE INTO history (
557                info, value, dt
558             ) VALUES (
559                ?, ?, CURRENT_TIMESTAMP
560             )',
561            'media_count',
562            $media_count
563        );
564        $this->db->exec(
565            'INSERT OR REPLACE INTO history (
566                info, value, dt
567             ) VALUES (
568                ?, ?, CURRENT_TIMESTAMP
569             )',
570            'media_size',
571            $media_size
572        );
573    }
574
575    // endregion
576
577    /**
578     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
579     * @return array
580     */
581    protected function initEmptySearchList()
582    {
583        return array_fill_keys([
584            'file_count',
585            'file_size',
586            'file_max',
587            'file_min',
588            'dir_count',
589            'dir_nest',
590            'file_oldest'
591        ], 0);
592    }
593}
594