xref: /plugin/statistics/Logger.php (revision 02aa9b73ad7fe7ccc600c393ae1cf861c37c9024)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\ErrorHandler;
10use dokuwiki\HTTP\DokuHTTPClient;
11use dokuwiki\plugin\sqlite\SQLiteDB;
12use dokuwiki\Utf8\Clean;
13use helper_plugin_popularity;
14use helper_plugin_statistics;
15
16class Logger
17{
18    /** @var helper_plugin_statistics The statistics helper plugin instance */
19    protected helper_plugin_statistics $hlp;
20
21    /** @var SQLiteDB The SQLite database instance */
22    protected SQLiteDB $db;
23
24    /** @var string The full user agent string */
25    protected string $uaAgent;
26
27    /** @var string The type of user agent (browser, robot, feedreader) */
28    protected string $uaType = 'browser';
29
30    /** @var string The browser/client name */
31    protected string $uaName;
32
33    /** @var string The browser/client version */
34    protected string $uaVersion;
35
36    /** @var string The operating system/platform */
37    protected string $uaPlatform;
38
39    /** @var string|null The user name, if available */
40    protected ?string $user = null;
41
42    /** @var string The unique user identifier */
43    protected string $uid;
44
45    /** @var string The session identifier */
46    protected string $session;
47
48    /** @var int|null The ID of the main access log entry if any */
49    protected ?int $hit = null;
50
51    /** @var DokuHTTPClient|null The HTTP client instance for testing */
52    protected ?DokuHTTPClient $httpClient = null;
53
54    // region lifecycle
55
56    /**
57     * Constructor
58     *
59     * Parses browser info and set internal vars
60     */
61    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
62    {
63        global $INPUT;
64
65        $this->hlp = $hlp;
66        $this->db = $this->hlp->getDB();
67        $this->httpClient = $httpClient;
68
69        // FIXME if we already have a session, we should not re-parse the user agent
70
71        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
72        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
73        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
74        $dd->discardBotInformation();
75        $dd->parse();
76
77        if ($dd->isFeedReader()) {
78            $this->uaType = 'feedreader';
79        } elseif ($dd->isBot()) {
80            $this->uaType = 'robot';
81            // for now ignore bots
82            throw new IgnoreException('Bot detected, not logging');
83        }
84
85        $this->uaAgent = $ua;
86        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
87        $this->uaVersion = $dd->getClient('version') ?: '0';
88        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
89        $this->uid = $this->getUID();
90        $this->session = $this->getSession();
91        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
92    }
93
94    /**
95     * Should be called before logging
96     *
97     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
98     */
99    public function begin(): void
100    {
101        $this->hlp->getDB()->getPdo()->beginTransaction();
102
103        $this->logUser();
104        $this->logGroups();
105        $this->logDomain();
106        $this->logSession();
107    }
108
109    /**
110     * Should be called after logging
111     *
112     * This commits the transaction started in begin()
113     */
114    public function end(): void
115    {
116        $this->hlp->getDB()->getPdo()->commit();
117    }
118
119    // endregion
120    // region data gathering
121
122    /**
123     * Get the unique user ID
124     *
125     * @return string The unique user identifier
126     */
127    protected function getUID(): string
128    {
129        global $INPUT;
130
131        $uid = $INPUT->str('uid');
132        if (!$uid) $uid = get_doku_pref('plgstats', false);
133        if (!$uid) $uid = session_id();
134        set_doku_pref('plgstats', $uid);
135        return $uid;
136    }
137
138    /**
139     * Return the user's session ID
140     *
141     * This is usually our own managed session, not a PHP session (only in fallback)
142     *
143     * @return string The session identifier
144     */
145    protected function getSession(): string
146    {
147        global $INPUT;
148
149
150
151
152        // FIXME session setting needs work. It should be reset on user change, maybe we do rely on the PHP session?
153        // We also want to store the user agent in the session table, so this needs also change the session ID
154        $ses = $INPUT->str('ses');
155        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
156        if (!$ses) $ses = session_id();
157        set_doku_pref('plgstatsses', $ses);
158        return $ses;
159    }
160
161    // endregion
162    // region automatic logging
163
164    /**
165     * Log the user was seen
166     */
167    protected function logUser(): void
168    {
169        if (!$this->user) return;
170
171        $this->db->exec(
172            'INSERT INTO users (user, dt)
173                  VALUES (?, CURRENT_TIMESTAMP)
174            ON CONFLICT (user) DO UPDATE SET
175                         dt = CURRENT_TIMESTAMP
176                   WHERE excluded.user = users.user
177            ',
178            $this->user
179        );
180
181    }
182
183    /**
184     * Log the session and user agent information
185     */
186    protected function logSession(): void
187    {
188        $this->db->exec(
189            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
190                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
191             ON CONFLICT (session) DO UPDATE SET
192                         end = CURRENT_TIMESTAMP
193                   WHERE excluded.session = sessions.session
194             ',
195            $this->session,
196            $this->uid,
197            $this->user,
198            $this->uaAgent,
199            $this->uaName,
200            $this->uaType,
201            $this->uaVersion,
202            $this->uaPlatform
203        );
204    }
205
206    /**
207     * Log all groups for the user
208     *
209     * @todo maybe this should be done only once per session?
210     */
211    protected function logGroups(): void
212    {
213        global $USERINFO;
214
215        if (!$this->user) return;
216        if (!isset($USERINFO['grps'])) return;
217        if (!is_array($USERINFO['grps'])) return;
218        $groups = $USERINFO['grps'];
219
220        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
221
222        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
223        $params = [];
224        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
225        foreach ($groups as $group) {
226            $params[] = $this->user;
227            $params[] = $group;
228        }
229        $this->db->exec($sql, $params);
230    }
231
232    /**
233     * Log email domain
234     *
235     * @todo maybe this should be done only once per session?
236     */
237    protected function logDomain(): void
238    {
239        global $USERINFO;
240        if (!$this->user) return;
241        if (!isset($USERINFO['mail'])) return;
242        $mail = $USERINFO['mail'];
243
244        $pos = strrpos($mail, '@');
245        if (!$pos) return;
246        $domain = substr($mail, $pos + 1);
247        if (empty($domain)) return;
248
249        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
250        $this->db->exec($sql, [$domain, $this->user]);
251    }
252
253    // endregion
254    // region internal loggers called by the dispatchers
255
256    /**
257     * Log the given referer URL
258     *
259     * @param $referer
260     * @return int|null The referer ID or null if no referer was given
261     */
262    public function logReferer($referer): ?int
263    {
264        if (!$referer) return null;
265
266        // FIXME we could check against a blacklist here
267
268        $se = new SearchEngines($referer);
269        $type = $se->isSearchEngine() ? 'search' : 'external';
270
271        $sql = 'INSERT OR IGNORE INTO referers (url, type, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
272        return $this->db->exec($sql, [$referer, $type]); // returns ID even if the insert was ignored
273    }
274
275    /**
276     * Resolve IP to country/city and store in database
277     *
278     * @return string The IP address as stored
279     */
280    public function logIp(): string
281    {
282        $ip = clientIP(true);
283        $hash = $ip; // @todo we could anonymize here
284
285        // check if IP already known and up-to-date
286        $result = $this->db->queryValue(
287            "SELECT ip
288             FROM   iplocation
289             WHERE  ip = ?
290               AND  lastupd > date('now', '-30 days')",
291            $hash
292        );
293        if ($result) return $hash; // already known and up-to-date
294
295        $http = $this->httpClient ?: new DokuHTTPClient();
296        $http->timeout = 7;
297        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
298
299        if (!$json) {
300            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
301            return $hash;
302        }
303        try {
304            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
305        } catch (\JsonException $e) {
306            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
307            return $hash;
308        }
309        if (!isset($data['status'])) {
310            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result' . $ip, $data);
311            return $hash;
312        };
313
314        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
315        // without location data, so we won't re-query it in the next 30 days.
316
317        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
318        $this->db->exec(
319            'INSERT OR REPLACE INTO iplocation (
320                    ip, country, code, city, host, lastupd
321                 ) VALUES (
322                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
323                 )',
324            $hash,
325            $data['country'] ?? '',
326            $data['countryCode'] ?? '',
327            $data['city'] ?? '',
328            $host
329        );
330
331        return $hash;
332    }
333
334    // endregion
335    // region log dispatchers
336
337    public function logPageView(): void
338    {
339        global $INPUT;
340
341        if (!$INPUT->str('p')) return;
342
343
344        $referer = $INPUT->filter('trim')->str('r');
345        $ip = $this->logIp(); // resolve the IP address
346
347        $data = [
348            'page' => $INPUT->filter('cleanID')->str('p'),
349            'ip' => $ip,
350            'ref_id' => $this->logReferer($referer),
351            'sx' => $INPUT->int('sx'),
352            'sy' => $INPUT->int('sy'),
353            'vx' => $INPUT->int('vx'),
354            'vy' => $INPUT->int('vy'),
355            'session' => $this->session,
356        ];
357
358        $this->db->exec('
359        INSERT INTO pageviews (
360            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
361        ) VALUES (
362            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
363        )
364        ',
365            $data
366        );
367    }
368
369    /**
370     * Log a click on an external link
371     *
372     * Called from log.php
373     */
374    public function logOutgoing(): void
375    {
376        global $INPUT;
377
378        if (!$INPUT->str('ol')) return;
379
380        $link = $INPUT->filter('trim')->str('ol');
381        $session = $this->session;
382        $page = $INPUT->filter('cleanID')->str('p');
383
384        $this->db->exec(
385            'INSERT INTO outlinks (
386                dt, session, page, link
387             ) VALUES (
388                CURRENT_TIMESTAMP, ?, ?, ?, ?
389             )',
390            $session,
391            $page,
392            $link
393        );
394    }
395
396    /**
397     * Log access to a media file
398     *
399     * Called from action.php
400     *
401     * @param string $media The media ID
402     * @param string $mime The media's mime type
403     * @param bool $inline Is this displayed inline?
404     * @param int $size Size of the media file
405     */
406    public function logMedia(string $media, string $mime, bool $inline, int $size): void
407    {
408        [$mime1, $mime2] = explode('/', strtolower($mime));
409        $inline = $inline ? 1 : 0;
410
411
412        $data = [
413            'media' => cleanID($media),
414            'ip' => $this->logIp(), // resolve the IP address
415            'session' => $this->session,
416            'size' => $size,
417            'mime1' => $mime1,
418            'mime2' => $mime2,
419            'inline' => $inline,
420        ];
421
422        $this->db->exec('
423                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
424                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
425            ',
426            $data
427        );
428    }
429
430    /**
431     * Log page edits
432     *
433     * called from action.php
434     *
435     * @param string $page The page that was edited
436     * @param string $type The type of edit (create, edit, etc.)
437     */
438    public function logEdit(string $page, string $type): void
439    {
440        $data = [
441            'page' => cleanID($page),
442            'type' => $type,
443            'ip' => $this->logIp(), // resolve the IP address
444            'session' => $this->session
445        ];
446
447        $editId = $this->db->exec(
448            'INSERT INTO edits (
449                dt, page, type, ip, session
450             ) VALUES (
451                CURRENT_TIMESTAMP, :page, :type, :ip, :session
452             )',
453            $data
454        );
455    }
456
457    /**
458     * Log login/logoffs and user creations
459     *
460     * @param string $type The type of login event (login, logout, create)
461     * @param string $user The username (optional, will use current user if empty)
462     * @fixme this is still broken, I need to figure out the session handling first
463     */
464    public function logLogin(string $type, string $user = ''): void
465    {
466        global $INPUT;
467
468        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
469
470        $ip = clientIP(true);
471        $session = $this->session;
472
473        $this->db->exec(
474            'INSERT INTO logins (
475                dt, type, ip, session
476             ) VALUES (
477                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
478             )',
479            $type,
480            $ip,
481            $user,
482            $session,
483            $this->uid
484        );
485    }
486
487    /**
488     * Log search data to the search related tables
489     *
490     * @param string $query The search query
491     * @param string[] $words The query split into words
492     */
493    public function logSearch(string $query, array $words): void
494    {
495        if(!$query) return;
496
497        $sid = $this->db->exec(
498            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
499            $this->logIp(), // resolve the IP address
500            $this->session,
501            $query,
502        );
503
504        foreach ($words as $word) {
505            if (!$word) continue;
506            $this->db->exec(
507                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
508                $sid,
509                $word
510            );
511        }
512    }
513
514    /**
515     * Log the current page count and size as today's history entry
516     */
517    public function logHistoryPages(): void
518    {
519        global $conf;
520
521        // use the popularity plugin's search method to find the wanted data
522        /** @var helper_plugin_popularity $pop */
523        $pop = plugin_load('helper', 'popularity');
524        $list = $this->initEmptySearchList();
525        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
526        $page_count = $list['file_count'];
527        $page_size = $list['file_size'];
528
529        $this->db->exec(
530            'INSERT OR REPLACE INTO history (
531                info, value, dt
532             ) VALUES (
533                ?, ?, CURRENT_TIMESTAMP
534             )',
535            'page_count',
536            $page_count
537        );
538        $this->db->exec(
539            'INSERT OR REPLACE INTO history (
540                info, value, dt
541             ) VALUES (
542                ?, ?, CURRENT_TIMESTAMP
543             )',
544            'page_size',
545            $page_size
546        );
547    }
548
549    /**
550     * Log the current media count and size as today's history entry
551     */
552    public function logHistoryMedia(): void
553    {
554        global $conf;
555
556        // use the popularity plugin's search method to find the wanted data
557        /** @var helper_plugin_popularity $pop */
558        $pop = plugin_load('helper', 'popularity');
559        $list = $this->initEmptySearchList();
560        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
561        $media_count = $list['file_count'];
562        $media_size = $list['file_size'];
563
564        $this->db->exec(
565            'INSERT OR REPLACE INTO history (
566                info, value, dt
567             ) VALUES (
568                ?, ?, CURRENT_TIMESTAMP
569             )',
570            'media_count',
571            $media_count
572        );
573        $this->db->exec(
574            'INSERT OR REPLACE INTO history (
575                info, value, dt
576             ) VALUES (
577                ?, ?, CURRENT_TIMESTAMP
578             )',
579            'media_size',
580            $media_size
581        );
582    }
583
584    // endregion
585
586    /**
587     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
588     * @return array
589     */
590    protected function initEmptySearchList()
591    {
592        return array_fill_keys([
593            'file_count',
594            'file_size',
595            'file_max',
596            'file_min',
597            'dir_count',
598            'dir_nest',
599            'file_oldest'
600        ], 0);
601    }
602}
603