xref: /plugin/statistics/Logger.php (revision 4a163f509d699546ac698844a4c1763ef1e213a1)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\ErrorHandler;
10use dokuwiki\HTTP\DokuHTTPClient;
11use dokuwiki\plugin\sqlite\SQLiteDB;
12use dokuwiki\Utf8\Clean;
13use helper_plugin_popularity;
14use helper_plugin_statistics;
15
16class Logger
17{
18    /** @var helper_plugin_statistics The statistics helper plugin instance */
19    protected helper_plugin_statistics $hlp;
20
21    /** @var SQLiteDB The SQLite database instance */
22    protected SQLiteDB $db;
23
24    /** @var string The full user agent string */
25    protected string $uaAgent;
26
27    /** @var string The type of user agent (browser, robot, feedreader) */
28    protected string $uaType = 'browser';
29
30    /** @var string The browser/client name */
31    protected string $uaName;
32
33    /** @var string The browser/client version */
34    protected string $uaVersion;
35
36    /** @var string The operating system/platform */
37    protected string $uaPlatform;
38
39    /** @var string|null The user name, if available */
40    protected ?string $user = null;
41
42    /** @var string The unique user identifier */
43    protected string $uid;
44
45    /** @var string The session identifier */
46    protected string $session;
47
48    /** @var int|null The ID of the main access log entry if any */
49    protected ?int $hit = null;
50
51    /** @var DokuHTTPClient|null The HTTP client instance for testing */
52    protected ?DokuHTTPClient $httpClient = null;
53
54    // region lifecycle
55
56    /**
57     * Constructor
58     *
59     * Parses browser info and set internal vars
60     */
61    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
62    {
63        global $INPUT;
64
65        $this->hlp = $hlp;
66        $this->db = $this->hlp->getDB();
67        $this->httpClient = $httpClient;
68
69        // FIXME if we already have a session, we should not re-parse the user agent
70
71        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
72        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
73        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
74        $dd->discardBotInformation();
75        $dd->parse();
76
77        if ($dd->isFeedReader()) {
78            $this->uaType = 'feedreader';
79        } elseif ($dd->isBot()) {
80            $this->uaType = 'robot';
81            // for now ignore bots
82            throw new IgnoreException('Bot detected, not logging');
83        }
84
85        $this->uaAgent = $ua;
86        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
87        $this->uaVersion = $dd->getClient('version') ?: '0';
88        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
89        $this->uid = $this->getUID();
90        $this->session = $this->getSession();
91        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
92    }
93
94    /**
95     * Should be called before logging
96     *
97     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
98     */
99    public function begin(): void
100    {
101        $this->hlp->getDB()->getPdo()->beginTransaction();
102
103        $this->logUser();
104        $this->logGroups();
105        $this->logDomain();
106        $this->logSession();
107    }
108
109    /**
110     * Should be called after logging
111     *
112     * This commits the transaction started in begin()
113     */
114    public function end(): void
115    {
116        $this->hlp->getDB()->getPdo()->commit();
117    }
118
119    // endregion
120    // region data gathering
121
122    /**
123     * Get the unique user ID
124     *
125     * @return string The unique user identifier
126     */
127    protected function getUID(): string
128    {
129        global $INPUT;
130
131        $uid = $INPUT->str('uid');
132        if (!$uid) $uid = get_doku_pref('plgstats', false);
133        if (!$uid) $uid = session_id();
134        set_doku_pref('plgstats', $uid);
135        return $uid;
136    }
137
138    /**
139     * Return the user's session ID
140     *
141     * This is usually our own managed session, not a PHP session (only in fallback)
142     *
143     * @return string The session identifier
144     */
145    protected function getSession(): string
146    {
147        global $INPUT;
148
149        // FIXME session setting needs work. It should be reset on user change, maybe we do rely on the PHP session?
150        // We also want to store the user agent in the session table, so this needs also change the session ID
151        $ses = $INPUT->str('ses');
152        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
153        if (!$ses) $ses = session_id();
154        set_doku_pref('plgstatsses', $ses);
155        return $ses;
156    }
157
158    // endregion
159    // region automatic logging
160
161    /**
162     * Log the user was seen
163     */
164    protected function logUser(): void
165    {
166        if (!$this->user) return;
167
168        $this->db->exec(
169            'INSERT INTO users (user, dt)
170                  VALUES (?, CURRENT_TIMESTAMP)
171            ON CONFLICT (user) DO UPDATE SET
172                         dt = CURRENT_TIMESTAMP
173                   WHERE excluded.user = users.user
174            ',
175            $this->user
176        );
177
178    }
179
180    /**
181     * Log the session and user agent information
182     */
183    protected function logSession(): void
184    {
185        $this->db->exec(
186            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
187                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
188             ON CONFLICT (session) DO UPDATE SET
189                         end = CURRENT_TIMESTAMP
190                   WHERE excluded.session = sessions.session
191             ',
192            $this->session,
193            $this->uid,
194            $this->user,
195            $this->uaAgent,
196            $this->uaName,
197            $this->uaType,
198            $this->uaVersion,
199            $this->uaPlatform
200        );
201    }
202
203    /**
204     * Log all groups for the user
205     *
206     * @todo maybe this should be done only once per session?
207     */
208    protected function logGroups(): void
209    {
210        global $USERINFO;
211
212        if (!$this->user) return;
213        if (!isset($USERINFO['grps'])) return;
214        if (!is_array($USERINFO['grps'])) return;
215        $groups = $USERINFO['grps'];
216
217        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
218
219        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?, ?)'));
220        $params = [];
221        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
222        foreach ($groups as $group) {
223            $params[] = $this->user;
224            $params[] = $group;
225        }
226        $this->db->exec($sql, $params);
227    }
228
229    /**
230     * Log email domain
231     *
232     * @todo maybe this should be done only once per session?
233     */
234    protected function logDomain(): void
235    {
236        global $USERINFO;
237        if (!$this->user) return;
238        if (!isset($USERINFO['mail'])) return;
239        $mail = $USERINFO['mail'];
240
241        $pos = strrpos($mail, '@');
242        if (!$pos) return;
243        $domain = substr($mail, $pos + 1);
244        if (empty($domain)) return;
245
246        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
247        $this->db->exec($sql, [$domain, $this->user]);
248    }
249
250    // endregion
251    // region internal loggers called by the dispatchers
252
253    /**
254     * Log the given referer URL
255     *
256     * @param $referer
257     * @return int|null The referer ID or null if no referer was given
258     */
259    public function logReferer($referer): ?int
260    {
261        if (!$referer) return null;
262
263        // FIXME we could check against a blacklist here
264
265        $se = new SearchEngines($referer);
266        $type = $se->isSearchEngine() ? 'search' : 'external';
267
268        $sql = '
269            INSERT INTO referers (url, type, dt)
270                 VALUES (?, ?, CURRENT_TIMESTAMP)
271            ON CONFLICT (url)
272              DO UPDATE
273                    SET type = excluded.type, dt = excluded.dt;
274        ';
275        return $this->db->exec($sql, [$referer, $type]);
276    }
277
278    /**
279     * Resolve IP to country/city and store in database
280     *
281     * @return string The IP address as stored
282     */
283    public function logIp(): string
284    {
285        $ip = clientIP(true);
286        $hash = $ip; // @todo we could anonymize here
287
288        // check if IP already known and up-to-date
289        $result = $this->db->queryValue(
290            "SELECT ip
291             FROM   iplocation
292             WHERE  ip = ?
293               AND  lastupd > date('now', '-30 days')",
294            $hash
295        );
296        if ($result) return $hash; // already known and up-to-date
297
298        $http = $this->httpClient ?: new DokuHTTPClient();
299        $http->timeout = 7;
300        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
301
302        if (!$json) {
303            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
304            return $hash;
305        }
306        try {
307            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
308        } catch (\JsonException $e) {
309            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
310            return $hash;
311        }
312        if (!isset($data['status']) || $data['status'] !== 'success') {
313            \dokuwiki\Logger::error('Statistics Plugin - IP location lookup failed for ' . $ip, $data);
314            return $hash;
315        }
316
317        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
318        $this->db->exec(
319            'INSERT OR REPLACE INTO iplocation (
320                    ip, country, code, city, host, lastupd
321                 ) VALUES (
322                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
323                 )',
324            $hash,
325            $data['country'],
326            $data['countryCode'],
327            $data['city'],
328            $host
329        );
330
331        return $hash;
332    }
333
334    // endregion
335    // region log dispatchers
336
337    public function logPageView(): void
338    {
339        global $INPUT;
340
341        if (!$INPUT->str('p')) return;
342
343
344        $referer = $INPUT->filter('trim')->str('r');
345        $ip = $this->logIp(); // resolve the IP address
346
347        $data = [
348            'page' => $INPUT->filter('cleanID')->str('p'),
349            'ip' => $ip,
350            'ref_id' => $this->logReferer($referer),
351            'sx' => $INPUT->int('sx'),
352            'sy' => $INPUT->int('sy'),
353            'vx' => $INPUT->int('vx'),
354            'vy' => $INPUT->int('vy'),
355            'session' => $this->session,
356        ];
357
358        $this->db->exec('
359        INSERT INTO pageviews (
360            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
361        ) VALUES (
362            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
363        )
364        ',
365            $data
366        );
367    }
368
369    /**
370     * Log a click on an external link
371     *
372     * Called from log.php
373     */
374    public function logOutgoing(): void
375    {
376        global $INPUT;
377
378        if (!$INPUT->str('ol')) return;
379
380        $link = $INPUT->filter('trim')->str('ol');
381        $session = $this->session;
382        $page = $INPUT->filter('cleanID')->str('p');
383
384        $this->db->exec(
385            'INSERT INTO outlinks (
386                dt, session, page, link
387             ) VALUES (
388                CURRENT_TIMESTAMP, ?, ?, ?, ?
389             )',
390            $session,
391            $page,
392            $link
393        );
394    }
395
396    /**
397     * Log access to a media file
398     *
399     * Called from action.php
400     *
401     * @param string $media The media ID
402     * @param string $mime The media's mime type
403     * @param bool $inline Is this displayed inline?
404     * @param int $size Size of the media file
405     */
406    public function logMedia(string $media, string $mime, bool $inline, int $size): void
407    {
408        [$mime1, $mime2] = explode('/', strtolower($mime));
409        $inline = $inline ? 1 : 0;
410
411
412        $data = [
413            'media' => cleanID($media),
414            'ip' => $this->logIp(), // resolve the IP address
415            'session' => $this->session,
416            'size' => $size,
417            'mime1' => $mime1,
418            'mime2' => $mime2,
419            'inline' => $inline,
420        ];
421
422        $this->db->exec('
423                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
424                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
425            ',
426            $data
427        );
428    }
429
430    /**
431     * Log page edits
432     *
433     * called from action.php
434     *
435     * @param string $page The page that was edited
436     * @param string $type The type of edit (create, edit, etc.)
437     */
438    public function logEdit(string $page, string $type): void
439    {
440        $data = [
441            'page' => cleanID($page),
442            'type' => $type,
443            'ip' => $this->logIp(), // resolve the IP address
444            'session' => $this->session
445        ];
446
447        $editId = $this->db->exec(
448            'INSERT INTO edits (
449                dt, page, type, ip, session
450             ) VALUES (
451                CURRENT_TIMESTAMP, :page, :type, :ip, :session
452             )',
453            $data
454        );
455    }
456
457    /**
458     * Log login/logoffs and user creations
459     *
460     * @param string $type The type of login event (login, logout, create)
461     * @param string $user The username (optional, will use current user if empty)
462     * @fixme this is still broken, I need to figure out the session handling first
463     */
464    public function logLogin(string $type, string $user = ''): void
465    {
466        global $INPUT;
467
468        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
469
470        $ip = clientIP(true);
471        $session = $this->session;
472
473        $this->db->exec(
474            'INSERT INTO logins (
475                dt, type, ip, session
476             ) VALUES (
477                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
478             )',
479            $type,
480            $ip,
481            $user,
482            $session,
483            $this->uid
484        );
485    }
486
487    /**
488     * Log the current page count and size as today's history entry
489     */
490    public function logHistoryPages(): void
491    {
492        global $conf;
493
494        // use the popularity plugin's search method to find the wanted data
495        /** @var helper_plugin_popularity $pop */
496        $pop = plugin_load('helper', 'popularity');
497        $list = $this->initEmptySearchList();
498        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
499        $page_count = $list['file_count'];
500        $page_size = $list['file_size'];
501
502        $this->db->exec(
503            'INSERT OR REPLACE INTO history (
504                info, value, dt
505             ) VALUES (
506                ?, ?, CURRENT_TIMESTAMP
507             )',
508            'page_count',
509            $page_count
510        );
511        $this->db->exec(
512            'INSERT OR REPLACE INTO history (
513                info, value, dt
514             ) VALUES (
515                ?, ?, CURRENT_TIMESTAMP
516             )',
517            'page_size',
518            $page_size
519        );
520    }
521
522    /**
523     * Log the current media count and size as today's history entry
524     */
525    public function logHistoryMedia(): void
526    {
527        global $conf;
528
529        // use the popularity plugin's search method to find the wanted data
530        /** @var helper_plugin_popularity $pop */
531        $pop = plugin_load('helper', 'popularity');
532        $list = $this->initEmptySearchList();
533        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
534        $media_count = $list['file_count'];
535        $media_size = $list['file_size'];
536
537        $this->db->exec(
538            'INSERT OR REPLACE INTO history (
539                info, value, dt
540             ) VALUES (
541                ?, ?, CURRENT_TIMESTAMP
542             )',
543            'media_count',
544            $media_count
545        );
546        $this->db->exec(
547            'INSERT OR REPLACE INTO history (
548                info, value, dt
549             ) VALUES (
550                ?, ?, CURRENT_TIMESTAMP
551             )',
552            'media_size',
553            $media_size
554        );
555    }
556
557    // endregion
558
559    /**
560     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
561     * @return array
562     */
563    protected function initEmptySearchList()
564    {
565        return array_fill_keys([
566            'file_count',
567            'file_size',
568            'file_max',
569            'file_min',
570            'dir_count',
571            'dir_nest',
572            'file_oldest'
573        ], 0);
574    }
575}
576