xref: /plugin/statistics/Logger.php (revision 41d1fffc4a3b58bed7f96d983ba8317fe9e225a5)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
/**
 * Exception thrown when logging should be ignored
 *
 * The Logger raises this while it is being constructed when the current request should not be
 * recorded at all: a bot was detected, or no statistics user/session ID is stored in the session.
 */
class IgnoreException extends \RuntimeException
{
}
11
12use DeviceDetector\DeviceDetector;
13use DeviceDetector\Parser\Client\Browser;
14use DeviceDetector\Parser\Device\AbstractDeviceParser;
15use DeviceDetector\Parser\OperatingSystem;
16use dokuwiki\HTTP\DokuHTTPClient;
17use dokuwiki\Input\Input;
18use dokuwiki\plugin\sqlite\SQLiteDB;
19use helper_plugin_popularity;
20use helper_plugin_statistics;
21
22class Logger
23{
    /** @var helper_plugin_statistics The statistics helper plugin instance (source of the database) */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance, obtained from the helper in the constructor */
    protected SQLiteDB $db;

    /** @var string The full (trimmed) user agent string of the current request */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client family name, 'Unknown' if it could not be determined */
    protected string $uaName;

    /** @var string The browser/client version (major version truncation is requested), '0' if unknown */
    protected string $uaVersion;

    /** @var string The operating system/platform family, 'Unknown' if it could not be determined */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier, read from the session */
    protected string $uid;

    /** @var string The session identifier, read from the session */
    protected string $session;

    /**
     * @var int|null The ID of the main access log entry if any
     * NOTE(review): not read or written by any method of this class in this file
     */
    protected ?int $hit = null;

    /** @var DokuHTTPClient|null Optional HTTP client used for IP lookups (injected for testing) */
    protected ?DokuHTTPClient $httpClient = null;
59
60    // region lifecycle
61
62    /**
63     * Constructor
64     *
65     * Parses browser info and set internal vars
66     */
67    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
68    {
69        /** @var Input $INPUT */
70        global $INPUT;
71
72        $this->hlp = $hlp;
73        $this->db = $this->hlp->getDB();
74        $this->httpClient = $httpClient;
75
76        // FIXME if we already have a session, we should not re-parse the user agent
77
78        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
79        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
80        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
81        $dd->discardBotInformation();
82        $dd->parse();
83
84        if ($dd->isFeedReader()) {
85            $this->uaType = 'feedreader';
86        } elseif ($dd->isBot()) {
87            $this->uaType = 'robot';
88            // for now ignore bots
89            throw new IgnoreException('Bot detected, not logging');
90        }
91
92        $this->uaAgent = $ua;
93        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
94        $this->uaVersion = $dd->getClient('version') ?: '0';
95        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
96        $this->uid = $this->getUID();
97        $this->session = $this->getSession();
98        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
99    }
100
101    /**
102     * Should be called before logging
103     *
104     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
105     */
106    public function begin(): void
107    {
108        $this->hlp->getDB()->getPdo()->beginTransaction();
109
110        $this->logUser();
111        $this->logGroups();
112        $this->logDomain();
113        $this->logSession();
114    }
115
116    /**
117     * Should be called after logging
118     *
119     * This commits the transaction started in begin()
120     */
121    public function end(): void
122    {
123        $this->hlp->getDB()->getPdo()->commit();
124    }
125
126    // endregion
127    // region data gathering
128
129    /**
130     * Get the unique user ID
131     *
132     * The user ID is stored in the user preferences and should stay there forever.
133     * @return string The unique user identifier
134     */
135    protected function getUID(): string
136    {
137        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
138            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
139            // @todo we may later make UID generation optional
140            throw new IgnoreException('No user ID found');
141        }
142
143        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
144    }
145
146    /**
147     * Return the user's session ID
148     *
149     * @return string The session identifier
150     */
151    protected function getSession(): string
152    {
153        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
154            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
155            throw new IgnoreException('No session ID found');
156        }
157
158        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
159    }
160
161    // endregion
162    // region automatic logging
163
164    /**
165     * Log the user was seen
166     */
167    protected function logUser(): void
168    {
169        if (!$this->user) return;
170
171        $this->db->exec(
172            'INSERT INTO users (user, dt)
173                  VALUES (?, CURRENT_TIMESTAMP)
174            ON CONFLICT (user) DO UPDATE SET
175                         dt = CURRENT_TIMESTAMP
176                   WHERE excluded.user = users.user
177            ',
178            $this->user
179        );
180
181    }
182
183    /**
184     * Log the session and user agent information
185     */
186    protected function logSession(): void
187    {
188        $this->db->exec(
189            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
190                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
191             ON CONFLICT (session) DO UPDATE SET
192                         end = CURRENT_TIMESTAMP,
193                         user = excluded.user,
194                         uid = excluded.uid
195                   WHERE excluded.session = sessions.session
196             ',
197            $this->session,
198            $this->uid,
199            $this->user,
200            $this->uaAgent,
201            $this->uaName,
202            $this->uaType,
203            $this->uaVersion,
204            $this->uaPlatform
205        );
206    }
207
208    /**
209     * Log all groups for the user
210     *
211     * @todo maybe this should be done only once per session?
212     */
213    protected function logGroups(): void
214    {
215        global $USERINFO;
216
217        if (!$this->user) return;
218        if (!isset($USERINFO['grps'])) return;
219        if (!is_array($USERINFO['grps'])) return;
220        $groups = $USERINFO['grps'];
221
222        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
223
224        if( empty($groups)) {
225            return;
226        }
227
228        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
229        $params = [];
230        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
231        foreach ($groups as $group) {
232            $params[] = $this->user;
233            $params[] = $group;
234        }
235        $this->db->exec($sql, $params);
236    }
237
238    /**
239     * Log email domain
240     *
241     * @todo maybe this should be done only once per session?
242     */
243    protected function logDomain(): void
244    {
245        global $USERINFO;
246        if (!$this->user) return;
247        if (!isset($USERINFO['mail'])) return;
248        $mail = $USERINFO['mail'];
249
250        $pos = strrpos($mail, '@');
251        if (!$pos) return;
252        $domain = substr($mail, $pos + 1);
253        if (empty($domain)) return;
254
255        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
256        $this->db->exec($sql, [$domain, $this->user]);
257    }
258
259    // endregion
260    // region internal loggers called by the dispatchers
261
262    /**
263     * Log the given referer URL
264     *
265     * @param $referer
266     * @return int|null The referer ID or null if no referer was given
267     */
268    public function logReferer($referer): ?int
269    {
270        if (!$referer) return null;
271
272        // FIXME we could check against a blacklist here
273
274        $se = new SearchEngines($referer);
275        $engine = $se->getEngine();
276
277        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
278        return $this->db->exec($sql, [$referer, $engine]); // returns ID even if the insert was ignored
279    }
280
281    /**
282     * Resolve IP to country/city and store in database
283     *
284     * @return string The IP address as stored
285     */
286    public function logIp(): string
287    {
288        $ip = clientIP(true);
289        $hash = $ip; // @todo we could anonymize here
290
291        // check if IP already known and up-to-date
292        $result = $this->db->queryValue(
293            "SELECT ip
294             FROM   iplocation
295             WHERE  ip = ?
296               AND  lastupd > date('now', '-30 days')",
297            $hash
298        );
299        if ($result) return $hash; // already known and up-to-date
300
301        $http = $this->httpClient ?: new DokuHTTPClient();
302        $http->timeout = 7;
303        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
304
305        if (!$json) {
306            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
307            return $hash;
308        }
309        try {
310            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
311        } catch (\JsonException $e) {
312            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
313            return $hash;
314        }
315        if (!isset($data['status'])) {
316            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result' . $ip, $data);
317            return $hash;
318        }
319
320        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
321        // without location data, so we won't re-query it in the next 30 days.
322
323        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
324        $this->db->exec(
325            'INSERT OR REPLACE INTO iplocation (
326                    ip, country, code, city, host, lastupd
327                 ) VALUES (
328                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
329                 )',
330            $hash,
331            $data['country'] ?? '',
332            $data['countryCode'] ?? '',
333            $data['city'] ?? '',
334            $host
335        );
336
337        return $hash;
338    }
339
340    // endregion
341    // region log dispatchers
342
343    public function logPageView(): void
344    {
345        global $INPUT;
346
347        if (!$INPUT->str('p')) return;
348
349
350        $referer = $INPUT->filter('trim')->str('r');
351        $ip = $this->logIp(); // resolve the IP address
352
353        $data = [
354            'page' => $INPUT->filter('cleanID')->str('p'),
355            'ip' => $ip,
356            'ref_id' => $this->logReferer($referer),
357            'sx' => $INPUT->int('sx'),
358            'sy' => $INPUT->int('sy'),
359            'vx' => $INPUT->int('vx'),
360            'vy' => $INPUT->int('vy'),
361            'session' => $this->session,
362        ];
363
364        $this->db->exec('
365        INSERT INTO pageviews (
366            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
367        ) VALUES (
368            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
369        )
370        ',
371            $data
372        );
373    }
374
375    /**
376     * Log a click on an external link
377     *
378     * Called from log.php
379     */
380    public function logOutgoing(): void
381    {
382        global $INPUT;
383
384        if (!$INPUT->str('ol')) return;
385
386        $link = $INPUT->filter('trim')->str('ol');
387        $session = $this->session;
388        $page = $INPUT->filter('cleanID')->str('p');
389
390        $this->db->exec(
391            'INSERT INTO outlinks (
392                dt, session, page, link
393             ) VALUES (
394                CURRENT_TIMESTAMP, ?, ?, ?
395             )',
396            $session,
397            $page,
398            $link
399        );
400    }
401
402    /**
403     * Log access to a media file
404     *
405     * Called from action.php
406     *
407     * @param string $media The media ID
408     * @param string $mime The media's mime type
409     * @param bool $inline Is this displayed inline?
410     * @param int $size Size of the media file
411     */
412    public function logMedia(string $media, string $mime, bool $inline, int $size): void
413    {
414        [$mime1, $mime2] = explode('/', strtolower($mime));
415        $inline = $inline ? 1 : 0;
416
417
418        $data = [
419            'media' => cleanID($media),
420            'ip' => $this->logIp(), // resolve the IP address
421            'session' => $this->session,
422            'size' => $size,
423            'mime1' => $mime1,
424            'mime2' => $mime2,
425            'inline' => $inline,
426        ];
427
428        $this->db->exec('
429                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
430                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
431            ',
432            $data
433        );
434    }
435
436    /**
437     * Log page edits
438     *
439     * called from action.php
440     *
441     * @param string $page The page that was edited
442     * @param string $type The type of edit (create, edit, etc.)
443     */
444    public function logEdit(string $page, string $type): void
445    {
446        $data = [
447            'page' => cleanID($page),
448            'type' => $type,
449            'ip' => $this->logIp(), // resolve the IP address
450            'session' => $this->session
451        ];
452
453        $this->db->exec(
454            'INSERT INTO edits (
455                dt, page, type, ip, session
456             ) VALUES (
457                CURRENT_TIMESTAMP, :page, :type, :ip, :session
458             )',
459            $data
460        );
461    }
462
463    /**
464     * Log login/logoffs and user creations
465     *
466     * @param string $type The type of login event (login, logout, create, failed)
467     * @param string $user The username
468     */
469    public function logLogin(string $type, string $user = ''): void
470    {
471        global $INPUT;
472
473        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
474
475        $ip = clientIP(true);
476
477        $this->db->exec(
478            'INSERT INTO logins (
479                dt, ip, user, type
480             ) VALUES (
481                CURRENT_TIMESTAMP, ?, ?, ?
482             )',
483            $ip,
484            $user,
485            $type
486        );
487    }
488
489    /**
490     * Log search data to the search related tables
491     *
492     * @param string $query The search query
493     * @param string[] $words The query split into words
494     */
495    public function logSearch(string $query, array $words): void
496    {
497        if (!$query) return;
498
499        $sid = $this->db->exec(
500            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
501            $this->logIp(), // resolve the IP address
502            $this->session,
503            $query,
504        );
505
506        foreach ($words as $word) {
507            if (!$word) continue;
508            $this->db->exec(
509                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
510                $sid,
511                $word
512            );
513        }
514    }
515
516    /**
517     * Log the current page count and size as today's history entry
518     */
519    public function logHistoryPages(): void
520    {
521        global $conf;
522
523        // use the popularity plugin's search method to find the wanted data
524        /** @var helper_plugin_popularity $pop */
525        $pop = plugin_load('helper', 'popularity');
526        $list = $this->initEmptySearchList();
527        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
528        $page_count = $list['file_count'];
529        $page_size = $list['file_size'];
530
531        $this->db->exec(
532            'INSERT OR REPLACE INTO history (
533                info, value, dt
534             ) VALUES (
535                ?, ?, CURRENT_TIMESTAMP
536             )',
537            'page_count',
538            $page_count
539        );
540        $this->db->exec(
541            'INSERT OR REPLACE INTO history (
542                info, value, dt
543             ) VALUES (
544                ?, ?, CURRENT_TIMESTAMP
545             )',
546            'page_size',
547            $page_size
548        );
549    }
550
551    /**
552     * Log the current media count and size as today's history entry
553     */
554    public function logHistoryMedia(): void
555    {
556        global $conf;
557
558        // use the popularity plugin's search method to find the wanted data
559        /** @var helper_plugin_popularity $pop */
560        $pop = plugin_load('helper', 'popularity');
561        $list = $this->initEmptySearchList();
562        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
563        $media_count = $list['file_count'];
564        $media_size = $list['file_size'];
565
566        $this->db->exec(
567            'INSERT OR REPLACE INTO history (
568                info, value, dt
569             ) VALUES (
570                ?, ?, CURRENT_TIMESTAMP
571             )',
572            'media_count',
573            $media_count
574        );
575        $this->db->exec(
576            'INSERT OR REPLACE INTO history (
577                info, value, dt
578             ) VALUES (
579                ?, ?, CURRENT_TIMESTAMP
580             )',
581            'media_size',
582            $media_size
583        );
584    }
585
586    // endregion
587
588    /**
589     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
590     * @return array
591     */
592    protected function initEmptySearchList()
593    {
594        return array_fill_keys([
595            'file_count',
596            'file_size',
597            'file_max',
598            'file_min',
599            'dir_count',
600            'dir_nest',
601            'file_oldest'
602        ], 0);
603    }
604}
605