xref: /plugin/statistics/Logger.php (revision 569a50664ac8b883b8bdf6f42f326d88d099ad6e)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
/**
 * Signals that the current request must not be logged
 *
 * The Logger throws this when it detects a bot or when no user/session ID is available.
 */
class IgnoreException extends \RuntimeException
{
}
11
12use DeviceDetector\DeviceDetector;
13use DeviceDetector\Parser\Client\Browser;
14use DeviceDetector\Parser\Device\AbstractDeviceParser;
15use DeviceDetector\Parser\OperatingSystem;
16use dokuwiki\HTTP\DokuHTTPClient;
17use dokuwiki\Input\Input;
18use dokuwiki\plugin\sqlite\SQLiteDB;
19use helper_plugin_popularity;
20use helper_plugin_statistics;
21
22class Logger
23{
24    /** @var helper_plugin_statistics The statistics helper plugin instance */
25    protected helper_plugin_statistics $hlp;
26
27    /** @var SQLiteDB The SQLite database instance */
28    protected SQLiteDB $db;
29
30    /** @var string The full user agent string */
31    protected string $uaAgent;
32
33    /** @var string The type of user agent (browser, robot, feedreader) */
34    protected string $uaType = 'browser';
35
36    /** @var string The browser/client name */
37    protected string $uaName;
38
39    /** @var string The browser/client version */
40    protected string $uaVersion;
41
42    /** @var string The operating system/platform */
43    protected string $uaPlatform;
44
45    /** @var string|null The user name, if available */
46    protected ?string $user = null;
47
48    /** @var string The unique user identifier */
49    protected string $uid;
50
51    /** @var string The session identifier */
52    protected string $session;
53
54    /** @var int|null The ID of the main access log entry if any */
55    protected ?int $hit = null;
56
57    /** @var DokuHTTPClient|null The HTTP client instance for testing */
58    protected ?DokuHTTPClient $httpClient = null;
59
60    // region lifecycle
61
62    /**
63     * Constructor
64     *
65     * Parses browser info and set internal vars
66     */
67    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
68    {
69        /** @var Input $INPUT */
70        global $INPUT;
71
72        $this->hlp = $hlp;
73        $this->db = $this->hlp->getDB();
74        $this->httpClient = $httpClient;
75
76        // FIXME if we already have a session, we should not re-parse the user agent
77
78        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
79        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
80        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
81        $dd->discardBotInformation();
82        $dd->parse();
83
84        if ($dd->isFeedReader()) {
85            $this->uaType = 'feedreader';
86        } elseif ($dd->isBot()) {
87            $this->uaType = 'robot';
88            // for now ignore bots
89            throw new IgnoreException('Bot detected, not logging');
90        }
91
92        $this->uaAgent = $ua;
93        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
94        $this->uaVersion = $dd->getClient('version') ?: '0';
95        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
96        $this->uid = $this->getUID();
97        $this->session = $this->getSession();
98        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
99    }
100
101    /**
102     * Should be called before logging
103     *
104     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
105     */
106    public function begin(): void
107    {
108        $this->hlp->getDB()->getPdo()->beginTransaction();
109
110        $this->logUser();
111        $this->logGroups();
112        $this->logDomain();
113        $this->logSession();
114    }
115
116    /**
117     * Should be called after logging
118     *
119     * This commits the transaction started in begin()
120     */
121    public function end(): void
122    {
123        $this->hlp->getDB()->getPdo()->commit();
124    }
125
126    // endregion
127    // region data gathering
128
129    /**
130     * Get the unique user ID
131     *
132     * The user ID is stored in the user preferences and should stay there forever.
133     * @return string The unique user identifier
134     */
135    protected function getUID(): string
136    {
137        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
138            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
139            // @todo we may later make UID generation optional
140            throw new IgnoreException('No user ID found');
141        }
142
143        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
144    }
145
146    /**
147     * Return the user's session ID
148     *
149     * @return string The session identifier
150     */
151    protected function getSession(): string
152    {
153        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
154            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
155            throw new IgnoreException('No session ID found');
156        }
157
158        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
159    }
160
161    // endregion
162    // region automatic logging
163
164    /**
165     * Log the user was seen
166     */
167    protected function logUser(): void
168    {
169        if (!$this->user) return;
170
171        $this->db->exec(
172            'INSERT INTO users (user, dt)
173                  VALUES (?, CURRENT_TIMESTAMP)
174            ON CONFLICT (user) DO UPDATE SET
175                         dt = CURRENT_TIMESTAMP
176                   WHERE excluded.user = users.user
177            ',
178            $this->user
179        );
180
181    }
182
183    /**
184     * Log the session and user agent information
185     */
186    protected function logSession(): void
187    {
188        $this->db->exec(
189            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
190                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
191             ON CONFLICT (session) DO UPDATE SET
192                         end = CURRENT_TIMESTAMP,
193                         user = excluded.user,
194                         uid = excluded.uid
195                   WHERE excluded.session = sessions.session
196             ',
197            $this->session,
198            $this->uid,
199            $this->user,
200            $this->uaAgent,
201            $this->uaName,
202            $this->uaType,
203            $this->uaVersion,
204            $this->uaPlatform
205        );
206    }
207
208    /**
209     * Log all groups for the user
210     *
211     * @todo maybe this should be done only once per session?
212     */
213    protected function logGroups(): void
214    {
215        global $USERINFO;
216
217        if (!$this->user) return;
218        if (!isset($USERINFO['grps'])) return;
219        if (!is_array($USERINFO['grps'])) return;
220        $groups = $USERINFO['grps'];
221
222        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
223
224        if( empty($groups)) {
225            return;
226        }
227
228        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
229        $params = [];
230        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
231        foreach ($groups as $group) {
232            $params[] = $this->user;
233            $params[] = $group;
234        }
235        $this->db->exec($sql, $params);
236    }
237
238    /**
239     * Log email domain
240     *
241     * @todo maybe this should be done only once per session?
242     */
243    protected function logDomain(): void
244    {
245        global $USERINFO;
246        if (!$this->user) return;
247        if (!isset($USERINFO['mail'])) return;
248        $mail = $USERINFO['mail'];
249
250        $pos = strrpos($mail, '@');
251        if (!$pos) return;
252        $domain = substr($mail, $pos + 1);
253        if (empty($domain)) return;
254
255        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
256        $this->db->exec($sql, [$domain, $this->user]);
257    }
258
259    // endregion
260    // region internal loggers called by the dispatchers
261
262    /**
263     * Log the given referer URL
264     *
265     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
266     * We do not log referers that are our own pages though.
267     *
268     * engine set -> a search engine referer
269     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
270     * no engine set, url not empty -> a referer from another page (not a wiki page)
271     * null returned -> referer was a wiki page
272     *
273     * @param $referer
274     * @return int|null The referer ID or null if no referer was logged
275     * @todo we could check against a blacklist here
276     */
277    public function logReferer($referer): ?int
278    {
279        $referer = trim($referer);
280
281        // do not log our own pages as referers (empty referer is OK though)
282        if (!empty($referer)) {
283            $selfre = '^' . preg_quote(DOKU_URL, '/');
284            if(preg_match("/$selfre/", $referer)) {
285                return null;
286            }
287        }
288
289        // is it a search engine?
290        $se = new SearchEngines($referer);
291        $engine = $se->getEngine();
292
293        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
294        $this->db->exec($sql, [$referer, $engine]);
295        return (int) $this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
296    }
297
298    /**
299     * Resolve IP to country/city and store in database
300     *
301     * @return string The IP address as stored
302     */
303    public function logIp(): string
304    {
305        $ip = clientIP(true);
306        $hash = $ip; // @todo we could anonymize here
307
308        // check if IP already known and up-to-date
309        $result = $this->db->queryValue(
310            "SELECT ip
311             FROM   iplocation
312             WHERE  ip = ?
313               AND  lastupd > date('now', '-30 days')",
314            $hash
315        );
316        if ($result) return $hash; // already known and up-to-date
317
318        $http = $this->httpClient ?: new DokuHTTPClient();
319        $http->timeout = 7;
320        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
321
322        if (!$json) {
323            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
324            return $hash;
325        }
326        try {
327            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
328        } catch (\JsonException $e) {
329            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
330            return $hash;
331        }
332        if (!isset($data['status'])) {
333            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result' . $ip, $data);
334            return $hash;
335        }
336
337        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
338        // without location data, so we won't re-query it in the next 30 days.
339
340        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
341        $this->db->exec(
342            'INSERT OR REPLACE INTO iplocation (
343                    ip, country, code, city, host, lastupd
344                 ) VALUES (
345                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
346                 )',
347            $hash,
348            $data['country'] ?? '',
349            $data['countryCode'] ?? '',
350            $data['city'] ?? '',
351            $host
352        );
353
354        return $hash;
355    }
356
357    // endregion
358    // region log dispatchers
359
360    public function logPageView(): void
361    {
362        global $INPUT;
363
364        if (!$INPUT->str('p')) return;
365
366
367        $referer = $INPUT->filter('trim')->str('r');
368        $ip = $this->logIp(); // resolve the IP address
369
370        $data = [
371            'page' => $INPUT->filter('cleanID')->str('p'),
372            'ip' => $ip,
373            'ref_id' => $this->logReferer($referer),
374            'sx' => $INPUT->int('sx'),
375            'sy' => $INPUT->int('sy'),
376            'vx' => $INPUT->int('vx'),
377            'vy' => $INPUT->int('vy'),
378            'session' => $this->session,
379        ];
380
381        $this->db->exec('
382        INSERT INTO pageviews (
383            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
384        ) VALUES (
385            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
386        )
387        ',
388            $data
389        );
390    }
391
392    /**
393     * Log a click on an external link
394     *
395     * Called from log.php
396     */
397    public function logOutgoing(): void
398    {
399        global $INPUT;
400
401        if (!$INPUT->str('ol')) return;
402
403        $link = $INPUT->filter('trim')->str('ol');
404        $session = $this->session;
405        $page = $INPUT->filter('cleanID')->str('p');
406
407        $this->db->exec(
408            'INSERT INTO outlinks (
409                dt, session, page, link
410             ) VALUES (
411                CURRENT_TIMESTAMP, ?, ?, ?
412             )',
413            $session,
414            $page,
415            $link
416        );
417    }
418
419    /**
420     * Log access to a media file
421     *
422     * Called from action.php
423     *
424     * @param string $media The media ID
425     * @param string $mime The media's mime type
426     * @param bool $inline Is this displayed inline?
427     * @param int $size Size of the media file
428     */
429    public function logMedia(string $media, string $mime, bool $inline, int $size): void
430    {
431        [$mime1, $mime2] = explode('/', strtolower($mime));
432        $inline = $inline ? 1 : 0;
433
434
435        $data = [
436            'media' => cleanID($media),
437            'ip' => $this->logIp(), // resolve the IP address
438            'session' => $this->session,
439            'size' => $size,
440            'mime1' => $mime1,
441            'mime2' => $mime2,
442            'inline' => $inline,
443        ];
444
445        $this->db->exec('
446                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
447                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
448            ',
449            $data
450        );
451    }
452
453    /**
454     * Log page edits
455     *
456     * called from action.php
457     *
458     * @param string $page The page that was edited
459     * @param string $type The type of edit (create, edit, etc.)
460     */
461    public function logEdit(string $page, string $type): void
462    {
463        $data = [
464            'page' => cleanID($page),
465            'type' => $type,
466            'ip' => $this->logIp(), // resolve the IP address
467            'session' => $this->session
468        ];
469
470        $this->db->exec(
471            'INSERT INTO edits (
472                dt, page, type, ip, session
473             ) VALUES (
474                CURRENT_TIMESTAMP, :page, :type, :ip, :session
475             )',
476            $data
477        );
478    }
479
480    /**
481     * Log login/logoffs and user creations
482     *
483     * @param string $type The type of login event (login, logout, create, failed)
484     * @param string $user The username
485     */
486    public function logLogin(string $type, string $user = ''): void
487    {
488        global $INPUT;
489
490        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
491
492        $ip = clientIP(true);
493
494        $this->db->exec(
495            'INSERT INTO logins (
496                dt, ip, user, type
497             ) VALUES (
498                CURRENT_TIMESTAMP, ?, ?, ?
499             )',
500            $ip,
501            $user,
502            $type
503        );
504    }
505
506    /**
507     * Log search data to the search related tables
508     *
509     * @param string $query The search query
510     * @param string[] $words The query split into words
511     */
512    public function logSearch(string $query, array $words): void
513    {
514        if (!$query) return;
515
516        $sid = $this->db->exec(
517            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
518            $this->logIp(), // resolve the IP address
519            $this->session,
520            $query,
521        );
522
523        foreach ($words as $word) {
524            if (!$word) continue;
525            $this->db->exec(
526                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
527                $sid,
528                $word
529            );
530        }
531    }
532
533    /**
534     * Log the current page count and size as today's history entry
535     */
536    public function logHistoryPages(): void
537    {
538        global $conf;
539
540        // use the popularity plugin's search method to find the wanted data
541        /** @var helper_plugin_popularity $pop */
542        $pop = plugin_load('helper', 'popularity');
543        $list = $this->initEmptySearchList();
544        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
545        $page_count = $list['file_count'];
546        $page_size = $list['file_size'];
547
548        $this->db->exec(
549            'INSERT OR REPLACE INTO history (
550                info, value, dt
551             ) VALUES (
552                ?, ?, CURRENT_TIMESTAMP
553             )',
554            'page_count',
555            $page_count
556        );
557        $this->db->exec(
558            'INSERT OR REPLACE INTO history (
559                info, value, dt
560             ) VALUES (
561                ?, ?, CURRENT_TIMESTAMP
562             )',
563            'page_size',
564            $page_size
565        );
566    }
567
568    /**
569     * Log the current media count and size as today's history entry
570     */
571    public function logHistoryMedia(): void
572    {
573        global $conf;
574
575        // use the popularity plugin's search method to find the wanted data
576        /** @var helper_plugin_popularity $pop */
577        $pop = plugin_load('helper', 'popularity');
578        $list = $this->initEmptySearchList();
579        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
580        $media_count = $list['file_count'];
581        $media_size = $list['file_size'];
582
583        $this->db->exec(
584            'INSERT OR REPLACE INTO history (
585                info, value, dt
586             ) VALUES (
587                ?, ?, CURRENT_TIMESTAMP
588             )',
589            'media_count',
590            $media_count
591        );
592        $this->db->exec(
593            'INSERT OR REPLACE INTO history (
594                info, value, dt
595             ) VALUES (
596                ?, ?, CURRENT_TIMESTAMP
597             )',
598            'media_size',
599            $media_size
600        );
601    }
602
603    // endregion
604
605    /**
606     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
607     * @return array
608     */
609    protected function initEmptySearchList()
610    {
611        return array_fill_keys([
612            'file_count',
613            'file_size',
614            'file_max',
615            'file_min',
616            'dir_count',
617            'dir_nest',
618            'file_oldest'
619        ], 0);
620    }
621}
622