xref: /plugin/statistics/Logger.php (revision 9aec20ef18cca1f21a6264e3242a9d6305a77ea1)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\ClientHints;
6use DeviceDetector\DeviceDetector;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\Input\Input;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use helper_plugin_popularity;
12use helper_plugin_statistics;
13
14class Logger
15{
    /** @var helper_plugin_statistics The statistics helper plugin instance (gives access to config and database) */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance, obtained from the helper in the constructor */
    protected SQLiteDB $db;

    /** @var string The full user agent string as sent by the client (trimmed) */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name, 'Unknown' if it could not be detected */
    protected string $uaName;

    /** @var string The browser/client version, '0' if it could not be detected */
    protected string $uaVersion;

    /** @var string The operating system family, 'Unknown' if it could not be detected */
    protected string $uaPlatform;

    /** @var string|null The user name, if available; stays null when the 'nousers' option is set */
    protected ?string $user = null;

    /** @var string The unique user identifier, read from the session */
    protected string $uid;

    /** @var string The session identifier, read from the session */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;
48
49    // region lifecycle
50
51    /**
52     * Constructor
53     *
54     * Parses browser info and set internal vars
55     */
56    public function __construct(helper_plugin_statistics $hlp)
57    {
58        /** @var Input $INPUT */
59        global $INPUT;
60
61        $this->hlp = $hlp;
62        $this->db = $this->hlp->getDB();
63
64        // FIXME if we already have a session, we should not re-parse the user agent
65
66        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
67        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
68        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
69        $dd->discardBotInformation();
70        $dd->parse();
71
72        if ($dd->isFeedReader()) {
73            $this->uaType = 'feedreader';
74        } elseif ($dd->isBot()) {
75            $this->uaType = 'robot';
76            // for now ignore bots
77            throw new IgnoreException('Bot detected, not logging');
78        }
79
80        $this->uaAgent = $ua;
81        $this->uaName = $dd->getClient('name') ?: 'Unknown';
82        $this->uaVersion = $dd->getClient('version') ?: '0';
83        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
84        $this->uid = $this->getUID();
85        $this->session = $this->getSession();
86
87        if (!$this->hlp->getConf('nousers')) {
88            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
89        }
90    }
91
92    /**
93     * Should be called before logging
94     *
95     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
96     */
97    public function begin(): void
98    {
99        $this->hlp->getDB()->getPdo()->beginTransaction();
100
101        $this->logUser();
102        $this->logGroups();
103        $this->logDomain();
104        $this->logSession();
105        $this->logCampaign();
106    }
107
108    /**
109     * Should be called after logging
110     *
111     * This commits the transaction started in begin()
112     */
113    public function end(): void
114    {
115        $this->hlp->getDB()->getPdo()->commit();
116    }
117
118    // endregion
119    // region data gathering
120
121    /**
122     * Get the unique user ID
123     *
124     * The user ID is stored in the user preferences and should stay there forever.
125     * @return string The unique user identifier
126     */
127    protected function getUID(): string
128    {
129        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
130            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
131            // @todo we may later make UID generation optional
132            throw new IgnoreException('No user ID found');
133        }
134
135        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
136    }
137
138    /**
139     * Return the user's session ID
140     *
141     * @return string The session identifier
142     */
143    protected function getSession(): string
144    {
145        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
146            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
147            throw new IgnoreException('No session ID found');
148        }
149
150        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
151    }
152
153    // endregion
154    // region automatic logging
155
156    /**
157     * Log the user was seen
158     */
159    protected function logUser(): void
160    {
161        if (!$this->user) return;
162
163        $this->db->exec(
164            'INSERT INTO users (user, dt)
165                  VALUES (?, CURRENT_TIMESTAMP)
166            ON CONFLICT (user) DO UPDATE SET
167                         dt = CURRENT_TIMESTAMP
168                   WHERE excluded.user = users.user
169            ',
170            $this->user
171        );
172    }
173
174    /**
175     * Log the session and user agent information
176     */
177    protected function logSession(): void
178    {
179        $this->db->exec(
180            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
181                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
182             ON CONFLICT (session) DO UPDATE SET
183                         end = CURRENT_TIMESTAMP,
184                         user = excluded.user,
185                         uid = excluded.uid
186                   WHERE excluded.session = sessions.session
187             ',
188            $this->session,
189            $this->uid,
190            $this->user,
191            $this->uaAgent,
192            $this->uaName,
193            $this->uaType,
194            $this->uaVersion,
195            $this->uaPlatform
196        );
197    }
198
199    /**
200     * Log UTM campaign data
201     *
202     * @return void
203     */
204    protected function logCampaign(): void
205    {
206        global $INPUT;
207
208        $campaign = $INPUT->filter('trim')->str('utm_campaign', null, true);
209        $source = $INPUT->filter('trim')->str('utm_source', null, true);
210        $medium = $INPUT->filter('trim')->str('utm_medium', null, true);
211
212        if (!$campaign) return;
213
214        $this->db->exec(
215            'INSERT OR IGNORE INTO campaigns (session, campaign, source, medium)
216                  VALUES (?, ?, ?, ?)',
217            $this->session,
218            $campaign,
219            $source,
220            $medium
221        );
222    }
223
224    /**
225     * Log all groups for the user
226     *
227     * @todo maybe this should be done only once per session?
228     */
229    protected function logGroups(): void
230    {
231        global $USERINFO;
232
233        if (!$this->user) return;
234        if (!isset($USERINFO['grps'])) return;
235        if (!is_array($USERINFO['grps'])) return;
236        $groups = $USERINFO['grps'];
237
238        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
239
240        if ($groups === []) {
241            return;
242        }
243
244        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
245        $params = [];
246        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
247        foreach ($groups as $group) {
248            $params[] = $this->user;
249            $params[] = $group;
250        }
251        $this->db->exec($sql, $params);
252    }
253
254    /**
255     * Log email domain
256     *
257     * @todo maybe this should be done only once per session?
258     */
259    protected function logDomain(): void
260    {
261        global $USERINFO;
262        if (!$this->user) return;
263        if (!isset($USERINFO['mail'])) return;
264        $mail = $USERINFO['mail'];
265
266        $pos = strrpos($mail, '@');
267        if (!$pos) return;
268        $domain = substr($mail, $pos + 1);
269        if (empty($domain)) return;
270
271        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
272        $this->db->exec($sql, [$domain, $this->user]);
273    }
274
275    // endregion
276    // region internal loggers called by the dispatchers
277
278    /**
279     * Log the given referer URL
280     *
281     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
282     * We do not log referers that are our own pages though.
283     *
284     * engine set -> a search engine referer
285     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
286     * no engine set, url not empty -> a referer from another page (not a wiki page)
287     * null returned -> referer was a wiki page
288     *
289     * @param $referer
290     * @return int|null The referer ID or null if no referer was logged
291     * @todo we could check against a blacklist here
292     */
293    public function logReferer($referer): ?int
294    {
295        $referer = trim($referer);
296
297        // do not log our own pages as referers (empty referer is OK though)
298        if (!empty($referer)) {
299            $selfre = '^' . preg_quote(DOKU_URL, '/');
300            if (preg_match("/$selfre/", $referer)) {
301                return null;
302            }
303        }
304
305        // is it a search engine?
306        $se = new SearchEngines($referer);
307        $engine = $se->getEngine();
308
309        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
310        $this->db->exec($sql, [$referer, $engine]);
311        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
312    }
313
314    /**
315     * Resolve IP to country/city and store in database
316     *
317     * @return string The IP address as stored
318     */
319    public function logIp(): string
320    {
321        $ip = clientIP(true);
322
323        // anonymize the IP address for storage?
324        if ($this->hlp->getConf('anonips')) {
325            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
326            $host = '';
327        } else {
328            $hash = $ip;
329            $host = gethostbyaddr($ip);
330        }
331
332        if ($this->hlp->getConf('nolocation')) {
333            // if we don't resolve location data, we just return the IP address
334            return $hash;
335        }
336
337        // check if IP already known and up-to-date
338        $result = $this->db->queryValue(
339            "SELECT ip
340             FROM   iplocation
341             WHERE  ip = ?
342               AND  dt > date('now', '-30 days')",
343            $hash
344        );
345        if ($result) return $hash; // already known and up-to-date
346
347
348        // resolve the IP address to location data
349        try {
350            $data = $this->hlp->resolveIP($ip);
351        } catch (IpResolverException $e) {
352            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
353            $data = [];
354        }
355
356        $this->db->exec(
357            'INSERT OR REPLACE INTO iplocation (
358                    ip, country, code, city, host, dt
359                 ) VALUES (
360                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
361                 )',
362            $hash,
363            $data['country'] ?? '',
364            $data['countryCode'] ?? '',
365            $data['city'] ?? '',
366            $host
367        );
368
369        return $hash;
370    }
371
372    // endregion
373    // region log dispatchers
374
375    public function logPageView(): void
376    {
377        global $INPUT;
378
379        if (!$INPUT->str('p')) return;
380
381
382        $referer = $INPUT->filter('trim')->str('r');
383        $ip = $this->logIp(); // resolve the IP address
384
385        $data = [
386            'page' => $INPUT->filter('cleanID')->str('p'),
387            'ip' => $ip,
388            'ref_id' => $this->logReferer($referer),
389            'sx' => $INPUT->int('sx'),
390            'sy' => $INPUT->int('sy'),
391            'vx' => $INPUT->int('vx'),
392            'vy' => $INPUT->int('vy'),
393            'session' => $this->session,
394        ];
395
396        $this->db->exec(
397            '
398        INSERT INTO pageviews (
399            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
400        ) VALUES (
401            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
402        )
403        ',
404            $data
405        );
406    }
407
408    /**
409     * Log a click on an external link
410     *
411     * Called from dispatch.php
412     */
413    public function logOutgoing(): void
414    {
415        global $INPUT;
416
417        if (!$INPUT->str('ol')) return;
418
419        $link = $INPUT->filter('trim')->str('ol');
420        $session = $this->session;
421        $page = $INPUT->filter('cleanID')->str('p');
422
423        $this->db->exec(
424            'INSERT INTO outlinks (
425                dt, session, page, link
426             ) VALUES (
427                CURRENT_TIMESTAMP, ?, ?, ?
428             )',
429            $session,
430            $page,
431            $link
432        );
433    }
434
435    /**
436     * Log access to a media file
437     *
438     * Called from action.php
439     *
440     * @param string $media The media ID
441     * @param string $mime The media's mime type
442     * @param bool $inline Is this displayed inline?
443     * @param int $size Size of the media file
444     */
445    public function logMedia(string $media, string $mime, bool $inline, int $size): void
446    {
447        [$mime1, $mime2] = explode('/', strtolower($mime));
448        $inline = $inline ? 1 : 0;
449
450
451        $data = [
452            'media' => cleanID($media),
453            'ip' => $this->logIp(), // resolve the IP address
454            'session' => $this->session,
455            'size' => $size,
456            'mime1' => $mime1,
457            'mime2' => $mime2,
458            'inline' => $inline,
459        ];
460
461        $this->db->exec(
462            '
463                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
464                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
465            ',
466            $data
467        );
468    }
469
470    /**
471     * Log page edits
472     *
473     * called from action.php
474     *
475     * @param string $page The page that was edited
476     * @param string $type The type of edit (create, edit, etc.)
477     */
478    public function logEdit(string $page, string $type): void
479    {
480        $data = [
481            'page' => cleanID($page),
482            'type' => $type,
483            'ip' => $this->logIp(), // resolve the IP address
484            'session' => $this->session
485        ];
486
487        $this->db->exec(
488            'INSERT INTO edits (
489                dt, page, type, ip, session
490             ) VALUES (
491                CURRENT_TIMESTAMP, :page, :type, :ip, :session
492             )',
493            $data
494        );
495    }
496
497    /**
498     * Log login/logoffs and user creations
499     *
500     * @param string $type The type of login event (login, logout, create, failed)
501     * @param string $user The username
502     */
503    public function logLogin(string $type, string $user = ''): void
504    {
505        global $INPUT;
506
507        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
508
509        $ip = clientIP(true);
510
511        $this->db->exec(
512            'INSERT INTO logins (
513                dt, ip, user, type
514             ) VALUES (
515                CURRENT_TIMESTAMP, ?, ?, ?
516             )',
517            $ip,
518            $user,
519            $type
520        );
521    }
522
523    /**
524     * Log search data to the search related tables
525     *
526     * @param string $query The search query
527     * @param string[] $words The query split into words
528     */
529    public function logSearch(string $query, array $words): void
530    {
531        if (!$query) return;
532
533        $sid = $this->db->exec(
534            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
535            $this->logIp(), // resolve the IP address
536            $this->session,
537            $query,
538        );
539
540        foreach ($words as $word) {
541            if (!$word) continue;
542            $this->db->exec(
543                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
544                $sid,
545                $word
546            );
547        }
548    }
549
550    /**
551     * Log the current page count and size as today's history entry
552     */
553    public function logHistoryPages(): void
554    {
555        global $conf;
556
557        // use the popularity plugin's search method to find the wanted data
558        /** @var helper_plugin_popularity $pop */
559        $pop = plugin_load('helper', 'popularity');
560        $list = $this->initEmptySearchList();
561        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
562        $page_count = $list['file_count'];
563        $page_size = $list['file_size'];
564
565        $this->db->exec(
566            'INSERT OR REPLACE INTO history (
567                info, value, dt
568             ) VALUES (
569                ?, ?, CURRENT_TIMESTAMP
570             )',
571            'page_count',
572            $page_count
573        );
574        $this->db->exec(
575            'INSERT OR REPLACE INTO history (
576                info, value, dt
577             ) VALUES (
578                ?, ?, CURRENT_TIMESTAMP
579             )',
580            'page_size',
581            $page_size
582        );
583    }
584
585    /**
586     * Log the current media count and size as today's history entry
587     */
588    public function logHistoryMedia(): void
589    {
590        global $conf;
591
592        // use the popularity plugin's search method to find the wanted data
593        /** @var helper_plugin_popularity $pop */
594        $pop = plugin_load('helper', 'popularity');
595        $list = $this->initEmptySearchList();
596        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
597        $media_count = $list['file_count'];
598        $media_size = $list['file_size'];
599
600        $this->db->exec(
601            'INSERT OR REPLACE INTO history (
602                info, value, dt
603             ) VALUES (
604                ?, ?, CURRENT_TIMESTAMP
605             )',
606            'media_count',
607            $media_count
608        );
609        $this->db->exec(
610            'INSERT OR REPLACE INTO history (
611                info, value, dt
612             ) VALUES (
613                ?, ?, CURRENT_TIMESTAMP
614             )',
615            'media_size',
616            $media_size
617        );
618    }
619
620    // endregion
621
622    /**
623     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
624     * @return array
625     */
626    protected function initEmptySearchList()
627    {
628        return array_fill_keys([
629            'file_count',
630            'file_size',
631            'file_max',
632            'file_min',
633            'dir_count',
634            'dir_nest',
635            'file_oldest'
636        ], 0);
637    }
638}
639