xref: /plugin/statistics/Logger.php (revision bd514593a53266323326d4450de3aca6ab4566d8)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\Input\Input;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use helper_plugin_popularity;
12use helper_plugin_statistics;
13
14class Logger
15{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance, obtained from the helper in the constructor */
    protected SQLiteDB $db;

    /** @var string The full user agent string (the trimmed HTTP_USER_AGENT server variable) */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader), 'browser' unless detected otherwise */
    protected string $uaType = 'browser';

    /** @var string The browser/client family name, 'Unknown' if none could be determined */
    protected string $uaName;

    /** @var string The browser/client version, '0' if none could be determined */
    protected string $uaVersion;

    /** @var string The operating system/platform family, 'Unknown' if none could be determined */
    protected string $uaPlatform;

    /** @var string|null The user name, if available; stays null when the 'nousers' option is set */
    protected ?string $user = null;

    /** @var string The unique user identifier, read from the session data */
    protected string $uid;

    /** @var string The session identifier, read from the session data */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any (never assigned within this class) */
    protected ?int $hit = null;
48
49    // region lifecycle
50
51    /**
52     * Constructor
53     *
54     * Parses browser info and set internal vars
55     */
56    public function __construct(helper_plugin_statistics $hlp)
57    {
58        /** @var Input $INPUT */
59        global $INPUT;
60
61        $this->hlp = $hlp;
62        $this->db = $this->hlp->getDB();
63
64        // FIXME if we already have a session, we should not re-parse the user agent
65
66        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
67        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
68        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
69        $dd->discardBotInformation();
70        $dd->parse();
71
72        if ($dd->isFeedReader()) {
73            $this->uaType = 'feedreader';
74        } elseif ($dd->isBot()) {
75            $this->uaType = 'robot';
76            // for now ignore bots
77            throw new IgnoreException('Bot detected, not logging');
78        }
79
80        $this->uaAgent = $ua;
81        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
82        $this->uaVersion = $dd->getClient('version') ?: '0';
83        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
84        $this->uid = $this->getUID();
85        $this->session = $this->getSession();
86
87        if (!$this->hlp->getConf('nousers')) {
88            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
89        }
90    }
91
92    /**
93     * Should be called before logging
94     *
95     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
96     */
97    public function begin(): void
98    {
99        $this->hlp->getDB()->getPdo()->beginTransaction();
100
101        $this->logUser();
102        $this->logGroups();
103        $this->logDomain();
104        $this->logSession();
105    }
106
107    /**
108     * Should be called after logging
109     *
110     * This commits the transaction started in begin()
111     */
112    public function end(): void
113    {
114        $this->hlp->getDB()->getPdo()->commit();
115    }
116
117    // endregion
118    // region data gathering
119
120    /**
121     * Get the unique user ID
122     *
123     * The user ID is stored in the user preferences and should stay there forever.
124     * @return string The unique user identifier
125     */
126    protected function getUID(): string
127    {
128        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
129            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
130            // @todo we may later make UID generation optional
131            throw new IgnoreException('No user ID found');
132        }
133
134        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
135    }
136
137    /**
138     * Return the user's session ID
139     *
140     * @return string The session identifier
141     */
142    protected function getSession(): string
143    {
144        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
145            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
146            throw new IgnoreException('No session ID found');
147        }
148
149        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
150    }
151
152    // endregion
153    // region automatic logging
154
155    /**
156     * Log the user was seen
157     */
158    protected function logUser(): void
159    {
160        if (!$this->user) return;
161
162        $this->db->exec(
163            'INSERT INTO users (user, dt)
164                  VALUES (?, CURRENT_TIMESTAMP)
165            ON CONFLICT (user) DO UPDATE SET
166                         dt = CURRENT_TIMESTAMP
167                   WHERE excluded.user = users.user
168            ',
169            $this->user
170        );
171    }
172
173    /**
174     * Log the session and user agent information
175     */
176    protected function logSession(): void
177    {
178        $this->db->exec(
179            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
180                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
181             ON CONFLICT (session) DO UPDATE SET
182                         end = CURRENT_TIMESTAMP,
183                         user = excluded.user,
184                         uid = excluded.uid
185                   WHERE excluded.session = sessions.session
186             ',
187            $this->session,
188            $this->uid,
189            $this->user,
190            $this->uaAgent,
191            $this->uaName,
192            $this->uaType,
193            $this->uaVersion,
194            $this->uaPlatform
195        );
196    }
197
198    /**
199     * Log all groups for the user
200     *
201     * @todo maybe this should be done only once per session?
202     */
203    protected function logGroups(): void
204    {
205        global $USERINFO;
206
207        if (!$this->user) return;
208        if (!isset($USERINFO['grps'])) return;
209        if (!is_array($USERINFO['grps'])) return;
210        $groups = $USERINFO['grps'];
211
212        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
213
214        if ($groups === []) {
215            return;
216        }
217
218        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
219        $params = [];
220        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
221        foreach ($groups as $group) {
222            $params[] = $this->user;
223            $params[] = $group;
224        }
225        $this->db->exec($sql, $params);
226    }
227
228    /**
229     * Log email domain
230     *
231     * @todo maybe this should be done only once per session?
232     */
233    protected function logDomain(): void
234    {
235        global $USERINFO;
236        if (!$this->user) return;
237        if (!isset($USERINFO['mail'])) return;
238        $mail = $USERINFO['mail'];
239
240        $pos = strrpos($mail, '@');
241        if (!$pos) return;
242        $domain = substr($mail, $pos + 1);
243        if (empty($domain)) return;
244
245        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
246        $this->db->exec($sql, [$domain, $this->user]);
247    }
248
249    // endregion
250    // region internal loggers called by the dispatchers
251
252    /**
253     * Log the given referer URL
254     *
255     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
256     * We do not log referers that are our own pages though.
257     *
258     * engine set -> a search engine referer
259     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
260     * no engine set, url not empty -> a referer from another page (not a wiki page)
261     * null returned -> referer was a wiki page
262     *
263     * @param $referer
264     * @return int|null The referer ID or null if no referer was logged
265     * @todo we could check against a blacklist here
266     */
267    public function logReferer($referer): ?int
268    {
269        $referer = trim($referer);
270
271        // do not log our own pages as referers (empty referer is OK though)
272        if (!empty($referer)) {
273            $selfre = '^' . preg_quote(DOKU_URL, '/');
274            if (preg_match("/$selfre/", $referer)) {
275                return null;
276            }
277        }
278
279        // is it a search engine?
280        $se = new SearchEngines($referer);
281        $engine = $se->getEngine();
282
283        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
284        $this->db->exec($sql, [$referer, $engine]);
285        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
286    }
287
288    /**
289     * Resolve IP to country/city and store in database
290     *
291     * @return string The IP address as stored
292     */
293    public function logIp(): string
294    {
295        $ip = clientIP(true);
296
297        // anonymize the IP address for storage?
298        if ($this->hlp->getConf('anonips')) {
299            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
300            $host = '';
301        } else {
302            $hash = $ip;
303            $host = gethostbyaddr($ip);
304        }
305
306        if ($this->hlp->getConf('nolocation')) {
307            // if we don't resolve location data, we just return the IP address
308            return $hash;
309        }
310
311        // check if IP already known and up-to-date
312        $result = $this->db->queryValue(
313            "SELECT ip
314             FROM   iplocation
315             WHERE  ip = ?
316               AND  lastupd > date('now', '-30 days')",
317            $hash
318        );
319        if ($result) return $hash; // already known and up-to-date
320
321
322        // resolve the IP address to location data
323        try {
324            $data = $this->hlp->resolveIP($ip);
325        } catch (IpResolverException $e) {
326            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
327            $data = [];
328        }
329
330        $this->db->exec(
331            'INSERT OR REPLACE INTO iplocation (
332                    ip, country, code, city, host, lastupd
333                 ) VALUES (
334                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
335                 )',
336            $hash,
337            $data['country'] ?? '',
338            $data['countryCode'] ?? '',
339            $data['city'] ?? '',
340            $host
341        );
342
343        return $hash;
344    }
345
346    // endregion
347    // region log dispatchers
348
349    public function logPageView(): void
350    {
351        global $INPUT;
352
353        if (!$INPUT->str('p')) return;
354
355
356        $referer = $INPUT->filter('trim')->str('r');
357        $ip = $this->logIp(); // resolve the IP address
358
359        $data = [
360            'page' => $INPUT->filter('cleanID')->str('p'),
361            'ip' => $ip,
362            'ref_id' => $this->logReferer($referer),
363            'sx' => $INPUT->int('sx'),
364            'sy' => $INPUT->int('sy'),
365            'vx' => $INPUT->int('vx'),
366            'vy' => $INPUT->int('vy'),
367            'session' => $this->session,
368        ];
369
370        $this->db->exec(
371            '
372        INSERT INTO pageviews (
373            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
374        ) VALUES (
375            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
376        )
377        ',
378            $data
379        );
380    }
381
382    /**
383     * Log a click on an external link
384     *
385     * Called from log.php
386     */
387    public function logOutgoing(): void
388    {
389        global $INPUT;
390
391        if (!$INPUT->str('ol')) return;
392
393        $link = $INPUT->filter('trim')->str('ol');
394        $session = $this->session;
395        $page = $INPUT->filter('cleanID')->str('p');
396
397        $this->db->exec(
398            'INSERT INTO outlinks (
399                dt, session, page, link
400             ) VALUES (
401                CURRENT_TIMESTAMP, ?, ?, ?
402             )',
403            $session,
404            $page,
405            $link
406        );
407    }
408
409    /**
410     * Log access to a media file
411     *
412     * Called from action.php
413     *
414     * @param string $media The media ID
415     * @param string $mime The media's mime type
416     * @param bool $inline Is this displayed inline?
417     * @param int $size Size of the media file
418     */
419    public function logMedia(string $media, string $mime, bool $inline, int $size): void
420    {
421        [$mime1, $mime2] = explode('/', strtolower($mime));
422        $inline = $inline ? 1 : 0;
423
424
425        $data = [
426            'media' => cleanID($media),
427            'ip' => $this->logIp(), // resolve the IP address
428            'session' => $this->session,
429            'size' => $size,
430            'mime1' => $mime1,
431            'mime2' => $mime2,
432            'inline' => $inline,
433        ];
434
435        $this->db->exec(
436            '
437                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
438                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
439            ',
440            $data
441        );
442    }
443
444    /**
445     * Log page edits
446     *
447     * called from action.php
448     *
449     * @param string $page The page that was edited
450     * @param string $type The type of edit (create, edit, etc.)
451     */
452    public function logEdit(string $page, string $type): void
453    {
454        $data = [
455            'page' => cleanID($page),
456            'type' => $type,
457            'ip' => $this->logIp(), // resolve the IP address
458            'session' => $this->session
459        ];
460
461        $this->db->exec(
462            'INSERT INTO edits (
463                dt, page, type, ip, session
464             ) VALUES (
465                CURRENT_TIMESTAMP, :page, :type, :ip, :session
466             )',
467            $data
468        );
469    }
470
471    /**
472     * Log login/logoffs and user creations
473     *
474     * @param string $type The type of login event (login, logout, create, failed)
475     * @param string $user The username
476     */
477    public function logLogin(string $type, string $user = ''): void
478    {
479        global $INPUT;
480
481        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
482
483        $ip = clientIP(true);
484
485        $this->db->exec(
486            'INSERT INTO logins (
487                dt, ip, user, type
488             ) VALUES (
489                CURRENT_TIMESTAMP, ?, ?, ?
490             )',
491            $ip,
492            $user,
493            $type
494        );
495    }
496
497    /**
498     * Log search data to the search related tables
499     *
500     * @param string $query The search query
501     * @param string[] $words The query split into words
502     */
503    public function logSearch(string $query, array $words): void
504    {
505        if (!$query) return;
506
507        $sid = $this->db->exec(
508            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
509            $this->logIp(), // resolve the IP address
510            $this->session,
511            $query,
512        );
513
514        foreach ($words as $word) {
515            if (!$word) continue;
516            $this->db->exec(
517                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
518                $sid,
519                $word
520            );
521        }
522    }
523
524    /**
525     * Log the current page count and size as today's history entry
526     */
527    public function logHistoryPages(): void
528    {
529        global $conf;
530
531        // use the popularity plugin's search method to find the wanted data
532        /** @var helper_plugin_popularity $pop */
533        $pop = plugin_load('helper', 'popularity');
534        $list = $this->initEmptySearchList();
535        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
536        $page_count = $list['file_count'];
537        $page_size = $list['file_size'];
538
539        $this->db->exec(
540            'INSERT OR REPLACE INTO history (
541                info, value, dt
542             ) VALUES (
543                ?, ?, CURRENT_TIMESTAMP
544             )',
545            'page_count',
546            $page_count
547        );
548        $this->db->exec(
549            'INSERT OR REPLACE INTO history (
550                info, value, dt
551             ) VALUES (
552                ?, ?, CURRENT_TIMESTAMP
553             )',
554            'page_size',
555            $page_size
556        );
557    }
558
559    /**
560     * Log the current media count and size as today's history entry
561     */
562    public function logHistoryMedia(): void
563    {
564        global $conf;
565
566        // use the popularity plugin's search method to find the wanted data
567        /** @var helper_plugin_popularity $pop */
568        $pop = plugin_load('helper', 'popularity');
569        $list = $this->initEmptySearchList();
570        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
571        $media_count = $list['file_count'];
572        $media_size = $list['file_size'];
573
574        $this->db->exec(
575            'INSERT OR REPLACE INTO history (
576                info, value, dt
577             ) VALUES (
578                ?, ?, CURRENT_TIMESTAMP
579             )',
580            'media_count',
581            $media_count
582        );
583        $this->db->exec(
584            'INSERT OR REPLACE INTO history (
585                info, value, dt
586             ) VALUES (
587                ?, ?, CURRENT_TIMESTAMP
588             )',
589            'media_size',
590            $media_size
591        );
592    }
593
594    // endregion
595
596    /**
597     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
598     * @return array
599     */
600    protected function initEmptySearchList()
601    {
602        return array_fill_keys([
603            'file_count',
604            'file_size',
605            'file_max',
606            'file_min',
607            'dir_count',
608            'dir_nest',
609            'file_oldest'
610        ], 0);
611    }
612}
613