xref: /plugin/statistics/Logger.php (revision d550a4ad772c00d30c3bead8fc22362f3d1bec7a)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
/**
 * Signals that the current request must not be logged
 *
 * Carries no data of its own; it only exists so callers can tell a deliberate
 * "skip logging" decision apart from other runtime errors.
 */
class IgnoreException extends \RuntimeException
{
}
11
12use DeviceDetector\DeviceDetector;
13use DeviceDetector\Parser\Client\Browser;
14use DeviceDetector\Parser\Device\AbstractDeviceParser;
15use DeviceDetector\Parser\OperatingSystem;
16use dokuwiki\Input\Input;
17use dokuwiki\plugin\sqlite\SQLiteDB;
18use helper_plugin_popularity;
19use helper_plugin_statistics;
20
21class Logger
22{
    /** @var helper_plugin_statistics The statistics helper plugin instance (source of config and the DB handle) */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance, obtained from the helper in the constructor */
    protected SQLiteDB $db;

    /** @var string The full user agent string (trimmed HTTP_USER_AGENT) */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client family name, 'Unknown' when it could not be detected */
    protected string $uaName;

    /** @var string The browser/client version, '0' when it could not be detected */
    protected string $uaVersion;

    /** @var string The operating system/platform family, 'Unknown' when it could not be detected */
    protected string $uaPlatform;

    /** @var string|null The user name, if available; stays null when the 'nousers' option is set */
    protected ?string $user = null;

    /** @var string The unique user identifier, read from the session */
    protected string $uid;

    /** @var string The session identifier, read from the session */
    protected string $session;

    /**
     * @var int|null The ID of the main access log entry if any
     * NOTE(review): not read or written by any method visible in this file - confirm it is still needed
     */
    protected ?int $hit = null;
55
56    // region lifecycle
57
58    /**
59     * Constructor
60     *
61     * Parses browser info and set internal vars
62     */
63    public function __construct(helper_plugin_statistics $hlp)
64    {
65        /** @var Input $INPUT */
66        global $INPUT;
67
68        $this->hlp = $hlp;
69        $this->db = $this->hlp->getDB();
70
71        // FIXME if we already have a session, we should not re-parse the user agent
72
73        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
74        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
75        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
76        $dd->discardBotInformation();
77        $dd->parse();
78
79        if ($dd->isFeedReader()) {
80            $this->uaType = 'feedreader';
81        } elseif ($dd->isBot()) {
82            $this->uaType = 'robot';
83            // for now ignore bots
84            throw new IgnoreException('Bot detected, not logging');
85        }
86
87        $this->uaAgent = $ua;
88        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
89        $this->uaVersion = $dd->getClient('version') ?: '0';
90        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
91        $this->uid = $this->getUID();
92        $this->session = $this->getSession();
93
94        if (!$this->hlp->getConf('nousers')) {
95            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
96        }
97    }
98
99    /**
100     * Should be called before logging
101     *
102     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
103     */
104    public function begin(): void
105    {
106        $this->hlp->getDB()->getPdo()->beginTransaction();
107
108        $this->logUser();
109        $this->logGroups();
110        $this->logDomain();
111        $this->logSession();
112    }
113
114    /**
115     * Should be called after logging
116     *
117     * This commits the transaction started in begin()
118     */
119    public function end(): void
120    {
121        $this->hlp->getDB()->getPdo()->commit();
122    }
123
124    // endregion
125    // region data gathering
126
127    /**
128     * Get the unique user ID
129     *
130     * The user ID is stored in the user preferences and should stay there forever.
131     * @return string The unique user identifier
132     */
133    protected function getUID(): string
134    {
135        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
136            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
137            // @todo we may later make UID generation optional
138            throw new IgnoreException('No user ID found');
139        }
140
141        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
142    }
143
144    /**
145     * Return the user's session ID
146     *
147     * @return string The session identifier
148     */
149    protected function getSession(): string
150    {
151        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
152            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
153            throw new IgnoreException('No session ID found');
154        }
155
156        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
157    }
158
159    // endregion
160    // region automatic logging
161
162    /**
163     * Log the user was seen
164     */
165    protected function logUser(): void
166    {
167        if (!$this->user) return;
168
169        $this->db->exec(
170            'INSERT INTO users (user, dt)
171                  VALUES (?, CURRENT_TIMESTAMP)
172            ON CONFLICT (user) DO UPDATE SET
173                         dt = CURRENT_TIMESTAMP
174                   WHERE excluded.user = users.user
175            ',
176            $this->user
177        );
178
179    }
180
181    /**
182     * Log the session and user agent information
183     */
184    protected function logSession(): void
185    {
186        $this->db->exec(
187            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
188                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
189             ON CONFLICT (session) DO UPDATE SET
190                         end = CURRENT_TIMESTAMP,
191                         user = excluded.user,
192                         uid = excluded.uid
193                   WHERE excluded.session = sessions.session
194             ',
195            $this->session,
196            $this->uid,
197            $this->user,
198            $this->uaAgent,
199            $this->uaName,
200            $this->uaType,
201            $this->uaVersion,
202            $this->uaPlatform
203        );
204    }
205
206    /**
207     * Log all groups for the user
208     *
209     * @todo maybe this should be done only once per session?
210     */
211    protected function logGroups(): void
212    {
213        global $USERINFO;
214
215        if (!$this->user) return;
216        if (!isset($USERINFO['grps'])) return;
217        if (!is_array($USERINFO['grps'])) return;
218        $groups = $USERINFO['grps'];
219
220        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
221
222        if (empty($groups)) {
223            return;
224        }
225
226        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
227        $params = [];
228        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
229        foreach ($groups as $group) {
230            $params[] = $this->user;
231            $params[] = $group;
232        }
233        $this->db->exec($sql, $params);
234    }
235
236    /**
237     * Log email domain
238     *
239     * @todo maybe this should be done only once per session?
240     */
241    protected function logDomain(): void
242    {
243        global $USERINFO;
244        if (!$this->user) return;
245        if (!isset($USERINFO['mail'])) return;
246        $mail = $USERINFO['mail'];
247
248        $pos = strrpos($mail, '@');
249        if (!$pos) return;
250        $domain = substr($mail, $pos + 1);
251        if (empty($domain)) return;
252
253        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
254        $this->db->exec($sql, [$domain, $this->user]);
255    }
256
257    // endregion
258    // region internal loggers called by the dispatchers
259
260    /**
261     * Log the given referer URL
262     *
263     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
264     * We do not log referers that are our own pages though.
265     *
266     * engine set -> a search engine referer
267     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
268     * no engine set, url not empty -> a referer from another page (not a wiki page)
269     * null returned -> referer was a wiki page
270     *
271     * @param $referer
272     * @return int|null The referer ID or null if no referer was logged
273     * @todo we could check against a blacklist here
274     */
275    public function logReferer($referer): ?int
276    {
277        $referer = trim($referer);
278
279        // do not log our own pages as referers (empty referer is OK though)
280        if (!empty($referer)) {
281            $selfre = '^' . preg_quote(DOKU_URL, '/');
282            if (preg_match("/$selfre/", $referer)) {
283                return null;
284            }
285        }
286
287        // is it a search engine?
288        $se = new SearchEngines($referer);
289        $engine = $se->getEngine();
290
291        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
292        $this->db->exec($sql, [$referer, $engine]);
293        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
294    }
295
296    /**
297     * Resolve IP to country/city and store in database
298     *
299     * @return string The IP address as stored
300     */
301    public function logIp(): string
302    {
303        $ip = clientIP(true);
304
305        // anonymize the IP address for storage?
306        if ($this->hlp->getConf('anonips')) {
307            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
308            $host = '';
309        } else {
310            $hash = $ip;
311            $host = gethostbyaddr($ip);
312        }
313
314        if ($this->hlp->getConf('nolocation')) {
315            // if we don't resolve location data, we just return the IP address
316            return $hash;
317        }
318
319        // check if IP already known and up-to-date
320        $result = $this->db->queryValue(
321            "SELECT ip
322             FROM   iplocation
323             WHERE  ip = ?
324               AND  lastupd > date('now', '-30 days')",
325            $hash
326        );
327        if ($result) return $hash; // already known and up-to-date
328
329
330        // resolve the IP address to location data
331        try {
332            $data = $this->hlp->resolveIP($ip);
333        } catch (IpResolverException $e) {
334            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
335            $data = [];
336        }
337
338        $this->db->exec(
339            'INSERT OR REPLACE INTO iplocation (
340                    ip, country, code, city, host, lastupd
341                 ) VALUES (
342                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
343                 )',
344            $hash,
345            $data['country'] ?? '',
346            $data['countryCode'] ?? '',
347            $data['city'] ?? '',
348            $host
349        );
350
351        return $hash;
352    }
353
354    // endregion
355    // region log dispatchers
356
357    public function logPageView(): void
358    {
359        global $INPUT;
360
361        if (!$INPUT->str('p')) return;
362
363
364        $referer = $INPUT->filter('trim')->str('r');
365        $ip = $this->logIp(); // resolve the IP address
366
367        $data = [
368            'page' => $INPUT->filter('cleanID')->str('p'),
369            'ip' => $ip,
370            'ref_id' => $this->logReferer($referer),
371            'sx' => $INPUT->int('sx'),
372            'sy' => $INPUT->int('sy'),
373            'vx' => $INPUT->int('vx'),
374            'vy' => $INPUT->int('vy'),
375            'session' => $this->session,
376        ];
377
378        $this->db->exec('
379        INSERT INTO pageviews (
380            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
381        ) VALUES (
382            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
383        )
384        ',
385            $data
386        );
387    }
388
389    /**
390     * Log a click on an external link
391     *
392     * Called from log.php
393     */
394    public function logOutgoing(): void
395    {
396        global $INPUT;
397
398        if (!$INPUT->str('ol')) return;
399
400        $link = $INPUT->filter('trim')->str('ol');
401        $session = $this->session;
402        $page = $INPUT->filter('cleanID')->str('p');
403
404        $this->db->exec(
405            'INSERT INTO outlinks (
406                dt, session, page, link
407             ) VALUES (
408                CURRENT_TIMESTAMP, ?, ?, ?
409             )',
410            $session,
411            $page,
412            $link
413        );
414    }
415
416    /**
417     * Log access to a media file
418     *
419     * Called from action.php
420     *
421     * @param string $media The media ID
422     * @param string $mime The media's mime type
423     * @param bool $inline Is this displayed inline?
424     * @param int $size Size of the media file
425     */
426    public function logMedia(string $media, string $mime, bool $inline, int $size): void
427    {
428        [$mime1, $mime2] = explode('/', strtolower($mime));
429        $inline = $inline ? 1 : 0;
430
431
432        $data = [
433            'media' => cleanID($media),
434            'ip' => $this->logIp(), // resolve the IP address
435            'session' => $this->session,
436            'size' => $size,
437            'mime1' => $mime1,
438            'mime2' => $mime2,
439            'inline' => $inline,
440        ];
441
442        $this->db->exec('
443                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
444                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
445            ',
446            $data
447        );
448    }
449
450    /**
451     * Log page edits
452     *
453     * called from action.php
454     *
455     * @param string $page The page that was edited
456     * @param string $type The type of edit (create, edit, etc.)
457     */
458    public function logEdit(string $page, string $type): void
459    {
460        $data = [
461            'page' => cleanID($page),
462            'type' => $type,
463            'ip' => $this->logIp(), // resolve the IP address
464            'session' => $this->session
465        ];
466
467        $this->db->exec(
468            'INSERT INTO edits (
469                dt, page, type, ip, session
470             ) VALUES (
471                CURRENT_TIMESTAMP, :page, :type, :ip, :session
472             )',
473            $data
474        );
475    }
476
477    /**
478     * Log login/logoffs and user creations
479     *
480     * @param string $type The type of login event (login, logout, create, failed)
481     * @param string $user The username
482     */
483    public function logLogin(string $type, string $user = ''): void
484    {
485        global $INPUT;
486
487        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
488
489        $ip = clientIP(true);
490
491        $this->db->exec(
492            'INSERT INTO logins (
493                dt, ip, user, type
494             ) VALUES (
495                CURRENT_TIMESTAMP, ?, ?, ?
496             )',
497            $ip,
498            $user,
499            $type
500        );
501    }
502
503    /**
504     * Log search data to the search related tables
505     *
506     * @param string $query The search query
507     * @param string[] $words The query split into words
508     */
509    public function logSearch(string $query, array $words): void
510    {
511        if (!$query) return;
512
513        $sid = $this->db->exec(
514            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
515            $this->logIp(), // resolve the IP address
516            $this->session,
517            $query,
518        );
519
520        foreach ($words as $word) {
521            if (!$word) continue;
522            $this->db->exec(
523                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
524                $sid,
525                $word
526            );
527        }
528    }
529
530    /**
531     * Log the current page count and size as today's history entry
532     */
533    public function logHistoryPages(): void
534    {
535        global $conf;
536
537        // use the popularity plugin's search method to find the wanted data
538        /** @var helper_plugin_popularity $pop */
539        $pop = plugin_load('helper', 'popularity');
540        $list = $this->initEmptySearchList();
541        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
542        $page_count = $list['file_count'];
543        $page_size = $list['file_size'];
544
545        $this->db->exec(
546            'INSERT OR REPLACE INTO history (
547                info, value, dt
548             ) VALUES (
549                ?, ?, CURRENT_TIMESTAMP
550             )',
551            'page_count',
552            $page_count
553        );
554        $this->db->exec(
555            'INSERT OR REPLACE INTO history (
556                info, value, dt
557             ) VALUES (
558                ?, ?, CURRENT_TIMESTAMP
559             )',
560            'page_size',
561            $page_size
562        );
563    }
564
565    /**
566     * Log the current media count and size as today's history entry
567     */
568    public function logHistoryMedia(): void
569    {
570        global $conf;
571
572        // use the popularity plugin's search method to find the wanted data
573        /** @var helper_plugin_popularity $pop */
574        $pop = plugin_load('helper', 'popularity');
575        $list = $this->initEmptySearchList();
576        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
577        $media_count = $list['file_count'];
578        $media_size = $list['file_size'];
579
580        $this->db->exec(
581            'INSERT OR REPLACE INTO history (
582                info, value, dt
583             ) VALUES (
584                ?, ?, CURRENT_TIMESTAMP
585             )',
586            'media_count',
587            $media_count
588        );
589        $this->db->exec(
590            'INSERT OR REPLACE INTO history (
591                info, value, dt
592             ) VALUES (
593                ?, ?, CURRENT_TIMESTAMP
594             )',
595            'media_size',
596            $media_size
597        );
598    }
599
600    // endregion
601
602    /**
603     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
604     * @return array
605     */
606    protected function initEmptySearchList()
607    {
608        return array_fill_keys([
609            'file_count',
610            'file_size',
611            'file_max',
612            'file_min',
613            'dir_count',
614            'dir_nest',
615            'file_oldest'
616        ], 0);
617    }
618}
619