xref: /plugin/statistics/Logger.php (revision 1c4e3694d34de9696954292b37ba9c070a747526)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\ClientHints;
6use DeviceDetector\DeviceDetector;
7use DeviceDetector\Parser\Client\Browser;
8use DeviceDetector\Parser\Device\AbstractDeviceParser;
9use DeviceDetector\Parser\OperatingSystem;
10use dokuwiki\Input\Input;
11use dokuwiki\plugin\sqlite\SQLiteDB;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15class Logger
16{
17    /** @var helper_plugin_statistics The statistics helper plugin instance */
18    protected helper_plugin_statistics $hlp;
19
20    /** @var SQLiteDB The SQLite database instance */
21    protected SQLiteDB $db;
22
23    /** @var string The full user agent string */
24    protected string $uaAgent;
25
26    /** @var string The type of user agent (browser, robot, feedreader) */
27    protected string $uaType = 'browser';
28
29    /** @var string The browser/client name */
30    protected string $uaName;
31
32    /** @var string The browser/client version */
33    protected string $uaVersion;
34
35    /** @var string The operating system/platform */
36    protected string $uaPlatform;
37
38    /** @var string|null The user name, if available */
39    protected ?string $user = null;
40
41    /** @var string The unique user identifier */
42    protected string $uid;
43
44    /** @var string The session identifier */
45    protected string $session;
46
47    /** @var int|null The ID of the main access log entry if any */
48    protected ?int $hit = null;
49
50    // region lifecycle
51
52    /**
53     * Constructor
54     *
55     * Parses browser info and set internal vars
56     */
57    public function __construct(helper_plugin_statistics $hlp)
58    {
59        /** @var Input $INPUT */
60        global $INPUT;
61
62        $this->hlp = $hlp;
63        $this->db = $this->hlp->getDB();
64
65        // FIXME if we already have a session, we should not re-parse the user agent
66
67        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
68        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
69        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
70        $dd->discardBotInformation();
71        $dd->parse();
72
73        if ($dd->isFeedReader()) {
74            $this->uaType = 'feedreader';
75        } elseif ($dd->isBot()) {
76            $this->uaType = 'robot';
77            // for now ignore bots
78            throw new IgnoreException('Bot detected, not logging');
79        }
80
81        $this->uaAgent = $ua;
82        $this->uaName = $dd->getClient('name') ?: 'Unknown';
83        $this->uaVersion = $dd->getClient('version') ?: '0';
84        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
85        $this->uid = $this->getUID();
86        $this->session = $this->getSession();
87
88        if (!$this->hlp->getConf('nousers')) {
89            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
90        }
91    }
92
93    /**
94     * Should be called before logging
95     *
96     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
97     */
98    public function begin(): void
99    {
100        $this->hlp->getDB()->getPdo()->beginTransaction();
101
102        $this->logUser();
103        $this->logGroups();
104        $this->logDomain();
105        $this->logSession();
106    }
107
108    /**
109     * Should be called after logging
110     *
111     * This commits the transaction started in begin()
112     */
113    public function end(): void
114    {
115        $this->hlp->getDB()->getPdo()->commit();
116    }
117
118    // endregion
119    // region data gathering
120
121    /**
122     * Get the unique user ID
123     *
124     * The user ID is stored in the user preferences and should stay there forever.
125     * @return string The unique user identifier
126     */
127    protected function getUID(): string
128    {
129        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
130            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
131            // @todo we may later make UID generation optional
132            throw new IgnoreException('No user ID found');
133        }
134
135        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
136    }
137
138    /**
139     * Return the user's session ID
140     *
141     * @return string The session identifier
142     */
143    protected function getSession(): string
144    {
145        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
146            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
147            throw new IgnoreException('No session ID found');
148        }
149
150        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
151    }
152
153    // endregion
154    // region automatic logging
155
156    /**
157     * Log the user was seen
158     */
159    protected function logUser(): void
160    {
161        if (!$this->user) return;
162
163        $this->db->exec(
164            'INSERT INTO users (user, dt)
165                  VALUES (?, CURRENT_TIMESTAMP)
166            ON CONFLICT (user) DO UPDATE SET
167                         dt = CURRENT_TIMESTAMP
168                   WHERE excluded.user = users.user
169            ',
170            $this->user
171        );
172    }
173
174    /**
175     * Log the session and user agent information
176     */
177    protected function logSession(): void
178    {
179        $this->db->exec(
180            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
181                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
182             ON CONFLICT (session) DO UPDATE SET
183                         end = CURRENT_TIMESTAMP,
184                         user = excluded.user,
185                         uid = excluded.uid
186                   WHERE excluded.session = sessions.session
187             ',
188            $this->session,
189            $this->uid,
190            $this->user,
191            $this->uaAgent,
192            $this->uaName,
193            $this->uaType,
194            $this->uaVersion,
195            $this->uaPlatform
196        );
197    }
198
199    /**
200     * Log all groups for the user
201     *
202     * @todo maybe this should be done only once per session?
203     */
204    protected function logGroups(): void
205    {
206        global $USERINFO;
207
208        if (!$this->user) return;
209        if (!isset($USERINFO['grps'])) return;
210        if (!is_array($USERINFO['grps'])) return;
211        $groups = $USERINFO['grps'];
212
213        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
214
215        if ($groups === []) {
216            return;
217        }
218
219        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
220        $params = [];
221        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
222        foreach ($groups as $group) {
223            $params[] = $this->user;
224            $params[] = $group;
225        }
226        $this->db->exec($sql, $params);
227    }
228
229    /**
230     * Log email domain
231     *
232     * @todo maybe this should be done only once per session?
233     */
234    protected function logDomain(): void
235    {
236        global $USERINFO;
237        if (!$this->user) return;
238        if (!isset($USERINFO['mail'])) return;
239        $mail = $USERINFO['mail'];
240
241        $pos = strrpos($mail, '@');
242        if (!$pos) return;
243        $domain = substr($mail, $pos + 1);
244        if (empty($domain)) return;
245
246        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
247        $this->db->exec($sql, [$domain, $this->user]);
248    }
249
250    // endregion
251    // region internal loggers called by the dispatchers
252
253    /**
254     * Log the given referer URL
255     *
256     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
257     * We do not log referers that are our own pages though.
258     *
259     * engine set -> a search engine referer
260     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
261     * no engine set, url not empty -> a referer from another page (not a wiki page)
262     * null returned -> referer was a wiki page
263     *
264     * @param $referer
265     * @return int|null The referer ID or null if no referer was logged
266     * @todo we could check against a blacklist here
267     */
268    public function logReferer($referer): ?int
269    {
270        $referer = trim($referer);
271
272        // do not log our own pages as referers (empty referer is OK though)
273        if (!empty($referer)) {
274            $selfre = '^' . preg_quote(DOKU_URL, '/');
275            if (preg_match("/$selfre/", $referer)) {
276                return null;
277            }
278        }
279
280        // is it a search engine?
281        $se = new SearchEngines($referer);
282        $engine = $se->getEngine();
283
284        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
285        $this->db->exec($sql, [$referer, $engine]);
286        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
287    }
288
289    /**
290     * Resolve IP to country/city and store in database
291     *
292     * @return string The IP address as stored
293     */
294    public function logIp(): string
295    {
296        $ip = clientIP(true);
297
298        // anonymize the IP address for storage?
299        if ($this->hlp->getConf('anonips')) {
300            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
301            $host = '';
302        } else {
303            $hash = $ip;
304            $host = gethostbyaddr($ip);
305        }
306
307        if ($this->hlp->getConf('nolocation')) {
308            // if we don't resolve location data, we just return the IP address
309            return $hash;
310        }
311
312        // check if IP already known and up-to-date
313        $result = $this->db->queryValue(
314            "SELECT ip
315             FROM   iplocation
316             WHERE  ip = ?
317               AND  dt > date('now', '-30 days')",
318            $hash
319        );
320        if ($result) return $hash; // already known and up-to-date
321
322
323        // resolve the IP address to location data
324        try {
325            $data = $this->hlp->resolveIP($ip);
326        } catch (IpResolverException $e) {
327            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
328            $data = [];
329        }
330
331        $this->db->exec(
332            'INSERT OR REPLACE INTO iplocation (
333                    ip, country, code, city, host, dt
334                 ) VALUES (
335                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
336                 )',
337            $hash,
338            $data['country'] ?? '',
339            $data['countryCode'] ?? '',
340            $data['city'] ?? '',
341            $host
342        );
343
344        return $hash;
345    }
346
347    // endregion
348    // region log dispatchers
349
350    public function logPageView(): void
351    {
352        global $INPUT;
353
354        if (!$INPUT->str('p')) return;
355
356
357        $referer = $INPUT->filter('trim')->str('r');
358        $ip = $this->logIp(); // resolve the IP address
359
360        $data = [
361            'page' => $INPUT->filter('cleanID')->str('p'),
362            'ip' => $ip,
363            'ref_id' => $this->logReferer($referer),
364            'sx' => $INPUT->int('sx'),
365            'sy' => $INPUT->int('sy'),
366            'vx' => $INPUT->int('vx'),
367            'vy' => $INPUT->int('vy'),
368            'session' => $this->session,
369        ];
370
371        $this->db->exec(
372            '
373        INSERT INTO pageviews (
374            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
375        ) VALUES (
376            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
377        )
378        ',
379            $data
380        );
381    }
382
383    /**
384     * Log a click on an external link
385     *
386     * Called from dispatch.php
387     */
388    public function logOutgoing(): void
389    {
390        global $INPUT;
391
392        if (!$INPUT->str('ol')) return;
393
394        $link = $INPUT->filter('trim')->str('ol');
395        $session = $this->session;
396        $page = $INPUT->filter('cleanID')->str('p');
397
398        $this->db->exec(
399            'INSERT INTO outlinks (
400                dt, session, page, link
401             ) VALUES (
402                CURRENT_TIMESTAMP, ?, ?, ?
403             )',
404            $session,
405            $page,
406            $link
407        );
408    }
409
410    /**
411     * Log access to a media file
412     *
413     * Called from action.php
414     *
415     * @param string $media The media ID
416     * @param string $mime The media's mime type
417     * @param bool $inline Is this displayed inline?
418     * @param int $size Size of the media file
419     */
420    public function logMedia(string $media, string $mime, bool $inline, int $size): void
421    {
422        [$mime1, $mime2] = explode('/', strtolower($mime));
423        $inline = $inline ? 1 : 0;
424
425
426        $data = [
427            'media' => cleanID($media),
428            'ip' => $this->logIp(), // resolve the IP address
429            'session' => $this->session,
430            'size' => $size,
431            'mime1' => $mime1,
432            'mime2' => $mime2,
433            'inline' => $inline,
434        ];
435
436        $this->db->exec(
437            '
438                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
439                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
440            ',
441            $data
442        );
443    }
444
445    /**
446     * Log page edits
447     *
448     * called from action.php
449     *
450     * @param string $page The page that was edited
451     * @param string $type The type of edit (create, edit, etc.)
452     */
453    public function logEdit(string $page, string $type): void
454    {
455        $data = [
456            'page' => cleanID($page),
457            'type' => $type,
458            'ip' => $this->logIp(), // resolve the IP address
459            'session' => $this->session
460        ];
461
462        $this->db->exec(
463            'INSERT INTO edits (
464                dt, page, type, ip, session
465             ) VALUES (
466                CURRENT_TIMESTAMP, :page, :type, :ip, :session
467             )',
468            $data
469        );
470    }
471
472    /**
473     * Log login/logoffs and user creations
474     *
475     * @param string $type The type of login event (login, logout, create, failed)
476     * @param string $user The username
477     */
478    public function logLogin(string $type, string $user = ''): void
479    {
480        global $INPUT;
481
482        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
483
484        $ip = clientIP(true);
485
486        $this->db->exec(
487            'INSERT INTO logins (
488                dt, ip, user, type
489             ) VALUES (
490                CURRENT_TIMESTAMP, ?, ?, ?
491             )',
492            $ip,
493            $user,
494            $type
495        );
496    }
497
498    /**
499     * Log search data to the search related tables
500     *
501     * @param string $query The search query
502     * @param string[] $words The query split into words
503     */
504    public function logSearch(string $query, array $words): void
505    {
506        if (!$query) return;
507
508        $sid = $this->db->exec(
509            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
510            $this->logIp(), // resolve the IP address
511            $this->session,
512            $query,
513        );
514
515        foreach ($words as $word) {
516            if (!$word) continue;
517            $this->db->exec(
518                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
519                $sid,
520                $word
521            );
522        }
523    }
524
525    /**
526     * Log the current page count and size as today's history entry
527     */
528    public function logHistoryPages(): void
529    {
530        global $conf;
531
532        // use the popularity plugin's search method to find the wanted data
533        /** @var helper_plugin_popularity $pop */
534        $pop = plugin_load('helper', 'popularity');
535        $list = $this->initEmptySearchList();
536        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
537        $page_count = $list['file_count'];
538        $page_size = $list['file_size'];
539
540        $this->db->exec(
541            'INSERT OR REPLACE INTO history (
542                info, value, dt
543             ) VALUES (
544                ?, ?, CURRENT_TIMESTAMP
545             )',
546            'page_count',
547            $page_count
548        );
549        $this->db->exec(
550            'INSERT OR REPLACE INTO history (
551                info, value, dt
552             ) VALUES (
553                ?, ?, CURRENT_TIMESTAMP
554             )',
555            'page_size',
556            $page_size
557        );
558    }
559
560    /**
561     * Log the current media count and size as today's history entry
562     */
563    public function logHistoryMedia(): void
564    {
565        global $conf;
566
567        // use the popularity plugin's search method to find the wanted data
568        /** @var helper_plugin_popularity $pop */
569        $pop = plugin_load('helper', 'popularity');
570        $list = $this->initEmptySearchList();
571        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
572        $media_count = $list['file_count'];
573        $media_size = $list['file_size'];
574
575        $this->db->exec(
576            'INSERT OR REPLACE INTO history (
577                info, value, dt
578             ) VALUES (
579                ?, ?, CURRENT_TIMESTAMP
580             )',
581            'media_count',
582            $media_count
583        );
584        $this->db->exec(
585            'INSERT OR REPLACE INTO history (
586                info, value, dt
587             ) VALUES (
588                ?, ?, CURRENT_TIMESTAMP
589             )',
590            'media_size',
591            $media_size
592        );
593    }
594
595    // endregion
596
597    /**
598     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
599     * @return array
600     */
601    protected function initEmptySearchList()
602    {
603        return array_fill_keys([
604            'file_count',
605            'file_size',
606            'file_max',
607            'file_min',
608            'dir_count',
609            'dir_nest',
610            'file_oldest'
611        ], 0);
612    }
613}
614