xref: /plugin/statistics/Logger.php (revision ba6b3b10263c8439a170c848fb8e0ce174e4f468)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
/**
 * Marker exception signalling that the current request must not be logged
 *
 * It carries no data of its own; it only exists so callers can tell a deliberate
 * "skip logging" apart from other runtime errors.
 */
class IgnoreException extends \RuntimeException {}
11
12use DeviceDetector\DeviceDetector;
13use DeviceDetector\Parser\Client\Browser;
14use DeviceDetector\Parser\Device\AbstractDeviceParser;
15use DeviceDetector\Parser\OperatingSystem;
16use dokuwiki\HTTP\DokuHTTPClient;
17use dokuwiki\Input\Input;
18use dokuwiki\plugin\sqlite\SQLiteDB;
19use helper_plugin_popularity;
20use helper_plugin_statistics;
21
22class Logger
23{
24    /** @var helper_plugin_statistics The statistics helper plugin instance */
25    protected helper_plugin_statistics $hlp;
26
27    /** @var SQLiteDB The SQLite database instance */
28    protected SQLiteDB $db;
29
30    /** @var string The full user agent string */
31    protected string $uaAgent;
32
33    /** @var string The type of user agent (browser, robot, feedreader) */
34    protected string $uaType = 'browser';
35
36    /** @var string The browser/client name */
37    protected string $uaName;
38
39    /** @var string The browser/client version */
40    protected string $uaVersion;
41
42    /** @var string The operating system/platform */
43    protected string $uaPlatform;
44
45    /** @var string|null The user name, if available */
46    protected ?string $user = null;
47
48    /** @var string The unique user identifier */
49    protected string $uid;
50
51    /** @var string The session identifier */
52    protected string $session;
53
54    /** @var int|null The ID of the main access log entry if any */
55    protected ?int $hit = null;
56
57    // region lifecycle
58
59    /**
60     * Constructor
61     *
62     * Parses browser info and set internal vars
63     */
64    public function __construct(helper_plugin_statistics $hlp)
65    {
66        /** @var Input $INPUT */
67        global $INPUT;
68
69        $this->hlp = $hlp;
70        $this->db = $this->hlp->getDB();
71
72        // FIXME if we already have a session, we should not re-parse the user agent
73
74        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
75        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
76        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
77        $dd->discardBotInformation();
78        $dd->parse();
79
80        if ($dd->isFeedReader()) {
81            $this->uaType = 'feedreader';
82        } elseif ($dd->isBot()) {
83            $this->uaType = 'robot';
84            // for now ignore bots
85            throw new IgnoreException('Bot detected, not logging');
86        }
87
88        $this->uaAgent = $ua;
89        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
90        $this->uaVersion = $dd->getClient('version') ?: '0';
91        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
92        $this->uid = $this->getUID();
93        $this->session = $this->getSession();
94        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
95    }
96
97    /**
98     * Should be called before logging
99     *
100     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
101     */
102    public function begin(): void
103    {
104        $this->hlp->getDB()->getPdo()->beginTransaction();
105
106        $this->logUser();
107        $this->logGroups();
108        $this->logDomain();
109        $this->logSession();
110    }
111
112    /**
113     * Should be called after logging
114     *
115     * This commits the transaction started in begin()
116     */
117    public function end(): void
118    {
119        $this->hlp->getDB()->getPdo()->commit();
120    }
121
122    // endregion
123    // region data gathering
124
125    /**
126     * Get the unique user ID
127     *
128     * The user ID is stored in the user preferences and should stay there forever.
129     * @return string The unique user identifier
130     */
131    protected function getUID(): string
132    {
133        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
134            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
135            // @todo we may later make UID generation optional
136            throw new IgnoreException('No user ID found');
137        }
138
139        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
140    }
141
142    /**
143     * Return the user's session ID
144     *
145     * @return string The session identifier
146     */
147    protected function getSession(): string
148    {
149        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
150            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
151            throw new IgnoreException('No session ID found');
152        }
153
154        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
155    }
156
157    // endregion
158    // region automatic logging
159
160    /**
161     * Log the user was seen
162     */
163    protected function logUser(): void
164    {
165        if (!$this->user) return;
166
167        $this->db->exec(
168            'INSERT INTO users (user, dt)
169                  VALUES (?, CURRENT_TIMESTAMP)
170            ON CONFLICT (user) DO UPDATE SET
171                         dt = CURRENT_TIMESTAMP
172                   WHERE excluded.user = users.user
173            ',
174            $this->user
175        );
176
177    }
178
179    /**
180     * Log the session and user agent information
181     */
182    protected function logSession(): void
183    {
184        $this->db->exec(
185            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
186                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
187             ON CONFLICT (session) DO UPDATE SET
188                         end = CURRENT_TIMESTAMP,
189                         user = excluded.user,
190                         uid = excluded.uid
191                   WHERE excluded.session = sessions.session
192             ',
193            $this->session,
194            $this->uid,
195            $this->user,
196            $this->uaAgent,
197            $this->uaName,
198            $this->uaType,
199            $this->uaVersion,
200            $this->uaPlatform
201        );
202    }
203
204    /**
205     * Log all groups for the user
206     *
207     * @todo maybe this should be done only once per session?
208     */
209    protected function logGroups(): void
210    {
211        global $USERINFO;
212
213        if (!$this->user) return;
214        if (!isset($USERINFO['grps'])) return;
215        if (!is_array($USERINFO['grps'])) return;
216        $groups = $USERINFO['grps'];
217
218        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
219
220        if (empty($groups)) {
221            return;
222        }
223
224        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
225        $params = [];
226        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
227        foreach ($groups as $group) {
228            $params[] = $this->user;
229            $params[] = $group;
230        }
231        $this->db->exec($sql, $params);
232    }
233
234    /**
235     * Log email domain
236     *
237     * @todo maybe this should be done only once per session?
238     */
239    protected function logDomain(): void
240    {
241        global $USERINFO;
242        if (!$this->user) return;
243        if (!isset($USERINFO['mail'])) return;
244        $mail = $USERINFO['mail'];
245
246        $pos = strrpos($mail, '@');
247        if (!$pos) return;
248        $domain = substr($mail, $pos + 1);
249        if (empty($domain)) return;
250
251        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
252        $this->db->exec($sql, [$domain, $this->user]);
253    }
254
255    // endregion
256    // region internal loggers called by the dispatchers
257
258    /**
259     * Log the given referer URL
260     *
261     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
262     * We do not log referers that are our own pages though.
263     *
264     * engine set -> a search engine referer
265     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
266     * no engine set, url not empty -> a referer from another page (not a wiki page)
267     * null returned -> referer was a wiki page
268     *
269     * @param $referer
270     * @return int|null The referer ID or null if no referer was logged
271     * @todo we could check against a blacklist here
272     */
273    public function logReferer($referer): ?int
274    {
275        $referer = trim($referer);
276
277        // do not log our own pages as referers (empty referer is OK though)
278        if (!empty($referer)) {
279            $selfre = '^' . preg_quote(DOKU_URL, '/');
280            if (preg_match("/$selfre/", $referer)) {
281                return null;
282            }
283        }
284
285        // is it a search engine?
286        $se = new SearchEngines($referer);
287        $engine = $se->getEngine();
288
289        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
290        $this->db->exec($sql, [$referer, $engine]);
291        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
292    }
293
294    /**
295     * Resolve IP to country/city and store in database
296     *
297     * @return string The IP address as stored
298     */
299    public function logIp(): string
300    {
301        $ip = clientIP(true);
302
303        // anonymize the IP address for storage?
304        if ($this->hlp->getConf('anonips')) {
305            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
306            $host = '';
307        } else {
308            $hash = $ip;
309            $host = gethostbyaddr($ip);
310        }
311
312        if($this->hlp->getConf('nolocation')) {
313            // if we don't resolve location data, we just return the IP address
314            return $hash;
315        }
316
317        // check if IP already known and up-to-date
318        $result = $this->db->queryValue(
319            "SELECT ip
320             FROM   iplocation
321             WHERE  ip = ?
322               AND  lastupd > date('now', '-30 days')",
323            $hash
324        );
325        if ($result) return $hash; // already known and up-to-date
326
327
328        // resolve the IP address to location data
329        try {
330            $data = $this->hlp->resolveIP($ip);
331        } catch (IpResolverException $e) {
332            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
333            $data = [];
334        }
335
336        $this->db->exec(
337            'INSERT OR REPLACE INTO iplocation (
338                    ip, country, code, city, host, lastupd
339                 ) VALUES (
340                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
341                 )',
342            $hash,
343            $data['country'] ?? '',
344            $data['countryCode'] ?? '',
345            $data['city'] ?? '',
346            $host
347        );
348
349        return $hash;
350    }
351
352    // endregion
353    // region log dispatchers
354
355    public function logPageView(): void
356    {
357        global $INPUT;
358
359        if (!$INPUT->str('p')) return;
360
361
362        $referer = $INPUT->filter('trim')->str('r');
363        $ip = $this->logIp(); // resolve the IP address
364
365        $data = [
366            'page' => $INPUT->filter('cleanID')->str('p'),
367            'ip' => $ip,
368            'ref_id' => $this->logReferer($referer),
369            'sx' => $INPUT->int('sx'),
370            'sy' => $INPUT->int('sy'),
371            'vx' => $INPUT->int('vx'),
372            'vy' => $INPUT->int('vy'),
373            'session' => $this->session,
374        ];
375
376        $this->db->exec('
377        INSERT INTO pageviews (
378            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
379        ) VALUES (
380            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
381        )
382        ',
383            $data
384        );
385    }
386
387    /**
388     * Log a click on an external link
389     *
390     * Called from log.php
391     */
392    public function logOutgoing(): void
393    {
394        global $INPUT;
395
396        if (!$INPUT->str('ol')) return;
397
398        $link = $INPUT->filter('trim')->str('ol');
399        $session = $this->session;
400        $page = $INPUT->filter('cleanID')->str('p');
401
402        $this->db->exec(
403            'INSERT INTO outlinks (
404                dt, session, page, link
405             ) VALUES (
406                CURRENT_TIMESTAMP, ?, ?, ?
407             )',
408            $session,
409            $page,
410            $link
411        );
412    }
413
414    /**
415     * Log access to a media file
416     *
417     * Called from action.php
418     *
419     * @param string $media The media ID
420     * @param string $mime The media's mime type
421     * @param bool $inline Is this displayed inline?
422     * @param int $size Size of the media file
423     */
424    public function logMedia(string $media, string $mime, bool $inline, int $size): void
425    {
426        [$mime1, $mime2] = explode('/', strtolower($mime));
427        $inline = $inline ? 1 : 0;
428
429
430        $data = [
431            'media' => cleanID($media),
432            'ip' => $this->logIp(), // resolve the IP address
433            'session' => $this->session,
434            'size' => $size,
435            'mime1' => $mime1,
436            'mime2' => $mime2,
437            'inline' => $inline,
438        ];
439
440        $this->db->exec('
441                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
442                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
443            ',
444            $data
445        );
446    }
447
448    /**
449     * Log page edits
450     *
451     * called from action.php
452     *
453     * @param string $page The page that was edited
454     * @param string $type The type of edit (create, edit, etc.)
455     */
456    public function logEdit(string $page, string $type): void
457    {
458        $data = [
459            'page' => cleanID($page),
460            'type' => $type,
461            'ip' => $this->logIp(), // resolve the IP address
462            'session' => $this->session
463        ];
464
465        $this->db->exec(
466            'INSERT INTO edits (
467                dt, page, type, ip, session
468             ) VALUES (
469                CURRENT_TIMESTAMP, :page, :type, :ip, :session
470             )',
471            $data
472        );
473    }
474
475    /**
476     * Log login/logoffs and user creations
477     *
478     * @param string $type The type of login event (login, logout, create, failed)
479     * @param string $user The username
480     */
481    public function logLogin(string $type, string $user = ''): void
482    {
483        global $INPUT;
484
485        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
486
487        $ip = clientIP(true);
488
489        $this->db->exec(
490            'INSERT INTO logins (
491                dt, ip, user, type
492             ) VALUES (
493                CURRENT_TIMESTAMP, ?, ?, ?
494             )',
495            $ip,
496            $user,
497            $type
498        );
499    }
500
501    /**
502     * Log search data to the search related tables
503     *
504     * @param string $query The search query
505     * @param string[] $words The query split into words
506     */
507    public function logSearch(string $query, array $words): void
508    {
509        if (!$query) return;
510
511        $sid = $this->db->exec(
512            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
513            $this->logIp(), // resolve the IP address
514            $this->session,
515            $query,
516        );
517
518        foreach ($words as $word) {
519            if (!$word) continue;
520            $this->db->exec(
521                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
522                $sid,
523                $word
524            );
525        }
526    }
527
528    /**
529     * Log the current page count and size as today's history entry
530     */
531    public function logHistoryPages(): void
532    {
533        global $conf;
534
535        // use the popularity plugin's search method to find the wanted data
536        /** @var helper_plugin_popularity $pop */
537        $pop = plugin_load('helper', 'popularity');
538        $list = $this->initEmptySearchList();
539        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
540        $page_count = $list['file_count'];
541        $page_size = $list['file_size'];
542
543        $this->db->exec(
544            'INSERT OR REPLACE INTO history (
545                info, value, dt
546             ) VALUES (
547                ?, ?, CURRENT_TIMESTAMP
548             )',
549            'page_count',
550            $page_count
551        );
552        $this->db->exec(
553            'INSERT OR REPLACE INTO history (
554                info, value, dt
555             ) VALUES (
556                ?, ?, CURRENT_TIMESTAMP
557             )',
558            'page_size',
559            $page_size
560        );
561    }
562
563    /**
564     * Log the current media count and size as today's history entry
565     */
566    public function logHistoryMedia(): void
567    {
568        global $conf;
569
570        // use the popularity plugin's search method to find the wanted data
571        /** @var helper_plugin_popularity $pop */
572        $pop = plugin_load('helper', 'popularity');
573        $list = $this->initEmptySearchList();
574        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
575        $media_count = $list['file_count'];
576        $media_size = $list['file_size'];
577
578        $this->db->exec(
579            'INSERT OR REPLACE INTO history (
580                info, value, dt
581             ) VALUES (
582                ?, ?, CURRENT_TIMESTAMP
583             )',
584            'media_count',
585            $media_count
586        );
587        $this->db->exec(
588            'INSERT OR REPLACE INTO history (
589                info, value, dt
590             ) VALUES (
591                ?, ?, CURRENT_TIMESTAMP
592             )',
593            'media_size',
594            $media_size
595        );
596    }
597
598    // endregion
599
600    /**
601     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
602     * @return array
603     */
604    protected function initEmptySearchList()
605    {
606        return array_fill_keys([
607            'file_count',
608            'file_size',
609            'file_max',
610            'file_min',
611            'dir_count',
612            'dir_nest',
613            'file_oldest'
614        ], 0);
615    }
616}
617