1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\ClientHints;
6use DeviceDetector\DeviceDetector;
7use DeviceDetector\Parser\AbstractParser;
8use DeviceDetector\Parser\Device\AbstractDeviceParser;
9use DeviceDetector\Parser\OperatingSystem;
10use dokuwiki\Input\Input;
11use dokuwiki\plugin\sqlite\SQLiteDB;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15class Logger
16{
    /** @var helper_plugin_statistics The statistics helper plugin instance (passed to the constructor) */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance, obtained from the helper's getDB() */
    protected SQLiteDB $db;

    /** @var string The full user agent string (trimmed HTTP_USER_AGENT server variable) */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader); defaults to browser */
    protected string $uaType = 'browser';

    /** @var string The browser/client name; 'Unknown' when the detector reports none */
    protected string $uaName;

    /** @var string The browser/client version; '0' when the detector reports none */
    protected string $uaVersion;

    /** @var string The operating system/platform family; 'Unknown' when it cannot be determined */
    protected string $uaPlatform;

    /** @var string|null The user name, if available (stays null when the 'nousers' option is set) */
    protected ?string $user = null;

    /** @var string The unique user identifier, read from the session data */
    protected string $uid;

    /** @var string The session identifier, read from the session data */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any (not assigned by any method of this class) */
    protected ?int $hit = null;
49
50    // region lifecycle
51
52    /**
53     * Constructor
54     *
55     * Parses browser info and set internal vars
56     * @throws IgnoreException
57     */
58    public function __construct(helper_plugin_statistics $hlp)
59    {
60        /** @var Input $INPUT */
61        global $INPUT;
62
63        $this->hlp = $hlp;
64        $this->db = $this->hlp->getDB();
65
66        // FIXME if we already have a session, we should not re-parse the user agent
67
68        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
69        AbstractDeviceParser::setVersionTruncation(AbstractParser::VERSION_TRUNCATION_MAJOR);
70        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
71        $dd->discardBotInformation();
72        $dd->parse();
73
74        if ($dd->isFeedReader()) {
75            $this->uaType = 'feedreader';
76        } elseif ($dd->isBot()) {
77            $this->uaType = 'robot';
78            // for now ignore bots
79            throw new IgnoreException('Bot detected, not logging');
80        }
81
82        $this->uaAgent = $ua;
83        $this->uaName = $dd->getClient('name') ?: 'Unknown';
84        $this->uaVersion = $dd->getClient('version') ?: '0';
85        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
86        $this->uid = $this->getUID();
87        $this->session = $this->getSession();
88
89        if (!$this->hlp->getConf('nousers')) {
90            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
91        }
92    }
93
94    /**
95     * Should be called before logging
96     *
97     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
98     */
99    public function begin(): void
100    {
101        $this->db->getPdo()->beginTransaction();
102
103        $this->logUser();
104        $this->logGroups();
105        $this->logDomain();
106        $this->logSession();
107        $this->logCampaign();
108    }
109
110    /**
111     * Should be called after logging
112     *
113     * This commits the transaction started in begin()
114     */
115    public function end(): void
116    {
117        $this->db->getPdo()->commit();
118    }
119
120    // endregion
121    // region data gathering
122
123    /**
124     * Get the unique user ID
125     *
126     * The user ID is stored in the user preferences and should stay there forever.
127     * @return string The unique user identifier
128     * @throws IgnoreException
129     */
130    protected function getUID(): string
131    {
132        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
133            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
134            // @todo we may later make UID generation optional
135            throw new IgnoreException('No user ID found');
136        }
137
138        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
139    }
140
141    /**
142     * Return the user's session ID
143     *
144     * @return string The session identifier
145     * @throws IgnoreException
146     */
147    protected function getSession(): string
148    {
149        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
150            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
151            throw new IgnoreException('No session ID found');
152        }
153
154        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
155    }
156
157    // endregion
158    // region automatic logging
159
160    /**
161     * Log the user was seen
162     */
163    protected function logUser(): void
164    {
165        if (!$this->user) return;
166
167        $this->db->exec(
168            'INSERT INTO users (user, dt)
169                  VALUES (?, CURRENT_TIMESTAMP)
170            ON CONFLICT (user) DO UPDATE SET
171                         dt = CURRENT_TIMESTAMP
172                   WHERE excluded.user = users.user
173            ',
174            $this->user
175        );
176    }
177
178    /**
179     * Log the session and user agent information
180     */
181    protected function logSession(): void
182    {
183        $this->db->exec(
184            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
185                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
186             ON CONFLICT (session) DO UPDATE SET
187                         end = CURRENT_TIMESTAMP,
188                         user = excluded.user,
189                         uid = excluded.uid
190                   WHERE excluded.session = sessions.session
191             ',
192            $this->session,
193            $this->uid,
194            $this->user,
195            $this->uaAgent,
196            $this->uaName,
197            $this->uaType,
198            $this->uaVersion,
199            $this->uaPlatform
200        );
201    }
202
203    /**
204     * Log UTM campaign data
205     *
206     * @return void
207     */
208    protected function logCampaign(): void
209    {
210        global $INPUT;
211
212        $campaign = $INPUT->filter('trim')->str('utm_campaign', null, true);
213        $source = $INPUT->filter('trim')->str('utm_source', null, true);
214        $medium = $INPUT->filter('trim')->str('utm_medium', null, true);
215
216        if (!$campaign && !$source && !$medium) return;
217
218        $this->db->exec(
219            'INSERT OR IGNORE INTO campaigns (session, campaign, source, medium)
220                  VALUES (?, ?, ?, ?)',
221            $this->session,
222            $campaign,
223            $source,
224            $medium
225        );
226    }
227
228    /**
229     * Log all groups for the user
230     *
231     * @todo maybe this should be done only once per session?
232     */
233    protected function logGroups(): void
234    {
235        global $USERINFO;
236
237        if (!$this->user) return;
238        if (!isset($USERINFO['grps'])) return;
239        if (!is_array($USERINFO['grps'])) return;
240        $groups = $USERINFO['grps'];
241
242        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
243
244        if ($groups === []) {
245            return;
246        }
247
248        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
249        $params = [];
250        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
251        foreach ($groups as $group) {
252            $params[] = $this->user;
253            $params[] = $group;
254        }
255        $this->db->exec($sql, $params);
256    }
257
258    /**
259     * Log email domain
260     *
261     * @todo maybe this should be done only once per session?
262     */
263    protected function logDomain(): void
264    {
265        global $USERINFO;
266        if (!$this->user) return;
267        if (!isset($USERINFO['mail'])) return;
268        $mail = $USERINFO['mail'];
269
270        $pos = strrpos($mail, '@');
271        if (!$pos) return;
272        $domain = substr($mail, $pos + 1);
273        if (empty($domain)) return;
274
275        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
276        $this->db->exec($sql, [$domain, $this->user]);
277    }
278
279    // endregion
280    // region internal loggers called by the dispatchers
281
282    /**
283     * Log the given referer URL
284     *
285     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
286     * We do not log referers that are our own pages though.
287     *
288     * engine set -> a search engine referer
289     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
290     * no engine set, url not empty -> a referer from another page (not a wiki page)
291     * null returned -> referer was a wiki page
292     *
293     * @param $referer
294     * @return int|null The referer ID or null if no referer was logged
295     * @todo we could check against a blacklist here
296     */
297    public function logReferer($referer): ?int
298    {
299        $referer = trim($referer);
300
301        // do not log our own pages as referers (empty referer is OK though)
302        if (!empty($referer)) {
303            $selfre = '^' . preg_quote(DOKU_URL, '/');
304            if (preg_match("/$selfre/", $referer)) {
305                return null;
306            }
307        }
308
309        // is it a search engine?
310        $se = new SearchEngines($referer);
311        $engine = $se->getEngine();
312
313        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
314        $this->db->exec($sql, [$referer, $engine]);
315        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
316    }
317
318    /**
319     * Resolve IP to country/city and store in database
320     *
321     * @return string The IP address as stored
322     */
323    public function logIp(): string
324    {
325        $ip = clientIP(true);
326
327        // anonymize the IP address for storage?
328        if ($this->hlp->getConf('anonips')) {
329            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
330            $host = '';
331        } else {
332            $hash = $ip;
333            $host = gethostbyaddr($ip);
334        }
335
336        if ($this->hlp->getConf('nolocation')) {
337            // if we don't resolve location data, we just return the IP address
338            return $hash;
339        }
340
341        // check if IP already known and up-to-date
342        $result = $this->db->queryValue(
343            "SELECT ip
344             FROM   iplocation
345             WHERE  ip = ?
346               AND  dt > date('now', '-30 days')",
347            $hash
348        );
349        if ($result) return $hash; // already known and up-to-date
350
351
352        // resolve the IP address to location data
353        try {
354            $data = $this->hlp->resolveIP($ip);
355        } catch (IpResolverException $e) {
356            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
357            $data = [];
358        }
359
360        $this->db->exec(
361            'INSERT OR REPLACE INTO iplocation (
362                    ip, country, code, city, host, dt
363                 ) VALUES (
364                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
365                 )',
366            $hash,
367            $data['country'] ?? '',
368            $data['countryCode'] ?? '',
369            $data['city'] ?? '',
370            $host
371        );
372
373        return $hash;
374    }
375
376    // endregion
377    // region log dispatchers
378
379    public function logPageView(): void
380    {
381        global $INPUT;
382
383        if (!$INPUT->str('p')) return;
384
385
386        $referer = $INPUT->filter('trim')->str('r');
387        $ip = $this->logIp(); // resolve the IP address
388
389        $data = [
390            'page' => $INPUT->filter('cleanID')->str('p'),
391            'ip' => $ip,
392            'ref_id' => $this->logReferer($referer),
393            'sx' => $INPUT->int('sx'),
394            'sy' => $INPUT->int('sy'),
395            'vx' => $INPUT->int('vx'),
396            'vy' => $INPUT->int('vy'),
397            'session' => $this->session,
398        ];
399
400        $this->db->exec(
401            '
402        INSERT INTO pageviews (
403            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
404        ) VALUES (
405            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
406        )
407        ',
408            $data
409        );
410    }
411
412    /**
413     * Log a click on an external link
414     *
415     * Called from dispatch.php
416     */
417    public function logOutgoing(): void
418    {
419        global $INPUT;
420
421        if (!$INPUT->str('ol')) return;
422
423        $link = $INPUT->filter('trim')->str('ol');
424        $session = $this->session;
425        $page = $INPUT->filter('cleanID')->str('p');
426
427        $this->db->exec(
428            'INSERT INTO outlinks (
429                dt, session, page, link
430             ) VALUES (
431                CURRENT_TIMESTAMP, ?, ?, ?
432             )',
433            $session,
434            $page,
435            $link
436        );
437    }
438
439    /**
440     * Log access to a media file
441     *
442     * Called from action.php
443     *
444     * @param string $media The media ID
445     * @param string $mime The media's mime type
446     * @param bool $inline Is this displayed inline?
447     * @param int $size Size of the media file
448     */
449    public function logMedia(string $media, string $mime, bool $inline, int $size): void
450    {
451        [$mime1, $mime2] = explode('/', strtolower($mime));
452        $inline = $inline ? 1 : 0;
453
454
455        $data = [
456            'media' => cleanID($media),
457            'ip' => $this->logIp(), // resolve the IP address
458            'session' => $this->session,
459            'size' => $size,
460            'mime1' => $mime1,
461            'mime2' => $mime2,
462            'inline' => $inline,
463        ];
464
465        $this->db->exec(
466            '
467                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
468                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
469            ',
470            $data
471        );
472    }
473
474    /**
475     * Log page edits
476     *
477     * called from action.php
478     *
479     * @param string $page The page that was edited
480     * @param string $type The type of edit (create, edit, etc.)
481     */
482    public function logEdit(string $page, string $type): void
483    {
484        $data = [
485            'page' => cleanID($page),
486            'type' => $type,
487            'ip' => $this->logIp(), // resolve the IP address
488            'session' => $this->session
489        ];
490
491        $this->db->exec(
492            'INSERT INTO edits (
493                dt, page, type, ip, session
494             ) VALUES (
495                CURRENT_TIMESTAMP, :page, :type, :ip, :session
496             )',
497            $data
498        );
499    }
500
501    /**
502     * Log login/logoffs and user creations
503     *
504     * @param string $type The type of login event (login, logout, create, failed)
505     * @param string $user The username
506     */
507    public function logLogin(string $type, string $user = ''): void
508    {
509        global $INPUT;
510
511        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
512
513        $ip = clientIP(true);
514
515        $this->db->exec(
516            'INSERT INTO logins (
517                dt, ip, user, type
518             ) VALUES (
519                CURRENT_TIMESTAMP, ?, ?, ?
520             )',
521            $ip,
522            $user,
523            $type
524        );
525    }
526
527    /**
528     * Log search data to the search related tables
529     *
530     * @param string $query The search query
531     * @param string[] $words The query split into words
532     */
533    public function logSearch(string $query, array $words): void
534    {
535        if (!$query) return;
536
537        $sid = $this->db->exec(
538            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
539            $this->logIp(), // resolve the IP address
540            $this->session,
541            $query,
542        );
543
544        foreach ($words as $word) {
545            if (!$word) continue;
546            $this->db->exec(
547                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
548                $sid,
549                $word
550            );
551        }
552    }
553
554    /**
555     * Log the current page count and size as today's history entry
556     */
557    public function logHistoryPages(): void
558    {
559        global $conf;
560
561        // use the popularity plugin's search method to find the wanted data
562        /** @var helper_plugin_popularity $pop */
563        $pop = plugin_load('helper', 'popularity');
564        $list = $this->initEmptySearchList();
565        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
566        $page_count = $list['file_count'];
567        $page_size = $list['file_size'];
568
569        $this->db->exec(
570            'INSERT OR REPLACE INTO history (
571                info, value, dt
572             ) VALUES (
573                ?, ?, CURRENT_TIMESTAMP
574             )',
575            'page_count',
576            $page_count
577        );
578        $this->db->exec(
579            'INSERT OR REPLACE INTO history (
580                info, value, dt
581             ) VALUES (
582                ?, ?, CURRENT_TIMESTAMP
583             )',
584            'page_size',
585            $page_size
586        );
587    }
588
589    /**
590     * Log the current media count and size as today's history entry
591     */
592    public function logHistoryMedia(): void
593    {
594        global $conf;
595
596        // use the popularity plugin's search method to find the wanted data
597        /** @var helper_plugin_popularity $pop */
598        $pop = plugin_load('helper', 'popularity');
599        $list = $this->initEmptySearchList();
600        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
601        $media_count = $list['file_count'];
602        $media_size = $list['file_size'];
603
604        $this->db->exec(
605            'INSERT OR REPLACE INTO history (
606                info, value, dt
607             ) VALUES (
608                ?, ?, CURRENT_TIMESTAMP
609             )',
610            'media_count',
611            $media_count
612        );
613        $this->db->exec(
614            'INSERT OR REPLACE INTO history (
615                info, value, dt
616             ) VALUES (
617                ?, ?, CURRENT_TIMESTAMP
618             )',
619            'media_size',
620            $media_size
621        );
622    }
623
624    // endregion
625
626    /**
627     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
628     * @return array
629     */
630    protected function initEmptySearchList()
631    {
632        return array_fill_keys([
633            'file_count',
634            'file_size',
635            'file_max',
636            'file_min',
637            'dir_count',
638            'dir_nest',
639            'file_oldest'
640        ], 0);
641    }
642}
643