xref: /plugin/statistics/Logger.php (revision 2a30f557f7f24294810e1d10019698ec841fa49b)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
/**
 * Signals that the current request must not be logged
 *
 * This is a pure marker exception: it adds nothing to \RuntimeException and only exists
 * so that callers can tell "skip logging" apart from real failures.
 */
class IgnoreException extends \RuntimeException
{
}
11
12use DeviceDetector\DeviceDetector;
13use DeviceDetector\Parser\Client\Browser;
14use DeviceDetector\Parser\Device\AbstractDeviceParser;
15use DeviceDetector\Parser\OperatingSystem;
16use dokuwiki\HTTP\DokuHTTPClient;
17use dokuwiki\Input\Input;
18use dokuwiki\plugin\sqlite\SQLiteDB;
19use helper_plugin_popularity;
20use helper_plugin_statistics;
21
22class Logger
23{
24    /** @var helper_plugin_statistics The statistics helper plugin instance */
25    protected helper_plugin_statistics $hlp;
26
27    /** @var SQLiteDB The SQLite database instance */
28    protected SQLiteDB $db;
29
30    /** @var string The full user agent string */
31    protected string $uaAgent;
32
33    /** @var string The type of user agent (browser, robot, feedreader) */
34    protected string $uaType = 'browser';
35
36    /** @var string The browser/client name */
37    protected string $uaName;
38
39    /** @var string The browser/client version */
40    protected string $uaVersion;
41
42    /** @var string The operating system/platform */
43    protected string $uaPlatform;
44
45    /** @var string|null The user name, if available */
46    protected ?string $user = null;
47
48    /** @var string The unique user identifier */
49    protected string $uid;
50
51    /** @var string The session identifier */
52    protected string $session;
53
54    /** @var int|null The ID of the main access log entry if any */
55    protected ?int $hit = null;
56
57    /** @var DokuHTTPClient|null The HTTP client instance for testing */
58    protected ?DokuHTTPClient $httpClient = null;
59
60    // region lifecycle
61
62    /**
63     * Constructor
64     *
65     * Parses browser info and set internal vars
66     */
67    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
68    {
69        /** @var Input $INPUT */
70        global $INPUT;
71
72        $this->hlp = $hlp;
73        $this->db = $this->hlp->getDB();
74        $this->httpClient = $httpClient;
75
76        // FIXME if we already have a session, we should not re-parse the user agent
77
78        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
79        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
80        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
81        $dd->discardBotInformation();
82        $dd->parse();
83
84        if ($dd->isFeedReader()) {
85            $this->uaType = 'feedreader';
86        } elseif ($dd->isBot()) {
87            $this->uaType = 'robot';
88            // for now ignore bots
89            throw new IgnoreException('Bot detected, not logging');
90        }
91
92        $this->uaAgent = $ua;
93        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
94        $this->uaVersion = $dd->getClient('version') ?: '0';
95        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
96        $this->uid = $this->getUID();
97        $this->session = $this->getSession();
98        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
99    }
100
101    /**
102     * Should be called before logging
103     *
104     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
105     */
106    public function begin(): void
107    {
108        $this->hlp->getDB()->getPdo()->beginTransaction();
109
110        $this->logUser();
111        $this->logGroups();
112        $this->logDomain();
113        $this->logSession();
114    }
115
116    /**
117     * Should be called after logging
118     *
119     * This commits the transaction started in begin()
120     */
121    public function end(): void
122    {
123        $this->hlp->getDB()->getPdo()->commit();
124    }
125
126    // endregion
127    // region data gathering
128
129    /**
130     * Get the unique user ID
131     *
132     * The user ID is stored in the user preferences and should stay there forever.
133     * @return string The unique user identifier
134     */
135    protected function getUID(): string
136    {
137        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
138            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
139            // @todo we may later make UID generation optional
140            throw new IgnoreException('No user ID found');
141        }
142
143        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
144    }
145
146    /**
147     * Return the user's session ID
148     *
149     * @return string The session identifier
150     */
151    protected function getSession(): string
152    {
153        if(!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
154            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
155            throw new IgnoreException('No session ID found');
156        }
157
158        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
159    }
160
161    // endregion
162    // region automatic logging
163
164    /**
165     * Log the user was seen
166     */
167    protected function logUser(): void
168    {
169        if (!$this->user) return;
170
171        $this->db->exec(
172            'INSERT INTO users (user, dt)
173                  VALUES (?, CURRENT_TIMESTAMP)
174            ON CONFLICT (user) DO UPDATE SET
175                         dt = CURRENT_TIMESTAMP
176                   WHERE excluded.user = users.user
177            ',
178            $this->user
179        );
180
181    }
182
183    /**
184     * Log the session and user agent information
185     */
186    protected function logSession(): void
187    {
188        $this->db->exec(
189            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
190                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
191             ON CONFLICT (session) DO UPDATE SET
192                         end = CURRENT_TIMESTAMP,
193                         user = excluded.user,
194                         uid = excluded.uid
195                   WHERE excluded.session = sessions.session
196             ',
197            $this->session,
198            $this->uid,
199            $this->user,
200            $this->uaAgent,
201            $this->uaName,
202            $this->uaType,
203            $this->uaVersion,
204            $this->uaPlatform
205        );
206    }
207
208    /**
209     * Log all groups for the user
210     *
211     * @todo maybe this should be done only once per session?
212     */
213    protected function logGroups(): void
214    {
215        global $USERINFO;
216
217        if (!$this->user) return;
218        if (!isset($USERINFO['grps'])) return;
219        if (!is_array($USERINFO['grps'])) return;
220        $groups = $USERINFO['grps'];
221
222        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);
223
224        if( empty($groups)) {
225            return;
226        }
227
228        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
229        $params = [];
230        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
231        foreach ($groups as $group) {
232            $params[] = $this->user;
233            $params[] = $group;
234        }
235        $this->db->exec($sql, $params);
236    }
237
238    /**
239     * Log email domain
240     *
241     * @todo maybe this should be done only once per session?
242     */
243    protected function logDomain(): void
244    {
245        global $USERINFO;
246        if (!$this->user) return;
247        if (!isset($USERINFO['mail'])) return;
248        $mail = $USERINFO['mail'];
249
250        $pos = strrpos($mail, '@');
251        if (!$pos) return;
252        $domain = substr($mail, $pos + 1);
253        if (empty($domain)) return;
254
255        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
256        $this->db->exec($sql, [$domain, $this->user]);
257    }
258
259    // endregion
260    // region internal loggers called by the dispatchers
261
262    /**
263     * Log the given referer URL
264     *
265     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
266     * We do not log referers that are our own pages though.
267     *
268     * engine set -> a search engine referer
269     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
270     * no engine set, url not empty -> a referer from another page (not a wiki page)
271     * null returned -> referer was a wiki page
272     *
273     * @param $referer
274     * @return int|null The referer ID or null if no referer was logged
275     * @todo we could check against a blacklist here
276     */
277    public function logReferer($referer): ?int
278    {
279        $referer = trim($referer);
280
281        // do not log our own pages as referers
282        $selfre = '^' . preg_quote(DOKU_URL, '/') . '$';
283        if(preg_match("/$selfre/", $referer)) {
284            return null;
285        }
286
287        // is it a search engine?
288        $se = new SearchEngines($referer);
289        $engine = $se->getEngine();
290
291        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
292        return $this->db->exec($sql, [$referer, $engine]); // returns ID even if the insert was ignored
293    }
294
295    /**
296     * Resolve IP to country/city and store in database
297     *
298     * @return string The IP address as stored
299     */
300    public function logIp(): string
301    {
302        $ip = clientIP(true);
303        $hash = $ip; // @todo we could anonymize here
304
305        // check if IP already known and up-to-date
306        $result = $this->db->queryValue(
307            "SELECT ip
308             FROM   iplocation
309             WHERE  ip = ?
310               AND  lastupd > date('now', '-30 days')",
311            $hash
312        );
313        if ($result) return $hash; // already known and up-to-date
314
315        $http = $this->httpClient ?: new DokuHTTPClient();
316        $http->timeout = 7;
317        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
318
319        if (!$json) {
320            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
321            return $hash;
322        }
323        try {
324            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
325        } catch (\JsonException $e) {
326            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
327            return $hash;
328        }
329        if (!isset($data['status'])) {
330            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result' . $ip, $data);
331            return $hash;
332        }
333
334        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
335        // without location data, so we won't re-query it in the next 30 days.
336
337        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
338        $this->db->exec(
339            'INSERT OR REPLACE INTO iplocation (
340                    ip, country, code, city, host, lastupd
341                 ) VALUES (
342                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
343                 )',
344            $hash,
345            $data['country'] ?? '',
346            $data['countryCode'] ?? '',
347            $data['city'] ?? '',
348            $host
349        );
350
351        return $hash;
352    }
353
354    // endregion
355    // region log dispatchers
356
357    public function logPageView(): void
358    {
359        global $INPUT;
360
361        if (!$INPUT->str('p')) return;
362
363
364        $referer = $INPUT->filter('trim')->str('r');
365        $ip = $this->logIp(); // resolve the IP address
366
367        $data = [
368            'page' => $INPUT->filter('cleanID')->str('p'),
369            'ip' => $ip,
370            'ref_id' => $this->logReferer($referer),
371            'sx' => $INPUT->int('sx'),
372            'sy' => $INPUT->int('sy'),
373            'vx' => $INPUT->int('vx'),
374            'vy' => $INPUT->int('vy'),
375            'session' => $this->session,
376        ];
377
378        $this->db->exec('
379        INSERT INTO pageviews (
380            dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
381        ) VALUES (
382            CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
383        )
384        ',
385            $data
386        );
387    }
388
389    /**
390     * Log a click on an external link
391     *
392     * Called from log.php
393     */
394    public function logOutgoing(): void
395    {
396        global $INPUT;
397
398        if (!$INPUT->str('ol')) return;
399
400        $link = $INPUT->filter('trim')->str('ol');
401        $session = $this->session;
402        $page = $INPUT->filter('cleanID')->str('p');
403
404        $this->db->exec(
405            'INSERT INTO outlinks (
406                dt, session, page, link
407             ) VALUES (
408                CURRENT_TIMESTAMP, ?, ?, ?
409             )',
410            $session,
411            $page,
412            $link
413        );
414    }
415
416    /**
417     * Log access to a media file
418     *
419     * Called from action.php
420     *
421     * @param string $media The media ID
422     * @param string $mime The media's mime type
423     * @param bool $inline Is this displayed inline?
424     * @param int $size Size of the media file
425     */
426    public function logMedia(string $media, string $mime, bool $inline, int $size): void
427    {
428        [$mime1, $mime2] = explode('/', strtolower($mime));
429        $inline = $inline ? 1 : 0;
430
431
432        $data = [
433            'media' => cleanID($media),
434            'ip' => $this->logIp(), // resolve the IP address
435            'session' => $this->session,
436            'size' => $size,
437            'mime1' => $mime1,
438            'mime2' => $mime2,
439            'inline' => $inline,
440        ];
441
442        $this->db->exec('
443                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
444                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
445            ',
446            $data
447        );
448    }
449
450    /**
451     * Log page edits
452     *
453     * called from action.php
454     *
455     * @param string $page The page that was edited
456     * @param string $type The type of edit (create, edit, etc.)
457     */
458    public function logEdit(string $page, string $type): void
459    {
460        $data = [
461            'page' => cleanID($page),
462            'type' => $type,
463            'ip' => $this->logIp(), // resolve the IP address
464            'session' => $this->session
465        ];
466
467        $this->db->exec(
468            'INSERT INTO edits (
469                dt, page, type, ip, session
470             ) VALUES (
471                CURRENT_TIMESTAMP, :page, :type, :ip, :session
472             )',
473            $data
474        );
475    }
476
477    /**
478     * Log login/logoffs and user creations
479     *
480     * @param string $type The type of login event (login, logout, create, failed)
481     * @param string $user The username
482     */
483    public function logLogin(string $type, string $user = ''): void
484    {
485        global $INPUT;
486
487        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
488
489        $ip = clientIP(true);
490
491        $this->db->exec(
492            'INSERT INTO logins (
493                dt, ip, user, type
494             ) VALUES (
495                CURRENT_TIMESTAMP, ?, ?, ?
496             )',
497            $ip,
498            $user,
499            $type
500        );
501    }
502
503    /**
504     * Log search data to the search related tables
505     *
506     * @param string $query The search query
507     * @param string[] $words The query split into words
508     */
509    public function logSearch(string $query, array $words): void
510    {
511        if (!$query) return;
512
513        $sid = $this->db->exec(
514            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
515            $this->logIp(), // resolve the IP address
516            $this->session,
517            $query,
518        );
519
520        foreach ($words as $word) {
521            if (!$word) continue;
522            $this->db->exec(
523                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
524                $sid,
525                $word
526            );
527        }
528    }
529
530    /**
531     * Log the current page count and size as today's history entry
532     */
533    public function logHistoryPages(): void
534    {
535        global $conf;
536
537        // use the popularity plugin's search method to find the wanted data
538        /** @var helper_plugin_popularity $pop */
539        $pop = plugin_load('helper', 'popularity');
540        $list = $this->initEmptySearchList();
541        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
542        $page_count = $list['file_count'];
543        $page_size = $list['file_size'];
544
545        $this->db->exec(
546            'INSERT OR REPLACE INTO history (
547                info, value, dt
548             ) VALUES (
549                ?, ?, CURRENT_TIMESTAMP
550             )',
551            'page_count',
552            $page_count
553        );
554        $this->db->exec(
555            'INSERT OR REPLACE INTO history (
556                info, value, dt
557             ) VALUES (
558                ?, ?, CURRENT_TIMESTAMP
559             )',
560            'page_size',
561            $page_size
562        );
563    }
564
565    /**
566     * Log the current media count and size as today's history entry
567     */
568    public function logHistoryMedia(): void
569    {
570        global $conf;
571
572        // use the popularity plugin's search method to find the wanted data
573        /** @var helper_plugin_popularity $pop */
574        $pop = plugin_load('helper', 'popularity');
575        $list = $this->initEmptySearchList();
576        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
577        $media_count = $list['file_count'];
578        $media_size = $list['file_size'];
579
580        $this->db->exec(
581            'INSERT OR REPLACE INTO history (
582                info, value, dt
583             ) VALUES (
584                ?, ?, CURRENT_TIMESTAMP
585             )',
586            'media_count',
587            $media_count
588        );
589        $this->db->exec(
590            'INSERT OR REPLACE INTO history (
591                info, value, dt
592             ) VALUES (
593                ?, ?, CURRENT_TIMESTAMP
594             )',
595            'media_size',
596            $media_size
597        );
598    }
599
600    // endregion
601
602    /**
603     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
604     * @return array
605     */
606    protected function initEmptySearchList()
607    {
608        return array_fill_keys([
609            'file_count',
610            'file_size',
611            'file_max',
612            'file_min',
613            'dir_count',
614            'dir_nest',
615            'file_oldest'
616        ], 0);
617    }
618}
619