xref: /plugin/statistics/Logger.php (revision 762f48070f8d46fc261c987fad5da39924f2b19d)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use dokuwiki\Utf8\PhpString;
13use helper_plugin_popularity;
14use helper_plugin_statistics;
15
16
17class Logger
18{
19    /** @var helper_plugin_statistics The statistics helper plugin instance */
20    protected helper_plugin_statistics $hlp;
21
22    /** @var SQLiteDB The SQLite database instance */
23    protected SQLiteDB $db;
24
25    /** @var string The full user agent string */
26    protected string $uaAgent;
27
28    /** @var string The type of user agent (browser, robot, feedreader) */
29    protected string $uaType = 'browser';
30
31    /** @var string The browser/client name */
32    protected string $uaName;
33
34    /** @var string The browser/client version */
35    protected string $uaVersion;
36
37    /** @var string The operating system/platform */
38    protected string $uaPlatform;
39
40    /** @var string The unique user identifier */
41    protected string $uid;
42
43
44    /**
45     * Constructor
46     *
47     * Parses browser info and set internal vars
48     */
49    public function __construct(helper_plugin_statistics $hlp)
50    {
51        global $INPUT;
52
53        $this->hlp = $hlp;
54        $this->db = $this->hlp->getDB();
55
56        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
57
58        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
59        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
60        $dd->discardBotInformation();
61        $dd->parse();
62
63        if ($dd->isBot()) {
64            $this->uaType = 'robot';
65
66            // for now ignore bots
67            throw new \RuntimeException('Bot detected, not logging');
68        }
69
70        $this->uaAgent = $ua;
71        $this->uaName = Browser::getBrowserFamily($dd->getClient('name'));
72        $this->uaVersion = $dd->getClient('version');
73        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name'));
74        $this->uid = $this->getUID();
75
76        if ($dd->isFeedReader()) {
77            $this->uaType = 'feedreader';
78        }
79
80        $this->logLastseen();
81    }
82
83    /**
84     * Should be called before logging
85     *
86     * This starts a transaction, so all logging is done in one go
87     */
88    public function begin(): void
89    {
90        $this->hlp->getDB()->getPdo()->beginTransaction();
91    }
92
93    /**
94     * Should be called after logging
95     *
96     * This commits the transaction started in begin()
97     */
98    public function end(): void
99    {
100        $this->hlp->getDB()->getPdo()->commit();
101    }
102
103    /**
104     * Get the unique user ID
105     *
106     * @return string The unique user identifier
107     */
108    protected function getUID(): string
109    {
110        global $INPUT;
111
112        $uid = $INPUT->str('uid');
113        if (!$uid) $uid = get_doku_pref('plgstats', false);
114        if (!$uid) $uid = session_id();
115        return $uid;
116    }
117
118    /**
119     * Return the user's session ID
120     *
121     * This is usually our own managed session, not a PHP session (only in fallback)
122     *
123     * @return string The session identifier
124     */
125    protected function getSession(): string
126    {
127        global $INPUT;
128
129        $ses = $INPUT->str('ses');
130        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
131        if (!$ses) $ses = session_id();
132        return $ses;
133    }
134
135    /**
136     * Log that we've seen the user (authenticated only)
137     */
138    public function logLastseen(): void
139    {
140        global $INPUT;
141
142        if (empty($INPUT->server->str('REMOTE_USER'))) return;
143
144        $this->db->exec(
145            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
146            $INPUT->server->str('REMOTE_USER'),
147        );
148    }
149
150    /**
151     * Log actions by groups
152     *
153     * @param string $type The type of access to log ('view','edit')
154     * @param array $groups The groups to log
155     */
156    public function logGroups(string $type, array $groups): void
157    {
158        if (!is_array($groups)) {
159            return;
160        }
161
162        $tolog = (array)$this->hlp->getConf('loggroups');
163        $groups = array_intersect($groups, $tolog);
164        if ($groups === []) {
165            return;
166        }
167
168
169        $params = [];
170        $sql = "INSERT INTO groups (`type`, `group`) VALUES ";
171        foreach ($groups as $group) {
172            $sql .= '(?, ?),';
173            $params[] = $type;
174            $params[] = $group;
175        }
176        $sql = rtrim($sql, ',');
177        $this->db->exec($sql, $params);
178    }
179
180    /**
181     * Log external search queries
182     *
183     * Will not write anything if the referer isn't a search engine
184     *
185     * @param string $referer The HTTP referer URL
186     * @param string $type Reference to the type variable that will be modified
187     */
188    public function logExternalSearch(string $referer, string &$type): void
189    {
190        global $INPUT;
191
192        $searchEngine = new SearchEngines($referer);
193
194        if (!$searchEngine->isSearchEngine()) {
195            return; // not a search engine
196        }
197
198        $type = 'search';
199        $query = $searchEngine->getQuery();
200
201        // log it!
202        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
203        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
204    }
205
206    /**
207     * Log search data to the search related tables
208     *
209     * @param string $page The page being searched from
210     * @param string $query The search query
211     * @param array $words Array of search words
212     * @param string $engine The search engine name
213     */
214    public function logSearch(string $page, string $query, array $words, string $engine): void
215    {
216        $sid = $this->db->exec(
217            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
218            $page, $query, $engine
219        );
220        if (!$sid) return;
221
222        foreach ($words as $word) {
223            if (!$word) continue;
224            $this->db->exec(
225                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
226                $sid, $word
227            );
228        }
229    }
230
231    /**
232     * Log that the session was seen
233     *
234     * This is used to calculate the time people spend on the whole site
235     * during their session
236     *
237     * Viewcounts are used for bounce calculation
238     *
239     * @param int $addview set to 1 to count a view
240     */
241    public function logSession(int $addview = 0): void
242    {
243        // only log browser sessions
244        if ($this->uaType != 'browser') return;
245
246        $session = $this->getSession();
247        $this->db->exec(
248            'INSERT OR REPLACE INTO session (
249                session, dt, end, views, uid
250             ) VALUES (
251                ?,
252                CURRENT_TIMESTAMP,
253                CURRENT_TIMESTAMP,
254                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
255                ?
256             )',
257            $session, $session, $addview, $addview, $this->uid
258        );
259    }
260
261    /**
262     * Resolve IP to country/city and store in database
263     *
264     * @param string $ip The IP address to resolve
265     */
266    public function logIp(string $ip): void
267    {
268        // check if IP already known and up-to-date
269        $result = $this->db->queryValue(
270            "SELECT ip
271             FROM   iplocation
272             WHERE  ip = ?
273               AND  lastupd > date('now', '-30 days')",
274            $ip
275        );
276        if ($result) return;
277
278        $http = new DokuHTTPClient();
279        $http->timeout = 10;
280        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
281
282        if (!$json) return; // FIXME log error
283        try {
284            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
285        } catch (\JsonException $e) {
286            return; // FIXME log error
287        }
288
289        $host = gethostbyaddr($ip);
290        $this->db->exec(
291            'INSERT OR REPLACE INTO iplocation (
292                    ip, country, code, city, host, lastupd
293                 ) VALUES (
294                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
295                 )',
296            $ip, $data['country'], $data['countryCode'], $data['city'], $host
297        );
298    }
299
300    /**
301     * Log a click on an external link
302     *
303     * Called from log.php
304     */
305    public function logOutgoing(): void
306    {
307        global $INPUT;
308
309        if (!$INPUT->str('ol')) return;
310
311        $link = $INPUT->str('ol');
312        $link_md5 = md5($link);
313        $session = $this->getSession();
314        $page = $INPUT->str('p');
315
316        $this->db->exec(
317            'INSERT INTO outlinks (
318                dt, session, page, link_md5, link
319             ) VALUES (
320                CURRENT_TIMESTAMP, ?, ?, ?, ?
321             )',
322            $session, $page, $link_md5, $link
323        );
324    }
325
326    /**
327     * Log a page access
328     *
329     * Called from log.php
330     */
331    public function logAccess(): void
332    {
333        global $INPUT, $USERINFO;
334
335        if (!$INPUT->str('p')) return;
336
337        # FIXME check referer against blacklist and drop logging for bad boys
338
339        // handle referer
340        $referer = trim($INPUT->str('r'));
341        if ($referer) {
342            $ref = $referer;
343            $ref_md5 = md5($referer);
344            if (str_starts_with($referer, DOKU_URL)) {
345                $ref_type = 'internal';
346            } else {
347                $ref_type = 'external';
348                $this->logExternalSearch($referer, $ref_type);
349            }
350        } else {
351            $ref = '';
352            $ref_md5 = '';
353            $ref_type = '';
354        }
355
356        $page = $INPUT->str('p');
357        $ip = clientIP(true);
358        $sx = $INPUT->int('sx');
359        $sy = $INPUT->int('sy');
360        $vx = $INPUT->int('vx');
361        $vy = $INPUT->int('vy');
362        $js = $INPUT->int('js');
363        $user = $INPUT->server->str('REMOTE_USER');
364        $session = $this->getSession();
365
366        $this->db->exec(
367            'INSERT INTO access (
368                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
369                screen_x, screen_y, view_x, view_y, js, user, session, uid
370             ) VALUES (
371                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
372                ?, ?, ?, ?, ?, ?, ?, ?
373             )',
374            $page, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
375            $ref, $ref_md5, $ref_type, $sx, $sy, $vx, $vy, $js, $user, $session, $this->uid
376        );
377
378        if ($ref_md5) {
379            $this->db->exec(
380                'INSERT OR IGNORE INTO refseen (
381                    ref_md5, dt
382                 ) VALUES (
383                    ?, CURRENT_TIMESTAMP
384                 )',
385                $ref_md5
386            );
387        }
388
389        // log group access
390        if (isset($USERINFO['grps'])) {
391            $this->logGroups('view', $USERINFO['grps']);
392        }
393
394        // resolve the IP
395        $this->logIp(clientIP(true));
396    }
397
398    /**
399     * Log access to a media file
400     *
401     * Called from action.php
402     *
403     * @param string $media The media ID
404     * @param string $mime The media's mime type
405     * @param bool $inline Is this displayed inline?
406     * @param int $size Size of the media file
407     */
408    public function logMedia(string $media, string $mime, bool $inline, int $size): void
409    {
410        global $INPUT;
411
412        [$mime1, $mime2] = explode('/', strtolower($mime));
413        $inline = $inline ? 1 : 0;
414        $size = (int)$size;
415
416        $ip = clientIP(true);
417        $user = $INPUT->server->str('REMOTE_USER');
418        $session = $this->getSession();
419
420        $this->db->exec(
421            'INSERT INTO media (
422                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
423                size, mime1, mime2, inline
424             ) VALUES (
425                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
426                ?, ?, ?, ?
427             )',
428            $media, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
429            $user, $session, $this->uid, $size, $mime1, $mime2, $inline
430        );
431    }
432
433    /**
434     * Log page edits
435     *
436     * @param string $page The page that was edited
437     * @param string $type The type of edit (create, edit, etc.)
438     */
439    public function logEdit(string $page, string $type): void
440    {
441        global $INPUT, $USERINFO;
442
443        $ip = clientIP(true);
444        $user = $INPUT->server->str('REMOTE_USER');
445        $session = $this->getSession();
446
447        $this->db->exec(
448            'INSERT INTO edits (
449                dt, page, type, ip, user, session, uid
450             ) VALUES (
451                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
452             )',
453            $page, $type, $ip, $user, $session, $this->uid
454        );
455
456        // log group access
457        if (isset($USERINFO['grps'])) {
458            $this->logGroups('edit', $USERINFO['grps']);
459        }
460    }
461
462    /**
463     * Log login/logoffs and user creations
464     *
465     * @param string $type The type of login event (login, logout, create)
466     * @param string $user The username (optional, will use current user if empty)
467     */
468    public function logLogin(string $type, string $user = ''): void
469    {
470        global $INPUT;
471
472        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
473
474        $ip = clientIP(true);
475        $session = $this->getSession();
476
477        $this->db->exec(
478            'INSERT INTO logins (
479                dt, type, ip, user, session, uid
480             ) VALUES (
481                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
482             )',
483            $type, $ip, $user, $session, $this->uid
484        );
485    }
486
487    /**
488     * Log the current page count and size as today's history entry
489     */
490    public function logHistoryPages(): void
491    {
492        global $conf;
493
494        // use the popularity plugin's search method to find the wanted data
495        /** @var helper_plugin_popularity $pop */
496        $pop = plugin_load('helper', 'popularity');
497        $list = [];
498        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
499        $page_count = $list['file_count'];
500        $page_size = $list['file_size'];
501
502        $this->db->exec(
503            'INSERT OR REPLACE INTO history (
504                info, value, dt
505             ) VALUES (
506                ?, ?, date("now")
507             )',
508            'page_count', $page_count
509        );
510        $this->db->exec(
511            'INSERT OR REPLACE INTO history (
512                info, value, dt
513             ) VALUES (
514                ?, ?, date("now")
515             )',
516            'page_size', $page_size
517        );
518    }
519
520    /**
521     * Log the current media count and size as today's history entry
522     */
523    public function logHistoryMedia(): void
524    {
525        global $conf;
526
527        // use the popularity plugin's search method to find the wanted data
528        /** @var helper_plugin_popularity $pop */
529        $pop = plugin_load('helper', 'popularity');
530        $list = [];
531        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
532        $media_count = $list['file_count'];
533        $media_size = $list['file_size'];
534
535        $this->db->exec(
536            'INSERT OR REPLACE INTO history (
537                info, value, dt
538             ) VALUES (
539                ?, ?, date("now")
540             )',
541            'media_count', $media_count
542        );
543        $this->db->exec(
544            'INSERT OR REPLACE INTO history (
545                info, value, dt
546             ) VALUES (
547                ?, ?, date("now")
548             )',
549            'media_size', $media_size
550        );
551    }
552}
553