xref: /plugin/statistics/Logger.php (revision b188870f1ad67993240d4a303df1f5c23d8ee910)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15
16class Logger
17{
18    /** @var helper_plugin_statistics The statistics helper plugin instance */
19    protected helper_plugin_statistics $hlp;
20
21    /** @var SQLiteDB The SQLite database instance */
22    protected SQLiteDB $db;
23
24    /** @var string The full user agent string */
25    protected string $uaAgent;
26
27    /** @var string The type of user agent (browser, robot, feedreader) */
28    protected string $uaType = 'browser';
29
30    /** @var string The browser/client name */
31    protected string $uaName;
32
33    /** @var string The browser/client version */
34    protected string $uaVersion;
35
36    /** @var string The operating system/platform */
37    protected string $uaPlatform;
38
39    /** @var string The unique user identifier */
40    protected string $uid;
41
42    /** @var DokuHTTPClient|null The HTTP client instance for testing */
43    protected ?DokuHTTPClient $httpClient = null;
44
45
46    /**
47     * Constructor
48     *
49     * Parses browser info and set internal vars
50     */
51    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
52    {
53        global $INPUT;
54
55        $this->hlp = $hlp;
56        $this->db = $this->hlp->getDB();
57        $this->httpClient = $httpClient;
58
59        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
60
61        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
62        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
63        $dd->discardBotInformation();
64        $dd->parse();
65
66        if ($dd->isFeedReader()) {
67            $this->uaType = 'feedreader';
68        } else if ($dd->isBot()) {
69            $this->uaType = 'robot';
70
71            // for now ignore bots
72            throw new \RuntimeException('Bot detected, not logging');
73        }
74
75        $this->uaAgent = $ua;
76        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
77        $this->uaVersion = $dd->getClient('version') ?: '0';
78        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
79        $this->uid = $this->getUID();
80
81
82        $this->logLastseen();
83    }
84
85    /**
86     * Should be called before logging
87     *
88     * This starts a transaction, so all logging is done in one go
89     */
90    public function begin(): void
91    {
92        $this->hlp->getDB()->getPdo()->beginTransaction();
93    }
94
95    /**
96     * Should be called after logging
97     *
98     * This commits the transaction started in begin()
99     */
100    public function end(): void
101    {
102        $this->hlp->getDB()->getPdo()->commit();
103    }
104
105    /**
106     * Get the unique user ID
107     *
108     * @return string The unique user identifier
109     */
110    protected function getUID(): string
111    {
112        global $INPUT;
113
114        $uid = $INPUT->str('uid');
115        if (!$uid) $uid = get_doku_pref('plgstats', false);
116        if (!$uid) $uid = session_id();
117        set_doku_pref('plgstats', $uid);
118        return $uid;
119    }
120
121    /**
122     * Return the user's session ID
123     *
124     * This is usually our own managed session, not a PHP session (only in fallback)
125     *
126     * @return string The session identifier
127     */
128    protected function getSession(): string
129    {
130        global $INPUT;
131
132        $ses = $INPUT->str('ses');
133        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
134        if (!$ses) $ses = session_id();
135        set_doku_pref('plgstatsses', $ses);
136        return $ses;
137    }
138
139    /**
140     * Log that we've seen the user (authenticated only)
141     */
142    public function logLastseen(): void
143    {
144        global $INPUT;
145
146        if (empty($INPUT->server->str('REMOTE_USER'))) return;
147
148        $this->db->exec(
149            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
150            $INPUT->server->str('REMOTE_USER'),
151        );
152    }
153
154    /**
155     * Log actions by groups
156     *
157     * @param string $type The type of access to log ('view','edit')
158     * @param array $groups The groups to log
159     */
160    public function logGroups(string $type, array $groups): void
161    {
162        if (!$groups) return;
163
164        $toLog = (array)$this->hlp->getConf('loggroups');
165        $groups = array_intersect($groups, $toLog);
166        if (!$groups) return;
167
168        $placeholders = join(',', array_fill(0, count($groups), '(?, ?)'));
169        $params = [];
170        $sql = "INSERT INTO groups (`type`, `group`) VALUES $placeholders";
171        foreach ($groups as $group) {
172            $params[] = $type;
173            $params[] = $group;
174        }
175        $sql = rtrim($sql, ',');
176        $this->db->exec($sql, $params);
177    }
178
179    /**
180     * Log external search queries
181     *
182     * Will not write anything if the referer isn't a search engine
183     *
184     * @param string $referer The HTTP referer URL
185     * @param string $type Reference to the type variable that will be modified
186     */
187    public function logExternalSearch(string $referer, string &$type): void
188    {
189        global $INPUT;
190
191        $searchEngine = new SearchEngines($referer);
192
193        if (!$searchEngine->isSearchEngine()) {
194            return; // not a search engine
195        }
196
197        $type = 'search';
198        $query = $searchEngine->getQuery();
199
200        // log it!
201        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
202        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
203    }
204
205    /**
206     * Log search data to the search related tables
207     *
208     * @param string $page The page being searched from
209     * @param string $query The search query
210     * @param array $words Array of search words
211     * @param string $engine The search engine name
212     */
213    public function logSearch(string $page, string $query, array $words, string $engine): void
214    {
215        $sid = $this->db->exec(
216            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
217            $page, $query, $engine
218        );
219        if (!$sid) return;
220
221        foreach ($words as $word) {
222            if (!$word) continue;
223            $this->db->exec(
224                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
225                $sid, $word
226            );
227        }
228    }
229
230    /**
231     * Log that the session was seen
232     *
233     * This is used to calculate the time people spend on the whole site
234     * during their session
235     *
236     * Viewcounts are used for bounce calculation
237     *
238     * @param int $addview set to 1 to count a view
239     */
240    public function logSession(int $addview = 0): void
241    {
242        // only log browser sessions
243        if ($this->uaType != 'browser') return;
244
245        $session = $this->getSession();
246        $this->db->exec(
247            'INSERT OR REPLACE INTO session (
248                session, dt, end, views, uid
249             ) VALUES (
250                ?,
251                CURRENT_TIMESTAMP,
252                CURRENT_TIMESTAMP,
253                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
254                ?
255             )',
256            $session, $session, $addview, $addview, $this->uid
257        );
258    }
259
260    /**
261     * Resolve IP to country/city and store in database
262     *
263     * @param string $ip The IP address to resolve
264     */
265    public function logIp(string $ip): void
266    {
267        // check if IP already known and up-to-date
268        $result = $this->db->queryValue(
269            "SELECT ip
270             FROM   iplocation
271             WHERE  ip = ?
272               AND  lastupd > date('now', '-30 days')",
273            $ip
274        );
275        if ($result) return;
276
277        $http = $this->httpClient ?: new DokuHTTPClient();
278        $http->timeout = 10;
279        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
280
281        if (!$json) return; // FIXME log error
282        try {
283            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
284        } catch (\JsonException $e) {
285            return; // FIXME log error
286        }
287        if (!isset($data['status']) || $data['status'] !== 'success') {
288            return; // FIXME log error
289        }
290
291        $host = gethostbyaddr($ip);
292        $this->db->exec(
293            'INSERT OR REPLACE INTO iplocation (
294                    ip, country, code, city, host, lastupd
295                 ) VALUES (
296                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
297                 )',
298            $ip, $data['country'], $data['countryCode'], $data['city'], $host
299        );
300    }
301
302    /**
303     * Log a click on an external link
304     *
305     * Called from log.php
306     */
307    public function logOutgoing(): void
308    {
309        global $INPUT;
310
311        if (!$INPUT->str('ol')) return;
312
313        $link = $INPUT->str('ol');
314        $link_md5 = md5($link);
315        $session = $this->getSession();
316        $page = $INPUT->str('p');
317
318        $this->db->exec(
319            'INSERT INTO outlinks (
320                dt, session, page, link_md5, link
321             ) VALUES (
322                CURRENT_TIMESTAMP, ?, ?, ?, ?
323             )',
324            $session, $page, $link_md5, $link
325        );
326    }
327
328    /**
329     * Log a page access
330     *
331     * Called from log.php
332     */
333    public function logAccess(): void
334    {
335        global $INPUT, $USERINFO;
336
337        if (!$INPUT->str('p')) return;
338
339        # FIXME check referer against blacklist and drop logging for bad boys
340
341        // handle referer
342        $referer = trim($INPUT->str('r'));
343        if ($referer) {
344            $ref = $referer;
345            $ref_md5 = md5($referer);
346            if (str_starts_with($referer, DOKU_URL)) {
347                $ref_type = 'internal';
348            } else {
349                $ref_type = 'external';
350                $this->logExternalSearch($referer, $ref_type);
351            }
352        } else {
353            $ref = '';
354            $ref_md5 = '';
355            $ref_type = '';
356        }
357
358        $page = $INPUT->str('p');
359        $ip = clientIP(true);
360        $sx = $INPUT->int('sx');
361        $sy = $INPUT->int('sy');
362        $vx = $INPUT->int('vx');
363        $vy = $INPUT->int('vy');
364        $js = $INPUT->int('js');
365        $user = $INPUT->server->str('REMOTE_USER');
366        $session = $this->getSession();
367
368        $this->db->exec(
369            'INSERT INTO access (
370                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
371                screen_x, screen_y, view_x, view_y, js, user, session, uid
372             ) VALUES (
373                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
374                ?, ?, ?, ?, ?, ?, ?, ?
375             )',
376            $page, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
377            $ref, $ref_md5, $ref_type, $sx, $sy, $vx, $vy, $js, $user, $session, $this->uid
378        );
379
380        if ($ref_md5) {
381            $this->db->exec(
382                'INSERT OR IGNORE INTO refseen (
383                    ref_md5, dt
384                 ) VALUES (
385                    ?, CURRENT_TIMESTAMP
386                 )',
387                $ref_md5
388            );
389        }
390
391        // log group access
392        if (isset($USERINFO['grps'])) {
393            $this->logGroups('view', $USERINFO['grps']);
394        }
395
396        // resolve the IP
397        $this->logIp(clientIP(true));
398    }
399
400    /**
401     * Log access to a media file
402     *
403     * Called from action.php
404     *
405     * @param string $media The media ID
406     * @param string $mime The media's mime type
407     * @param bool $inline Is this displayed inline?
408     * @param int $size Size of the media file
409     */
410    public function logMedia(string $media, string $mime, bool $inline, int $size): void
411    {
412        global $INPUT;
413
414        [$mime1, $mime2] = explode('/', strtolower($mime));
415        $inline = $inline ? 1 : 0;
416        $size = (int)$size;
417
418        $ip = clientIP(true);
419        $user = $INPUT->server->str('REMOTE_USER');
420        $session = $this->getSession();
421
422        $this->db->exec(
423            'INSERT INTO media (
424                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
425                size, mime1, mime2, inline
426             ) VALUES (
427                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
428                ?, ?, ?, ?
429             )',
430            $media, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
431            $user, $session, $this->uid, $size, $mime1, $mime2, $inline
432        );
433    }
434
435    /**
436     * Log page edits
437     *
438     * @param string $page The page that was edited
439     * @param string $type The type of edit (create, edit, etc.)
440     */
441    public function logEdit(string $page, string $type): void
442    {
443        global $INPUT, $USERINFO;
444
445        $ip = clientIP(true);
446        $user = $INPUT->server->str('REMOTE_USER');
447        $session = $this->getSession();
448
449        $this->db->exec(
450            'INSERT INTO edits (
451                dt, page, type, ip, user, session, uid
452             ) VALUES (
453                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
454             )',
455            $page, $type, $ip, $user, $session, $this->uid
456        );
457
458        // log group access
459        if (isset($USERINFO['grps'])) {
460            $this->logGroups('edit', $USERINFO['grps']);
461        }
462    }
463
464    /**
465     * Log login/logoffs and user creations
466     *
467     * @param string $type The type of login event (login, logout, create)
468     * @param string $user The username (optional, will use current user if empty)
469     */
470    public function logLogin(string $type, string $user = ''): void
471    {
472        global $INPUT;
473
474        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
475
476        $ip = clientIP(true);
477        $session = $this->getSession();
478
479        $this->db->exec(
480            'INSERT INTO logins (
481                dt, type, ip, user, session, uid
482             ) VALUES (
483                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
484             )',
485            $type, $ip, $user, $session, $this->uid
486        );
487    }
488
489    /**
490     * Log the current page count and size as today's history entry
491     */
492    public function logHistoryPages(): void
493    {
494        global $conf;
495
496        // use the popularity plugin's search method to find the wanted data
497        /** @var helper_plugin_popularity $pop */
498        $pop = plugin_load('helper', 'popularity');
499        $list = $this->initEmptySearchList();
500        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
501        $page_count = $list['file_count'];
502        $page_size = $list['file_size'];
503
504        $this->db->exec(
505            'INSERT OR REPLACE INTO history (
506                info, value, dt
507             ) VALUES (
508                ?, ?, CURRENT_TIMESTAMP
509             )',
510            'page_count', $page_count
511        );
512        $this->db->exec(
513            'INSERT OR REPLACE INTO history (
514                info, value, dt
515             ) VALUES (
516                ?, ?, CURRENT_TIMESTAMP
517             )',
518            'page_size', $page_size
519        );
520    }
521
522    /**
523     * Log the current media count and size as today's history entry
524     */
525    public function logHistoryMedia(): void
526    {
527        global $conf;
528
529        // use the popularity plugin's search method to find the wanted data
530        /** @var helper_plugin_popularity $pop */
531        $pop = plugin_load('helper', 'popularity');
532        $list = $this->initEmptySearchList();
533        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
534        $media_count = $list['file_count'];
535        $media_size = $list['file_size'];
536
537        $this->db->exec(
538            'INSERT OR REPLACE INTO history (
539                info, value, dt
540             ) VALUES (
541                ?, ?, CURRENT_TIMESTAMP
542             )',
543            'media_count', $media_count
544        );
545        $this->db->exec(
546            'INSERT OR REPLACE INTO history (
547                info, value, dt
548             ) VALUES (
549                ?, ?, CURRENT_TIMESTAMP
550             )',
551            'media_size', $media_size
552        );
553    }
554
555    /**
556     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
557     * @return array
558     */
559    protected function initEmptySearchList()
560    {
561        return array_fill_keys([
562            'file_count',
563            'file_size',
564            'file_max',
565            'file_min',
566            'dir_count',
567            'dir_nest',
568            'file_oldest'
569        ], 0);
570    }
571}
572