xref: /plugin/statistics/Logger.php (revision 05786d8378a67b4ca56349b43f42b22e0f63c6a3)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use helper_plugin_popularity;
13use helper_plugin_statistics;
14
15
/**
 * Writes statistics events to the plugin's SQLite database
 *
 * On construction the user agent of the current request is analysed. Requests that are
 * identified as bots are rejected with a RuntimeException, so callers need to be
 * prepared to catch it. All log*() methods write to the database obtained from the
 * statistics helper.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client family name, 'Unknown' if it could not be detected */
    protected string $uaName;

    /** @var string The browser/client version (major version only), may be empty */
    protected string $uaVersion;

    /** @var string The operating system family, 'Unknown' if it could not be detected */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null HTTP client used for IP lookups, can be injected for testing */
    protected ?DokuHTTPClient $httpClient = null;


    /**
     * Constructor
     *
     * Parses the user agent of the current request, sets the internal client properties
     * and records that the current (authenticated) user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, provides database and configuration
     * @param DokuHTTPClient|null $httpClient HTTP client for IP lookups, a new one is created on demand if omitted
     * @throws \RuntimeException when the user agent is identified as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        // only the major version is of interest for the statistics
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        }

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        // use the same connection that all log*() methods write to
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, falling back to the 'plgstats' preference
     * cookie and finally to the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        return $this->resolveIdentifier('uid', 'plgstats');
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Taken from the 'ses' request parameter, falling back to the 'plgstatsses' preference
     * cookie and finally to the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        return $this->resolveIdentifier('ses', 'plgstatsses');
    }

    /**
     * Look up an identifier in the request, the preference cookie or the PHP session
     *
     * @param string $param Name of the request parameter to check first
     * @param string $pref Name of the DokuWiki preference to check second
     * @return string The first non-empty identifier found, may be empty if none is available
     */
    protected function resolveIdentifier(string $param, string $pref): string
    {
        global $INPUT;

        $id = $INPUT->str($param);
        if (!$id) $id = get_doku_pref($pref, false);
        if (!$id) $id = session_id();

        // the preference lookup and session_id() may both yield false, always return a string
        return (string)$id;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        // strict comparison: a user named '0' is still a valid user
        if ($user === '') return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups that are listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $toLog);
        if (!$groups) return;

        // one "(type, group)" tuple per group, inserted with a single statement
        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));

        $this->db->exec("INSERT INTO groups (`type`, `group`) VALUES $placeholders", $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Passed by reference, set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * The query is stored once, each non-empty word is stored separately and linked
     * to the query by the ID returned from the first insert.
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();

        // REPLACE rewrites the whole row: the start time (dt) and the view count of an
        // already known session have to be carried over explicitly, only 'end' moves on.
        // Without preserving dt, start and end would always be identical.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP was already resolved within the last 30 days. Lookup
     * failures are silently ignored.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $known = $this->db->queryValue(
            "SELECT ip
             FROM   iplocation
             WHERE  ip = ?
               AND  lastupd > date('now', '-30 days')",
            $ip
        );
        if ($known) return;

        $http = $this->httpClient ?? new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data) || ($data['status'] ?? null) !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() returns false for malformed addresses, store the plain IP then
        $host = gethostbyaddr($ip);
        if ($host === false) $host = $ip;

        // a successful response is not guaranteed to carry all location fields
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $ip,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link from the 'ol' and the originating page from the 'p' request
     * parameter. Nothing is logged when no link was passed.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $this->getSession(),
            $INPUT->str('p'),
            md5($link),
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Besides the access itself this records the referer (and an external search if the
     * referer is a search engine), the groups of the current user and the location of
     * the client IP. Nothing is logged when no page was passed in the 'p' parameter.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = trim($INPUT->str('r'));
        $ref_md5 = '';
        $ref_type = '';
        if ($ref) {
            $ref_md5 = md5($ref);
            if (str_starts_with($ref, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // switches $ref_type to 'search' if the referer is a search engine
                $this->logExternalSearch($ref, $ref_type);
            }
        }

        $ip = clientIP(true);

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $INPUT->int('sx'),
            $INPUT->int('sy'),
            $INPUT->int('vx'),
            $INPUT->int('vy'),
            $INPUT->int('js'),
            $INPUT->server->str('REMOTE_USER'),
            $this->getSession(),
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a slash must not cause an undefined subtype
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            clientIP(true),
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $INPUT->server->str('REMOTE_USER'),
            $this->getSession(),
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline ? 1 : 0
        );
    }

    /**
     * Log page edits
     *
     * Also records the groups of the current user as 'edit' access.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            clientIP(true),
            $INPUT->server->str('REMOTE_USER'),
            $this->getSession(),
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        // strict comparison: a user named '0' must not be replaced by the current user
        if ($user === '') $user = $INPUT->server->str('REMOTE_USER');

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            clientIP(true),
            $user,
            $this->getSession(),
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory($conf['datadir'], 'page', false);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory($conf['mediadir'], 'media', true);
    }

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * The values are stored as '<prefix>_count' and '<prefix>_size'. Nothing is stored
     * when the popularity plugin, whose search callback does the counting, is not available.
     *
     * @param string $dir The directory to scan
     * @param string $prefix Prefix of the history info names ('page' or 'media')
     * @param bool $all Value of the 'all' option handed to the search callback
     */
    protected function logHistory(string $dir, string $prefix, bool $all): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) return; // plugin missing or disabled, nothing we can count with

        $list = [];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        // the keys are only present when the callback actually counted something
        $values = [
            $prefix . '_count' => $list['file_count'] ?? 0,
            $prefix . '_size' => $list['file_size'] ?? 0,
        ];

        foreach ($values as $info => $value) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $info,
                $value
            );
        }
    }
}
554