xref: /plugin/statistics/Logger.php (revision a10aed88c50dc33e6ca4199888a06200a919160e)
1<?php
2
3namespace dokuwiki\plugin\statistics;
4
5use DeviceDetector\DeviceDetector;
6use DeviceDetector\Parser\Client\Browser;
7use DeviceDetector\Parser\Device\AbstractDeviceParser;
8use DeviceDetector\Parser\OperatingSystem;
9use dokuwiki\HTTP\DokuHTTPClient;
10use dokuwiki\plugin\sqlite\SQLiteDB;
11use dokuwiki\Utf8\Clean;
12use dokuwiki\Utf8\PhpString;
13use helper_plugin_popularity;
14use helper_plugin_statistics;
15
16
17class Logger
18{
19    /** @var helper_plugin_statistics The statistics helper plugin instance */
20    protected helper_plugin_statistics $hlp;
21
22    /** @var SQLiteDB The SQLite database instance */
23    protected SQLiteDB $db;
24
25    /** @var string The full user agent string */
26    protected string $uaAgent;
27
28    /** @var string The type of user agent (browser, robot, feedreader) */
29    protected string $uaType = 'browser';
30
31    /** @var string The browser/client name */
32    protected string $uaName;
33
34    /** @var string The browser/client version */
35    protected string $uaVersion;
36
37    /** @var string The operating system/platform */
38    protected string $uaPlatform;
39
40    /** @var string The unique user identifier */
41    protected string $uid;
42
43
44    /**
45     * Constructor
46     *
47     * Parses browser info and set internal vars
48     */
49    public function __construct(helper_plugin_statistics $hlp)
50    {
51        global $INPUT;
52
53        $this->hlp = $hlp;
54        $this->db = $this->hlp->getDB();
55
56        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
57
58        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
59        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
60        $dd->discardBotInformation();
61        $dd->parse();
62
63        if ($dd->isBot()) {
64            $this->uaType = 'robot';
65
66            // for now ignore bots
67            throw new \RuntimeException('Bot detected, not logging');
68        }
69
70        $this->uaAgent = $ua;
71        $this->uaName = Browser::getBrowserFamily($dd->getClient('name'));
72        $this->uaVersion = $dd->getClient('version');
73        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name'));
74        $this->uid = $this->getUID();
75
76        if ($dd->isFeedReader()) {
77            $this->uaType = 'feedreader';
78        }
79
80        $this->logLastseen();
81    }
82
83    /**
84     * Should be called before logging
85     *
86     * This starts a transaction, so all logging is done in one go
87     */
88    public function begin(): void
89    {
90        $this->hlp->getDB()->getPdo()->beginTransaction();
91    }
92
93    /**
94     * Should be called after logging
95     *
96     * This commits the transaction started in begin()
97     */
98    public function end(): void
99    {
100        $this->hlp->getDB()->getPdo()->commit();
101    }
102
103    /**
104     * Get the unique user ID
105     *
106     * @return string The unique user identifier
107     */
108    protected function getUID(): string
109    {
110        global $INPUT;
111
112        $uid = $INPUT->str('uid');
113        if (!$uid) $uid = get_doku_pref('plgstats', false);
114        if (!$uid) $uid = session_id();
115        return $uid;
116    }
117
118    /**
119     * Return the user's session ID
120     *
121     * This is usually our own managed session, not a PHP session (only in fallback)
122     *
123     * @return string The session identifier
124     */
125    protected function getSession(): string
126    {
127        global $INPUT;
128
129        $ses = $INPUT->str('ses');
130        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
131        if (!$ses) $ses = session_id();
132        return $ses;
133    }
134
135    /**
136     * Log that we've seen the user (authenticated only)
137     */
138    public function logLastseen(): void
139    {
140        global $INPUT;
141
142        if (empty($INPUT->server->str('REMOTE_USER'))) return;
143
144        $this->db->exec(
145            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
146            $INPUT->server->str('REMOTE_USER'),
147        );
148    }
149
150    /**
151     * Log actions by groups
152     *
153     * @param string $type The type of access to log ('view','edit')
154     * @param array $groups The groups to log
155     */
156    public function logGroups(string $type, array $groups): void
157    {
158        if (!is_array($groups)) {
159            return;
160        }
161
162        $tolog = (array)$this->hlp->getConf('loggroups');
163        $groups = array_intersect($groups, $tolog);
164        if ($groups === []) {
165            return;
166        }
167
168
169        $params = [];
170        $sql = "INSERT INTO groups (`type`, `group`) VALUES ";
171        foreach ($groups as $group) {
172            $sql .= '(?, ?),';
173            $params[] = $type;
174            $params[] = $group;
175        }
176        $sql = rtrim($sql, ',');
177        $this->db->exec($sql, $params);
178    }
179
180    /**
181     * Log external search queries
182     *
183     * Will not write anything if the referer isn't a search engine
184     *
185     * @param string $referer The HTTP referer URL
186     * @param string $type Reference to the type variable that will be modified
187     */
188    public function logExternalSearch(string $referer, string &$type): void
189    {
190        global $INPUT;
191
192        $searchEngine = new SearchEngines($referer);
193
194        if (!$searchEngine->isSearchEngine()) {
195            return; // not a search engine
196        }
197
198        $type = 'search';
199        $query = $searchEngine->getQuery();
200
201        // log it!
202        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
203        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
204    }
205
206    /**
207     * Log search data to the search related tables
208     *
209     * @param string $page The page being searched from
210     * @param string $query The search query
211     * @param array $words Array of search words
212     * @param string $engine The search engine name
213     */
214    public function logSearch(string $page, string $query, array $words, string $engine): void
215    {
216        $sid = $this->db->exec(
217            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
218            $page, $query, $engine
219        );
220        if (!$sid) return;
221
222        foreach ($words as $word) {
223            if (!$word) continue;
224            $this->db->exec(
225                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
226                $sid, $word
227            );
228        }
229    }
230
231    /**
232     * Log that the session was seen
233     *
234     * This is used to calculate the time people spend on the whole site
235     * during their session
236     *
237     * Viewcounts are used for bounce calculation
238     *
239     * @param int $addview set to 1 to count a view
240     */
241    public function logSession(int $addview = 0): void
242    {
243        // only log browser sessions
244        if ($this->uaType != 'browser') return;
245
246        $session = $this->getSession();
247        $this->db->exec(
248            'INSERT OR REPLACE INTO session (
249                session, dt, end, views, uid
250             ) VALUES (
251                ?,
252                CURRENT_TIMESTAMP,
253                CURRENT_TIMESTAMP,
254                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
255                ?
256             )',
257            $session, $session, $addview, $addview, $this->uid
258        );
259    }
260
261    /**
262     * Resolve IP to country/city and store in database
263     *
264     * @param string $ip The IP address to resolve
265     */
266    public function logIp(string $ip): void
267    {
268        // check if IP already known and up-to-date
269        $result = $this->db->queryValue(
270            "SELECT ip
271             FROM   iplocation
272             WHERE  ip = ?
273               AND  lastupd > date('now', '-30 days')",
274            $ip
275        );
276        if ($result) return;
277
278        $http = new DokuHTTPClient();
279        $http->timeout = 10;
280        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only
281
282        if (!$json) return; // FIXME log error
283        try {
284            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
285        } catch (\JsonException $e) {
286            return; // FIXME log error
287        }
288        if(!isset($data['status']) || $data['status'] !== 'success') {
289            return; // FIXME log error
290        }
291
292        $host = gethostbyaddr($ip);
293        $this->db->exec(
294            'INSERT OR REPLACE INTO iplocation (
295                    ip, country, code, city, host, lastupd
296                 ) VALUES (
297                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
298                 )',
299            $ip, $data['country'], $data['countryCode'], $data['city'], $host
300        );
301    }
302
303    /**
304     * Log a click on an external link
305     *
306     * Called from log.php
307     */
308    public function logOutgoing(): void
309    {
310        global $INPUT;
311
312        if (!$INPUT->str('ol')) return;
313
314        $link = $INPUT->str('ol');
315        $link_md5 = md5($link);
316        $session = $this->getSession();
317        $page = $INPUT->str('p');
318
319        $this->db->exec(
320            'INSERT INTO outlinks (
321                dt, session, page, link_md5, link
322             ) VALUES (
323                CURRENT_TIMESTAMP, ?, ?, ?, ?
324             )',
325            $session, $page, $link_md5, $link
326        );
327    }
328
329    /**
330     * Log a page access
331     *
332     * Called from log.php
333     */
334    public function logAccess(): void
335    {
336        global $INPUT, $USERINFO;
337
338        if (!$INPUT->str('p')) return;
339
340        # FIXME check referer against blacklist and drop logging for bad boys
341
342        // handle referer
343        $referer = trim($INPUT->str('r'));
344        if ($referer) {
345            $ref = $referer;
346            $ref_md5 = md5($referer);
347            if (str_starts_with($referer, DOKU_URL)) {
348                $ref_type = 'internal';
349            } else {
350                $ref_type = 'external';
351                $this->logExternalSearch($referer, $ref_type);
352            }
353        } else {
354            $ref = '';
355            $ref_md5 = '';
356            $ref_type = '';
357        }
358
359        $page = $INPUT->str('p');
360        $ip = clientIP(true);
361        $sx = $INPUT->int('sx');
362        $sy = $INPUT->int('sy');
363        $vx = $INPUT->int('vx');
364        $vy = $INPUT->int('vy');
365        $js = $INPUT->int('js');
366        $user = $INPUT->server->str('REMOTE_USER');
367        $session = $this->getSession();
368
369        $this->db->exec(
370            'INSERT INTO access (
371                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
372                screen_x, screen_y, view_x, view_y, js, user, session, uid
373             ) VALUES (
374                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
375                ?, ?, ?, ?, ?, ?, ?, ?
376             )',
377            $page, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
378            $ref, $ref_md5, $ref_type, $sx, $sy, $vx, $vy, $js, $user, $session, $this->uid
379        );
380
381        if ($ref_md5) {
382            $this->db->exec(
383                'INSERT OR IGNORE INTO refseen (
384                    ref_md5, dt
385                 ) VALUES (
386                    ?, CURRENT_TIMESTAMP
387                 )',
388                $ref_md5
389            );
390        }
391
392        // log group access
393        if (isset($USERINFO['grps'])) {
394            $this->logGroups('view', $USERINFO['grps']);
395        }
396
397        // resolve the IP
398        $this->logIp(clientIP(true));
399    }
400
401    /**
402     * Log access to a media file
403     *
404     * Called from action.php
405     *
406     * @param string $media The media ID
407     * @param string $mime The media's mime type
408     * @param bool $inline Is this displayed inline?
409     * @param int $size Size of the media file
410     */
411    public function logMedia(string $media, string $mime, bool $inline, int $size): void
412    {
413        global $INPUT;
414
415        [$mime1, $mime2] = explode('/', strtolower($mime));
416        $inline = $inline ? 1 : 0;
417        $size = (int)$size;
418
419        $ip = clientIP(true);
420        $user = $INPUT->server->str('REMOTE_USER');
421        $session = $this->getSession();
422
423        $this->db->exec(
424            'INSERT INTO media (
425                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
426                size, mime1, mime2, inline
427             ) VALUES (
428                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
429                ?, ?, ?, ?
430             )',
431            $media, $ip, $this->uaAgent, $this->uaName, $this->uaType, $this->uaVersion, $this->uaPlatform,
432            $user, $session, $this->uid, $size, $mime1, $mime2, $inline
433        );
434    }
435
436    /**
437     * Log page edits
438     *
439     * @param string $page The page that was edited
440     * @param string $type The type of edit (create, edit, etc.)
441     */
442    public function logEdit(string $page, string $type): void
443    {
444        global $INPUT, $USERINFO;
445
446        $ip = clientIP(true);
447        $user = $INPUT->server->str('REMOTE_USER');
448        $session = $this->getSession();
449
450        $this->db->exec(
451            'INSERT INTO edits (
452                dt, page, type, ip, user, session, uid
453             ) VALUES (
454                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
455             )',
456            $page, $type, $ip, $user, $session, $this->uid
457        );
458
459        // log group access
460        if (isset($USERINFO['grps'])) {
461            $this->logGroups('edit', $USERINFO['grps']);
462        }
463    }
464
465    /**
466     * Log login/logoffs and user creations
467     *
468     * @param string $type The type of login event (login, logout, create)
469     * @param string $user The username (optional, will use current user if empty)
470     */
471    public function logLogin(string $type, string $user = ''): void
472    {
473        global $INPUT;
474
475        if (!$user) $user = $INPUT->server->str('REMOTE_USER');
476
477        $ip = clientIP(true);
478        $session = $this->getSession();
479
480        $this->db->exec(
481            'INSERT INTO logins (
482                dt, type, ip, user, session, uid
483             ) VALUES (
484                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
485             )',
486            $type, $ip, $user, $session, $this->uid
487        );
488    }
489
490    /**
491     * Log the current page count and size as today's history entry
492     */
493    public function logHistoryPages(): void
494    {
495        global $conf;
496
497        // use the popularity plugin's search method to find the wanted data
498        /** @var helper_plugin_popularity $pop */
499        $pop = plugin_load('helper', 'popularity');
500        $list = [];
501        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
502        $page_count = $list['file_count'];
503        $page_size = $list['file_size'];
504
505        $this->db->exec(
506            'INSERT OR REPLACE INTO history (
507                info, value, dt
508             ) VALUES (
509                ?, ?, date("now")
510             )',
511            'page_count', $page_count
512        );
513        $this->db->exec(
514            'INSERT OR REPLACE INTO history (
515                info, value, dt
516             ) VALUES (
517                ?, ?, date("now")
518             )',
519            'page_size', $page_size
520        );
521    }
522
523    /**
524     * Log the current media count and size as today's history entry
525     */
526    public function logHistoryMedia(): void
527    {
528        global $conf;
529
530        // use the popularity plugin's search method to find the wanted data
531        /** @var helper_plugin_popularity $pop */
532        $pop = plugin_load('helper', 'popularity');
533        $list = [];
534        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
535        $media_count = $list['file_count'];
536        $media_size = $list['file_size'];
537
538        $this->db->exec(
539            'INSERT OR REPLACE INTO history (
540                info, value, dt
541             ) VALUES (
542                ?, ?, date("now")
543             )',
544            'media_count', $media_count
545        );
546        $this->db->exec(
547            'INSERT OR REPLACE INTO history (
548                info, value, dt
549             ) VALUES (
550                ?, ?, date("now")
551             )',
552            'media_size', $media_size
553        );
554    }
555}
556