<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records (page access, media access, edits, logins,
 * searches, sessions, IP locations and history values) to the SQLite database
 * provided by the statistics helper plugin.
 *
 * The user agent of the current request is analyzed once in the constructor;
 * the result is attached to the rows written by the log*() methods.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null Optional HTTP client to use instead of a fresh one (e.g. for testing) */
    protected ?DokuHTTPClient $httpClient = null;


    /**
     * Constructor
     *
     * Parses the user agent of the current request, initializes the internal
     * user agent/user id fields and records the "last seen" time of the user.
     *
     * @param helper_plugin_statistics $hlp The statistics helper; provides the database and configuration
     * @param DokuHTTPClient|null $httpClient Optional HTTP client used by logIp()
     * @throws IgnoreException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();


        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->hlp->getDB()->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->hlp->getDB()->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then from the 'plgstats'
     * preference, then from the PHP session id. The result is written back
     * to the 'plgstats' preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback)
     *
     * Taken from the 'ses' request parameter, then from the 'plgstatsses'
     * preference, then from the PHP session id. The result is written back
     * to the 'plgstatsses' preference.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        // compare against the empty string: a user named "0" is still a user
        if ($user === '') return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Nothing is written when no groups are given, when $pid is empty or when
     * none of the given groups is among the configured 'loggroups'.
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(int $pid, string $type, array $groups): void
    {
        if ($groups === [] || !$pid) return;

        $toLog = (array)$this->hlp->getConf('loggroups');

        // if specific groups are configured, limit logging to them only
        $groups = empty(array_filter($toLog)) ? $groups : array_intersect($groups, $toLog);
        if (!$groups) return;

        // one "(?, ?, ?)" tuple per group, all inserted with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?, ?)'));
        $sql = "INSERT INTO groups (`pid`, `type`, `group`) VALUES $placeholders";

        $params = [];
        foreach ($groups as $group) {
            $params[] = $pid;
            $params[] = $type;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain, skip logging if no domain is found
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param string $mail The email to extract the domain from
     */
    public function logDomain(int $pid, string $type, string $mail): void
    {
        if (!$pid) return;

        // no '@' at all (false) or '@' as very first character (0): nothing to log
        $pos = strrpos($mail, '@');
        if (!$pos) return;
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = "INSERT INTO domain (`pid`, `type`, `domain`) VALUES (?, ?, ?)";
        $this->db->exec($sql, [$pid, $type, $domain]);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = [];
        if ($query) {
            $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        }
        $this->logSearch($INPUT->str('p'), $searchEngine->getEngine(), $query, $words);
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $page The page being searched from
     * @param string $engine The search engine name
     * @param string|null $query The search query; null is stored as an empty string
     * @param array|null $words Array of search words; null is treated like an empty list
     */
    public function logSearch(string $page, string $engine, ?string $query, ?array $words): void
    {
        // the value returned by exec() is used as foreign key for the word rows
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query ?? '',
            $engine
        );
        if (!$sid) return;

        // $words is nullable by signature, never iterate over null
        foreach ($words ?? [] as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        // NOTE(review): the statement below writes CURRENT_TIMESTAMP to both dt and end
        // on every call, so an existing row's dt is replaced as well - confirm against
        // the schema that this is intended for the session duration calculation.
        $session = $this->getSession();
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                CURRENT_TIMESTAMP,
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Does nothing when the IP has an entry that was updated within the last
     * 30 days, or when the remote lookup fails or reports no success.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        $host = gethostbyaddr($ip);
        // the response is remote data: do not rely on every field being present
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link from the 'ol' and the page from the 'p' request parameter.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Reads page ('p'), referer ('r'), screen/viewport sizes ('sx', 'sy', 'vx', 'vy')
     * and the 'js' flag from the request, writes the access row and then logs the
     * referer, the user's groups, the user's mail domain and the IP location.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        // compare against the empty string: "0" is a valid page id
        if ($page === '') return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        } else {
            $ref = '';
            $ref_md5 = '';
            $ref_type = '';
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $accessId = $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($accessId, 'view', $USERINFO['grps']);
        }
        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($accessId, 'view', $USERINFO['mail']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a slash has no second part, store an empty string then
        $mimeParts = explode('/', strtolower($mime));
        $mime1 = $mimeParts[0];
        $mime2 = $mimeParts[1] ?? '';
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * Also logs the editing user's groups and mail domain for the new row.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $editId = $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($editId, 'edit', $USERINFO['grps']);
        }

        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($editId, 'edit', $USERINFO['mail']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        // only fall back when no name was given; "0" is a name, not "no name"
        if ($user === '') $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $list = $this->countFiles($conf['datadir'], false);
        $this->storeHistory('page_count', $list['file_count']);
        $this->storeHistory('page_size', $list['file_size']);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $list = $this->countFiles($conf['mediadir'], true);
        $this->storeHistory('media_count', $list['file_count']);
        $this->storeHistory('media_size', $list['file_size']);
    }

    /**
     * Run the popularity plugin's counting callback over a directory
     *
     * @param string $dir The base directory to search
     * @param bool $all Value of the 'all' option handed to the search callback
     * @return array The search list filled by the callback (see initEmptySearchList() for the keys)
     */
    protected function countFiles(string $dir, bool $all): array
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');
        return $list;
    }

    /**
     * Insert or replace a single value in the history table, stamped with the current time
     *
     * @param string $info The name of the history value (e.g. 'page_count')
     * @param int|float $value The value to store
     */
    protected function storeHistory(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }

    /**
     * Create the zero-initialized result list expected by the popularity search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}