<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records to the plugin's SQLite database
 *
 * The user agent of the current request is analysed once in the constructor;
 * the individual log*() methods then store page views, media access, edits,
 * logins, searches, sessions, outgoing links and IP locations.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    /**
     * Constructor
     *
     * Parses the user agent of the current request, initializes the internal
     * user agent fields and the unique user ID, and records the last-seen
     * timestamp of the authenticated user (if any).
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use instead of a fresh one
     * @throws \RuntimeException when the user agent is detected as a bot (bots are not logged)
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->hlp->getDB()->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->hlp->getDB()->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then from the 'plgstats'
     * preference, then from the PHP session ID. The resulting value is
     * written back to the 'plgstats' preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Taken from the 'ses' request parameter, then from the 'plgstatsses'
     * preference, then from the PHP session ID. The resulting value is
     * written back to the 'plgstatsses' preference.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     *
     * Does nothing when REMOTE_USER is empty.
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user,
        );
    }

    /**
     * Log actions by groups
     *
     * When the 'loggroups' setting contains at least one non-empty entry,
     * only groups listed there are logged.
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(int $pid, string $type, array $groups): void
    {
        if (empty($groups) || !$pid) return;

        $toLog = (array)$this->hlp->getConf('loggroups');

        // if specific groups are configured, limit logging to them only
        if (!empty(array_filter($toLog))) {
            $groups = array_intersect($groups, $toLog);
        }
        if (!$groups) return;

        // one "(?, ?, ?)" tuple per group, all inserted with a single statement
        $placeholders = join(',', array_fill(0, count($groups), '(?, ?, ?)'));
        $sql = "INSERT INTO groups (`pid`, `type`, `group`) VALUES $placeholders";

        $params = [];
        foreach ($groups as $group) {
            $params[] = $pid;
            $params[] = $type;
            $params[] = $group;
        }

        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain, skip logging if no domain is found
     *
     * The domain is everything after the last '@'. Addresses without an '@',
     * or with the '@' as first character, are skipped.
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param string $mail The email to extract the domain from
     */
    public function logDomain(int $pid, string $type, string $mail): void
    {
        if (!$pid) return;

        $pos = strrpos($mail, '@');
        if (!$pos) return; // covers both "not found" (false) and "leading @" (0)
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = "INSERT INTO domain (`pid`, `type`, `domain`) VALUES (?, ?, ?)";
        $this->db->exec($sql, [$pid, $type, $domain]);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' for search engine referers
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = [];
        if ($query) {
            $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        }
        $this->logSearch($INPUT->str('p'), $searchEngine->getEngine(), $query, $words);
    }

    /**
     * Log search data to the search related tables
     *
     * Inserts one row into `search` and, if that returned a truthy id, one row
     * into `searchwords` per non-empty word.
     *
     * @param string $page The page being searched from
     * @param string $engine The search engine name
     * @param string|null $query The search query (stored as empty string when null)
     * @param array|null $words Array of search words (null is treated as no words)
     */
    public function logSearch(string $page, string $engine, ?string $query, ?array $words): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query ?? '',
            $engine
        );
        if (!$sid) return;

        // $words is nullable by signature; iterating null would raise a warning
        foreach ($words ?? [] as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType !== 'browser') return;

        $session = $this->getSession();

        // The row is replaced on every call. The start time (dt) and the view
        // counter are carried over from the existing row via sub-selects, only
        // the end time always moves forward. Without carrying over dt, start
        // and end would always be identical and no duration could be computed.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP already has an entry updated within the
     * last 30 days. Failed lookups (no response, invalid JSON, non-success
     * status) are silently skipped.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() returns false on malformed input; store an empty host then
        $host = gethostbyaddr($ip);
        if ($host === false) $host = '';

        // a successful response is not guaranteed to carry every field
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link from the 'ol' and the page from the 'p' request
     * parameter; does nothing when 'ol' is empty.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Stores the access row, remembers the referer, logs the user's groups
     * and mail domain and finally resolves the client IP. Does nothing when
     * the 'p' request parameter is empty.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $accessId = $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($accessId, 'view', $USERINFO['grps']);
        }
        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($accessId, 'view', $USERINFO['mail']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * The mime type is lower-cased and split at '/' into main type and
     * subtype; a mime type without '/' is stored with an empty subtype.
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // pad so that a mime type without '/' does not hit an undefined index
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * Also logs the editing user's groups and mail domain against the new
     * edit row.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $editId = $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($editId, 'edit', $USERINFO['grps']);
        }

        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($editId, 'edit', $USERINFO['mail']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as history entries
     *
     * Stored under the keys 'page_count' and 'page_size'.
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistoryEntries($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as history entries
     *
     * Stored under the keys 'media_count' and 'media_size'.
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistoryEntries($conf['mediadir'], true, 'media');
    }

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * Uses the popularity plugin's search callback to gather the data. When
     * the popularity helper cannot be loaded, nothing is logged.
     *
     * @param string $directory The directory to scan
     * @param bool $all Value of the 'all' option handed to the search callback
     * @param string $prefix Key prefix; entries are stored as "{$prefix}_count" and "{$prefix}_size"
     */
    protected function logHistoryEntries(string $directory, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) return; // helper not available, nothing to count with

        $list = $this->initEmptySearchList();
        search($list, $directory, [$pop, 'searchCountCallback'], ['all' => $all], '');

        foreach (['count', 'size'] as $metric) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $prefix . '_' . $metric,
                $list['file_' . $metric]
            );
        }
    }

    /**
     * Create the zero-initialized result list expected by the search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest',
        ], 0);
    }
}