<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics events (page access, media access, edits, logins, searches,
 * sessions, outgoing links, IP locations and history snapshots) to the
 * plugin's SQLite database.
 *
 * The user agent of the current request is parsed once in the constructor;
 * the resulting values are attached to the rows written by the log methods.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    /**
     * Constructor
     *
     * Parses the user agent of the current request, initializes the internal
     * user agent and user ID fields and records that the user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, used to obtain the database
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups
     * @throws \RuntimeException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->hlp->getDB()->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->hlp->getDB()->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * The first non-empty value of the 'uid' request parameter, the 'plgstats'
     * preference and the PHP session ID is used. The result is written back
     * to the 'plgstats' preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * The first non-empty value of the 'ses' request parameter, the 'plgstatsses'
     * preference and the PHP session ID is used. The result is written back
     * to the 'plgstatsses' preference.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     *
     * Does nothing when REMOTE_USER is empty.
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * One row per group is written with a single multi-row INSERT.
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(int $pid, string $type, array $groups): void
    {
        if (empty($groups) || !$pid) return;

        $toLog = (array)$this->hlp->getConf('loggroups');

        // if specific groups are configured, limit logging to them only
        $groups = !empty(array_filter($toLog)) ? array_intersect($groups, $toLog) : $groups;
        if (!$groups) return;

        // one "(?, ?, ?)" tuple per group, joined without a trailing comma
        $placeholders = join(',', array_fill(0, count($groups), '(?, ?, ?)'));
        $sql = "INSERT INTO groups (`pid`, `type`, `group`) VALUES $placeholders";

        $params = [];
        foreach ($groups as $group) {
            $params[] = $pid;
            $params[] = $type;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain, skip logging if no domain is found
     *
     * @param int $pid Id of access data row (foreign key)
     * @param string $type The type of access to log ('view','edit')
     * @param string $mail The email to extract the domain from
     */
    public function logDomain(int $pid, string $type, string $mail): void
    {
        if (!$pid) return;

        // the domain is everything after the last '@'; a missing or leading '@' is skipped
        $pos = strrpos($mail, '@');
        if (!$pos) return;
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = "INSERT INTO domain (`pid`, `type`, `domain`) VALUES (?, ?, ?)";
        $this->db->exec($sql, [$pid, $type, $domain]);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' for search engine referers
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * The individual words are only written when the search row was inserted.
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words, empty entries are skipped
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType !== 'browser') return;

        $session = $this->getSession();

        // NOTE(review): INSERT OR REPLACE rewrites dt on every call as well. If dt is
        // meant to hold the session start it probably needs to be preserved like the
        // views counter - confirm against the schema.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                CURRENT_TIMESTAMP,
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP already has an entry updated within the
     * last 30 days, or when the lookup fails.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        $host = gethostbyaddr($ip);
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            // the response is remote data, do not rely on every field being present
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Does nothing when the 'ol' request parameter is empty.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Writes the access row, remembers the referer, logs the user's groups
     * and mail domain and finally resolves the client IP. Does nothing when
     * the 'p' request parameter is empty.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        } else {
            $ref = '';
            $ref_md5 = '';
            $ref_type = '';
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $accessId = $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($accessId, 'view', $USERINFO['grps']);
        }
        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($accessId, 'view', $USERINFO['mail']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a '/' (or an empty one) has no subtype, pad it with ''
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * Also logs the editing user's groups and mail domain for the new row.
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $editId = $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups($editId, 'edit', $USERINFO['grps']);
        }

        // log email domain
        if (!empty($USERINFO['mail'])) {
            $this->logDomain($editId, 'edit', $USERINFO['mail']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        // NOTE(review): presumably plugin_load() yields no object when the popularity
        // plugin is unavailable - confirm whether a guard is needed here
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
        $page_count = $list['file_count'];
        $page_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_count',
            $page_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_size',
            $page_size
        );
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        // NOTE(review): presumably plugin_load() yields no object when the popularity
        // plugin is unavailable - confirm whether a guard is needed here
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
        $media_count = $list['file_count'];
        $media_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_count',
            $media_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_size',
            $media_size
        );
    }

    /**
     * Build the zero-initialized accumulator passed to search()
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array All counters keyed by name, each set to 0
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}