<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\ErrorHandler;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes access statistics (users, sessions, page views, media access, edits, ...)
 * to the plugin's SQLite database.
 *
 * Typical usage: construct, call begin(), call one or more of the public log*() methods,
 * then call end().
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent of the current request and initializes the internal
     * user, session and user agent properties.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, used to obtain the database
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws IgnoreException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();
        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        $this->hlp->getDB()->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->hlp->getDB()->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * Looked up in this order: the 'uid' request parameter, the 'plgstats' preference,
     * the PHP session ID. The result is written back to the 'plgstats' preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback)
     *
     * Looked up in this order: the 'ses' request parameter, the 'plgstatsses' preference,
     * the PHP session ID. The result is written back to the 'plgstatsses' preference.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        // FIXME session setting needs work. It should be reset on user change, maybe we do rely on the PHP session?
        // We also want to store the user agent in the session table, so this needs also change the session ID
        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or, if it already exists, refreshes its timestamp.
     * Does nothing when no user is set.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * Inserts the session or, if it already exists, refreshes its end timestamp.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * Replaces the stored groups of the current user with the ones found in $USERINFO['grps'].
     * Does nothing when no user is set or no group list is available.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        // nothing to insert - an INSERT with an empty VALUES list would be invalid SQL
        if ($groups === []) return;

        // one (user, group) tuple per group; placeholder count must match the two bound values per row
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last '@' of $USERINFO['mail'] for the current user.
     * Does nothing when no user is set or no usable mail address is available.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no '@' at all, or nothing in front of it
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * The referer is classified as 'search' or 'external' and upserted by URL.
     *
     * @param string $referer The referer URL
     * @return int|null The referer ID or null if no referer was given
     */
    public function logReferer($referer): ?int
    {
        if (!$referer) return null;

        // FIXME we could check against a blacklist here

        $se = new SearchEngines($referer);
        $type = $se->isSearchEngine() ? 'search' : 'external';

        $sql = '
            INSERT INTO referers (url, type, dt)
                 VALUES (?, ?, CURRENT_TIMESTAMP)
            ON CONFLICT (url)
              DO UPDATE
                    SET type = excluded.type, dt = excluded.dt;
        ';
        return $this->db->exec($sql, [$referer, $type]);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Entries refreshed within the last 30 days are not looked up again. Lookup failures
     * are logged and otherwise ignored; the IP is returned in any case.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);
        $hash = $ip; // @todo we could anonymize here

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 7;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
            return $hash;
        }
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
            return $hash;
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            \dokuwiki\Logger::error('Statistics Plugin - IP location lookup failed for ' . $ip, $data);
            return $hash;
        }

        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            // tolerate partial responses instead of raising undefined index warnings
            $data['country'] ?? null,
            $data['countryCode'] ?? null,
            $data['city'] ?? null,
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page ('p'), referer ('r'), screen size ('sx', 'sy') and viewport size
     * ('vx', 'vy') from the request. Does nothing when no page was given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Reads the link ('ol') and the page ('p') from the request. Does nothing when no link was given.
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        // three placeholders for the three bound values (session, page, link)
        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // pad so a mime type without a slash does not raise an undefined index warning
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     * @fixme this is still broken, I need to figure out the session handling first
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->session;

        // The column list has to name one column per supplied value.
        // NOTE(review): the `user` and `uid` column names are assumed from the bound
        // parameters - confirm against the logins table schema.
        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
        $page_count = $list['file_count'];
        $page_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_count',
            $page_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_size',
            $page_size
        );
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
        $media_count = $list['file_count'];
        $media_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_count',
            $media_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_size',
            $media_size
        );
    }

    // endregion

    /**
     * Build the zero-initialized result array passed to search() by the history loggers
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}