<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics events (page access, media access, edits, logins, searches,
 * sessions, outgoing links, IP locations and history snapshots) to the plugin's
 * SQLite database.
 *
 * The user agent is analysed once in the constructor; the resulting client
 * information is attached to the access and media log entries.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;


    /**
     * Constructor
     *
     * Parses the user agent, sets the internal client info and records that the
     * (authenticated) user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @param DokuHTTPClient|null $httpClient Optional HTTP client used for IP lookups (for testing)
     * @throws \RuntimeException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then from the 'plgstats' preference,
     * then from the PHP session ID. The result is written back to the preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback)
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $toLog);
        if (!$groups) return;

        // one "(?, ?)" tuple per group, inserted with a single statement
        $placeholders = join(',', array_fill(0, count($groups), '(?, ?)'));
        $sql = "INSERT INTO groups (`type`, `group`) VALUES $placeholders";

        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' when a search engine is detected
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            if (!$word) continue; // skip empty fragments left over from splitting
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType !== 'browser') return;

        $session = $this->getSession();

        // The row is replaced on every call. The start time (dt) and the view counter
        // are carried over from the existing row, only the end time always moves forward.
        // Without preserving dt, dt and end would always be identical.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when an entry for the IP was updated within the last 30 days
     * or when the lookup fails.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        $host = gethostbyaddr($ip);
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            // the response comes from an external service, don't rely on all fields being present
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Also records the referer as seen, logs the user's groups and resolves the client IP.
     *
     * Called from log.php
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a slash would otherwise leave the subtype undefined
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');

        $this->storeHistory('page_count', $list['file_count']);
        $this->storeHistory('page_size', $list['file_size']);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');

        $this->storeHistory('media_count', $list['file_count']);
        $this->storeHistory('media_size', $list['file_size']);
    }

    /**
     * Insert or replace a single history entry with the current timestamp
     *
     * @param string $info The name of the recorded metric
     * @param mixed $value The value to store for that metric
     */
    private function storeHistory(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }

    /**
     * Create the zero-initialised result list passed to search() for counting files
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}