<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records to the plugin's SQLite database
 *
 * The user agent is analysed once on construction; the individual log*()
 * methods then store page accesses, media accesses, edits, logins, searches,
 * outgoing links, sessions, IP locations and history values.
 */
class Logger
{
    /** @var string Label stored when the browser or OS family cannot be determined */
    protected const UNKNOWN = 'Unknown';

    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    /**
     * Constructor
     *
     * Parses the user agent, initializes the internal state and records that
     * the (authenticated) user has been seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws \RuntimeException when the user agent is identified as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        // the family lookups return null for unknown labels, which must not be
        // assigned to the non-nullable string properties
        $this->uaName = Browser::getBrowserFamily((string)$dd->getClient('name')) ?? self::UNKNOWN;
        $this->uaVersion = (string)$dd->getClient('version');
        $this->uaPlatform = OperatingSystem::getOsFamily((string)$dd->getOs('name')) ?? self::UNKNOWN;
        $this->uid = $this->getUID();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        }

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Looked up in this order: the 'uid' request parameter, the 'plgstats'
     * preference, the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Looked up in this order: the 'ses' request parameter, the 'plgstatsses'
     * preference, the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        return $ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups listed in the 'loggroups' configuration are written.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $toLog);
        if (!$groups) return;

        // one "(?, ?)" tuple per group, all inserted with a single statement
        $placeholders = join(',', array_fill(0, count($groups), '(?, ?)'));
        $sql = "INSERT INTO groups (`type`, `group`) VALUES $placeholders";

        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            // skip empty words only; a plain truthiness check would also drop the word "0"
            if ((string)$word === '') continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();

        // The row is replaced on every hit. The start time (dt) of an existing
        // session is carried over, otherwise dt would always equal end and no
        // session duration could be derived. Views are accumulated the same way.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP has an entry that was updated within the
     * last 30 days. Lookup failures are silently ignored.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data) || !isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        $host = gethostbyaddr($ip);
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            // do not rely on the service always sending every field
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $this->getSession(),
            $INPUT->str('p'),
            md5($link),
            $link
        );
    }

    /**
     * Log a page access
     *
     * Called from log.php
     *
     * Besides the access itself, this records the referer as seen, logs the
     * user's groups and triggers the IP lookup.
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a slash yields an empty second part instead of an undefined index
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory('page', $conf['datadir'], false);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory('media', $conf['mediadir'], true);
    }

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * Writes the entries "<prefix>_count" and "<prefix>_size".
     *
     * @param string $prefix The prefix of the history info keys ('page' or 'media')
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option passed to the search callback
     */
    protected function logHistory(string $prefix, string $dir, bool $all): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) return; // helper not available, nothing we can count with

        // seed the counters so they are defined even when no file is found
        $list = ['file_count' => 0, 'file_size' => 0];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        foreach (['count', 'size'] as $metric) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $prefix . '_' . $metric,
                $list['file_' . $metric]
            );
        }
    }
}