<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records (page/media access, edits, logins, searches, sessions, ...)
 * to the plugin's SQLite database.
 *
 * The user agent of the current request is analysed once in the constructor; the resulting
 * client name, version, platform and type are reused by the individual log methods.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    /**
     * Constructor
     *
     * Parses the user agent of the current request, sets the internal user agent
     * properties and records that the (authenticated) user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, used for DB access and config
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws \RuntimeException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        // use the same connection all log methods write to
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then the 'plgstats' preference,
     * then the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();

        // the fallbacks may yield false; make the conversion to string explicit
        return (string)$uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback)
     *
     * Taken from the 'ses' request parameter, then the 'plgstatsses' preference,
     * then the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();

        // the fallbacks may yield false; make the conversion to string explicit
        return (string)$ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups that are listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        if (!$groups) return;

        $toLog = (array)$this->hlp->getConf('loggroups');
        // array_intersect() keeps the original keys, reindex to get a clean list
        $groups = array_values(array_intersect($groups, $toLog));
        if (!$groups) return;

        // one "(type, group)" tuple per group, inserted with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }

        $this->db->exec("INSERT INTO groups (`type`, `group`) VALUES $placeholders", $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable, set to 'search' for search engine referers
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * The individual words are only stored when the insert into the search table
     * returned a truthy result, which is then used as the search ID.
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words, empty entries are skipped
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();

        // INSERT OR REPLACE rewrites the whole row, so everything that has to survive
        // (the first-seen timestamp and the view counter) is carried over from the
        // existing row via sub-selects. Only "end" always moves to the current time.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP is invalid, when an entry not older than 30 days
     * exists already, or when the lookup service does not return a successful result.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // the IP becomes part of the request URL below, never pass on garbage
        if (filter_var($ip, FILTER_VALIDATE_IP) === false) return;

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data) || !isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() returns false for malformed input, fall back to the plain IP then
        $host = gethostbyaddr($ip) ?: $ip;

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Besides the access itself this records the referer (including external
     * searches), the groups of the current user and the location of the client IP.
     *
     * Called from log.php
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        # FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may change $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // a mime type without a slash must not leave the second part undefined
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inlineFlag = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inlineFlag
        );
    }

    /**
     * Log page edits
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory('page', $conf['datadir'], false);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory('media', $conf['mediadir'], true);
    }

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * The values are stored as '<prefix>_count' and '<prefix>_size'. Missing results
     * from the directory scan are stored as 0.
     *
     * @param string $prefix Prefix of the history info names ('page' or 'media')
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option handed to the search callback
     */
    protected function logHistory(string $prefix, string $dir, bool $all): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = [];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        $values = [
            $prefix . '_count' => $list['file_count'] ?? 0,
            $prefix . '_size' => $list['file_size'] ?? 0,
        ];

        foreach ($values as $info => $value) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $info,
                $value
            );
        }
    }
}