<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use dokuwiki\Utf8\PhpString;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records (page access, media access, edits, logins, searches,
 * sessions, outgoing links, IP locations and history values) to the plugin's
 * SQLite database.
 *
 * The user agent is analysed once in the constructor; requests identified as bots
 * are rejected there with a RuntimeException.
 */
class Logger
{
    /** @var string Value stored when the browser family or OS family cannot be determined */
    protected const UA_UNKNOWN = 'Unknown';

    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;

    /**
     * Constructor
     *
     * Parses the user agent, initializes the internal state and records that the
     * current (authenticated) user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws \RuntimeException when the user agent is identified as a bot
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        // The family lookups are nullable in DeviceDetector (unknown client/OS); the
        // properties are typed as string, so fall back to a placeholder instead of
        // triggering a TypeError.
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?? self::UA_UNKNOWN;
        $this->uaVersion = (string)$dd->getClient('version');
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?? self::UA_UNKNOWN;
        $this->uid = $this->getUID();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        }

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        // use the same connection all the log* methods write to
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Looked up in this order: the 'uid' request parameter, the 'plgstats'
     * preference, the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        return (string)$uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Looked up in this order: the 'ses' request parameter, the 'plgstatsses'
     * preference, the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        return (string)$ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        // compare against the empty string: a user named "0" is still a user
        if ($user === '') return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        $tolog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $tolog);
        if ($groups === []) {
            return;
        }

        // build a single multi-row insert: one (type, group) tuple per group
        $params = [];
        $sql = "INSERT INTO groups (`type`, `group`) VALUES ";
        foreach ($groups as $group) {
            $sql .= '(?, ?),';
            $params[] = $type;
            $params[] = $group;
        }
        $sql = rtrim($sql, ',');
        $this->db->exec($sql, $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' for search engine referers
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            // skip only truly empty tokens; "0" is a legitimate search word
            if ((string)$word === '') continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType != 'browser') return;

        $session = $this->getSession();
        // INSERT OR REPLACE rewrites the whole row, so the original start time (dt)
        // has to be carried over explicitly - otherwise dt would always equal end and
        // no session duration could be derived.
        // NOTE(review): assumes dt is meant to be the session start - confirm against the schema/reports
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP already has an entry newer than 30 days or when
     * the lookup service does not return a successful answer.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!isset($data['status']) || $data['status'] !== 'success') {
            return; // FIXME log error
        }

        // gethostbyaddr() may return false; store the plain IP in that case
        $host = gethostbyaddr($ip);
        if ($host === false) $host = $ip;

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            $data['country'] ?? '', // fields may be missing from the response
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if ($link === '') return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Writes the access record, remembers the referer, logs the user's groups and
     * resolves the client IP.
     *
     * Called from log.php
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        // compare against the empty string: "0" is a valid page id
        if ($page === '') return;

        // FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may switch $ref_type to 'search'
                $this->logExternalSearch($referer, $ref_type);
            }
        } else {
            $ref = '';
            $ref_md5 = '';
            $ref_type = '';
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // pad so that a mime type without a slash yields an empty subtype
        // instead of an undefined array key
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        [$count, $size] = $this->countFiles($conf['datadir'], false);
        $this->logHistoryValue('page_count', $count);
        $this->logHistoryValue('page_size', $size);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        [$count, $size] = $this->countFiles($conf['mediadir'], true);
        $this->logHistoryValue('media_count', $count);
        $this->logHistoryValue('media_size', $size);
    }

    /**
     * Count files and their accumulated size below a directory
     *
     * Uses the popularity plugin's search callback to collect the data.
     *
     * @param string $dir The directory to scan
     * @param bool $all Value passed as the 'all' option to the search callback
     * @return array Two elements: the file count and the file size
     */
    protected function countFiles(string $dir, bool $all): array
    {
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = [];
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        // the result keys may be unset (presumably when nothing was counted) - default to 0
        return [$list['file_count'] ?? 0, $list['file_size'] ?? 0];
    }

    /**
     * Store a value as today's history entry
     *
     * @param string $info The name of the history value
     * @param int|float $value The value to store
     */
    protected function logHistoryValue(string $info, $value): void
    {
        // 'now' must be a single-quoted SQL string literal; double quotes denote
        // identifiers and only work through a SQLite compatibility quirk
        $this->db->exec(
            "INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, date('now')
             )",
            $info,
            $value
        );
    }
}