<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use dokuwiki\Utf8\PhpString;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records (accesses, sessions, searches, edits, logins, ...)
 * into the plugin's SQLite database.
 *
 * Constructing a Logger parses the current request's user agent. Requests that
 * are identified as bots make the constructor throw, so callers must be
 * prepared to catch a \RuntimeException.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string The unique user identifier */
    protected string $uid;


    /**
     * Constructor
     *
     * Parses the user agent of the current request, initializes the internal
     * user agent fields and records that the (authenticated) user was seen.
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws \RuntimeException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isBot()) {
            $this->uaType = 'robot';

            // for now ignore bots
            throw new \RuntimeException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        // The family lookups return null for clients/systems they do not know.
        // The properties are non-nullable strings, so fall back to a placeholder
        // instead of failing with a TypeError on unrecognised user agents.
        $this->uaName = Browser::getBrowserFamily((string)$dd->getClient('name')) ?? 'Unknown';
        $this->uaVersion = (string)$dd->getClient('version');
        $this->uaPlatform = OperatingSystem::getOsFamily((string)$dd->getOs('name')) ?? 'Unknown';
        $this->uid = $this->getUID();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        }

        $this->logLastseen();
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    /**
     * Get the unique user ID
     *
     * Looked up in this order: the 'uid' request parameter, the 'plgstats'
     * DokuWiki preference, the PHP session ID.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        // the fallbacks may yield false; always hand back a string
        return (string)$uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * Looked up in this order: the 'ses' request parameter, the 'plgstatsses'
     * DokuWiki preference, the PHP session ID.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        // the fallbacks may yield false; always hand back a string
        return (string)$ses;
    }

    /**
     * Log that we've seen the user (authenticated only)
     */
    public function logLastseen(): void
    {
        global $INPUT;

        $user = $INPUT->server->str('REMOTE_USER');
        if (empty($user)) return;

        $this->db->exec(
            'REPLACE INTO lastseen (user, dt) VALUES (?, CURRENT_TIMESTAMP)',
            $user
        );
    }

    /**
     * Log actions by groups
     *
     * Only groups that are listed in the 'loggroups' configuration are logged.
     *
     * @param string $type The type of access to log ('view','edit')
     * @param array $groups The groups to log
     */
    public function logGroups(string $type, array $groups): void
    {
        $tolog = (array)$this->hlp->getConf('loggroups');
        $groups = array_intersect($groups, $tolog);
        if ($groups === []) {
            return;
        }

        // one multi-row insert: a "(?, ?)" tuple and a type/group pair per group
        $tuples = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        foreach ($groups as $group) {
            $params[] = $type;
            $params[] = $group;
        }

        $this->db->exec("INSERT INTO groups (`type`, `group`) VALUES " . $tuples, $params);
    }

    /**
     * Log external search queries
     *
     * Will not write anything if the referer isn't a search engine
     *
     * @param string $referer The HTTP referer URL
     * @param string $type Reference to the type variable; set to 'search' when the referer is a search engine
     */
    public function logExternalSearch(string $referer, string &$type): void
    {
        global $INPUT;

        $searchEngine = new SearchEngines($referer);

        if (!$searchEngine->isSearchEngine()) {
            return; // not a search engine
        }

        $type = 'search';
        $query = $searchEngine->getQuery();

        // log it!
        $words = explode(' ', Clean::stripspecials($query, ' ', '\._\-:\*'));
        $this->logSearch($INPUT->str('p'), $query, $words, $searchEngine->getEngine());
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $page The page being searched from
     * @param string $query The search query
     * @param array $words Array of search words
     * @param string $engine The search engine name
     */
    public function logSearch(string $page, string $query, array $words, string $engine): void
    {
        // the result of the insert is used as the ID linking the words to the search
        $sid = $this->db->exec(
            'INSERT INTO search (dt, page, query, engine) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $page,
            $query,
            $engine
        );
        if (!$sid) return;

        foreach ($words as $word) {
            if (!$word) continue; // skip empty fragments left over from splitting
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log that the session was seen
     *
     * This is used to calculate the time people spend on the whole site
     * during their session
     *
     * Viewcounts are used for bounce calculation
     *
     * @param int $addview set to 1 to count a view
     */
    public function logSession(int $addview = 0): void
    {
        // only log browser sessions
        if ($this->uaType !== 'browser') return;

        $session = $this->getSession();

        // The row is replaced on every hit. The start time (dt) and the view
        // counter have to be carried over from the existing row, otherwise dt
        // would always equal end and no session duration could be derived.
        $this->db->exec(
            'INSERT OR REPLACE INTO session (
                session, dt, end, views, uid
             ) VALUES (
                ?,
                COALESCE((SELECT dt FROM session WHERE session = ?), CURRENT_TIMESTAMP),
                CURRENT_TIMESTAMP,
                COALESCE((SELECT views FROM session WHERE session = ?) + ?, ?),
                ?
             )',
            $session,
            $session,
            $session,
            $addview,
            $addview,
            $this->uid
        );
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Nothing is done when the IP has an entry that was updated within the
     * last 30 days.
     *
     * @param string $ip The IP address to resolve
     */
    public function logIp(string $ip): void
    {
        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $ip
        );
        if ($result) return;

        $http = new DokuHTTPClient();
        $http->timeout = 10;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) return; // FIXME log error
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            return; // FIXME log error
        }
        if (!is_array($data)) return; // valid JSON, but not the expected object

        // Failed lookups (e.g. private ranges) carry no location fields. Store
        // empty values in that case so the IP is still not looked up again on
        // every single request.
        $country = (string)($data['country'] ?? '');
        $code = (string)($data['countryCode'] ?? '');
        $city = (string)($data['city'] ?? '');

        // gethostbyaddr() returns false on malformed input; fall back to the IP,
        // which is also what it returns when no host name can be resolved
        $host = gethostbyaddr($ip);
        if ($host === false) $host = $ip;

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                ip, country, code, city, host, lastupd
             ) VALUES (
                ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
             )',
            $ip,
            $country,
            $code,
            $city,
            $host
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        $link = $INPUT->str('ol');
        if (!$link) return;

        $link_md5 = md5($link);
        $session = $this->getSession();
        $page = $INPUT->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link_md5, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?
             )',
            $session,
            $page,
            $link_md5,
            $link
        );
    }

    /**
     * Log a page access
     *
     * Besides the access itself this also records the referer as seen, logs
     * the user's groups and resolves the client IP.
     *
     * Called from log.php
     */
    public function logAccess(): void
    {
        global $INPUT, $USERINFO;

        $page = $INPUT->str('p');
        if (!$page) return;

        // FIXME check referer against blacklist and drop logging for bad boys

        // handle referer
        $ref = '';
        $ref_md5 = '';
        $ref_type = '';
        $referer = trim($INPUT->str('r'));
        if ($referer) {
            $ref = $referer;
            $ref_md5 = md5($referer);
            if (str_starts_with($referer, DOKU_URL)) {
                $ref_type = 'internal';
            } else {
                $ref_type = 'external';
                // may switch $ref_type to 'search' and log the search query
                $this->logExternalSearch($referer, $ref_type);
            }
        }

        $ip = clientIP(true);
        $sx = $INPUT->int('sx');
        $sy = $INPUT->int('sy');
        $vx = $INPUT->int('vx');
        $vy = $INPUT->int('vy');
        $js = $INPUT->int('js');
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO access (
                dt, page, ip, ua, ua_info, ua_type, ua_ver, os, ref, ref_md5, ref_type,
                screen_x, screen_y, view_x, view_y, js, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $ref,
            $ref_md5,
            $ref_type,
            $sx,
            $sy,
            $vx,
            $vy,
            $js,
            $user,
            $session,
            $this->uid
        );

        if ($ref_md5) {
            $this->db->exec(
                'INSERT OR IGNORE INTO refseen (
                    ref_md5, dt
                 ) VALUES (
                    ?, CURRENT_TIMESTAMP
                 )',
                $ref_md5
            );
        }

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('view', $USERINFO['grps']);
        }

        // resolve the IP
        $this->logIp($ip);
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        global $INPUT;

        // pad so that a mime type without a slash yields an empty sub type
        // instead of an undefined offset
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO media (
                dt, media, ip, ua, ua_info, ua_type, ua_ver, os, user, session, uid,
                size, mime1, mime2, inline
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                ?, ?, ?, ?
             )',
            $media,
            $ip,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform,
            $user,
            $session,
            $this->uid,
            $size,
            $mime1,
            $mime2,
            $inline
        );
    }

    /**
     * Log page edits
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        global $INPUT, $USERINFO;

        $ip = clientIP(true);
        $user = $INPUT->server->str('REMOTE_USER');
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?
             )',
            $page,
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );

        // log group access
        if (isset($USERINFO['grps'])) {
            $this->logGroups('edit', $USERINFO['grps']);
        }
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->getSession();

        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, user, session, uid
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = [];
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');

        // the counters are not set when the search did not find a single file
        $this->storeHistory('page_count', (int)($list['file_count'] ?? 0));
        $this->storeHistory('page_size', (int)($list['file_size'] ?? 0));
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = [];
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');

        // the counters are not set when the search did not find a single file
        $this->storeHistory('media_count', (int)($list['file_count'] ?? 0));
        $this->storeHistory('media_size', (int)($list['file_size'] ?? 0));
    }

    /**
     * Store a single history value, dated with the current day
     *
     * @param string $info The name of the value (e.g. 'page_count')
     * @param int $value The value to store
     */
    protected function storeHistory(string $info, int $value): void
    {
        // 'now' must be single quoted: double quotes denote an identifier in SQL
        // and are only accepted as a string by SQLite as a legacy fallback
        $this->db->exec(
            "INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, date('now')
             )",
            $info,
            $value
        );
    }
}