1<?php
2/**
3 * Plugin Logstats - J.-F. Lalande
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
7 * @author     J.-F. Lalande <jf@lalande.nom.fr>
8 */
// Unless DOKU_INC is already defined, define it as the resolved directory
// two levels above this file, with a trailing slash.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../../') . '/');
}
10
/**
 * Action plugin that writes an access log entry for every wiki page view
 * (show mode) and every internal media access, using the combined log format
 */
class action_plugin_logstats extends DokuWiki_Action_Plugin {

    /**
     * Register the plugin's handlers with DokuWiki's event controller
     *
     * Hooks ACTION_HEADERS_SEND (page views) and FETCH_MEDIA_STATUS (media
     * accesses), both in the BEFORE phase.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_HEADERS_SEND', 'BEFORE', $this, 'logPageAccess');
        $controller->register_hook('FETCH_MEDIA_STATUS', 'BEFORE', $this, 'logMediaAccess');
    }

    /**
     * Logs access to a wiki page (only show mode)
     *
     * The referer is taken from the breadcrumb trail: the entry visited right
     * before the current page, or an empty string if there is none.
     *
     * @param Doku_Event $event (not used)
     */
    public function logPageAccess($event) {
        global $ID;
        global $ACT;

        if($ACT != 'show') return;

        $page = $this->prepareID($ID);

        $crumbs = breadcrumbs(); // get last visited pages
        $crumbs = array_keys($crumbs); // get raw page IDs
        array_pop($crumbs); // skip current page
        $referer = array_pop($crumbs); // get current page's predecessor
        $referer = ($referer) ? $this->prepareID($referer) : '';

        // filesize() yields false when the page file cannot be read; such a
        // page (and an empty one) is reported as 404
        $size   = @filesize(wikiFN($ID));
        $status = $size ? 200 : 404;

        $this->logAccess($page, $status, $size, $referer);
    }

    /**
     * Logs access to a media file
     *
     * External media (http, https, ftp URLs) is not logged.
     *
     * @param Doku_Event $event event data must provide 'media', 'status' and 'file'
     */
    public function logMediaAccess($event) {
        // don't log external stuff
        if(preg_match('#^(https?|ftp)://#i', $event->data['media'])) return;

        $media  = $this->prepareID($event->data['media']);
        $status = $event->data['status'];
        $size   = @filesize($event->data['file']); // false if the file is missing

        $this->logAccess("/_media/$media", $status, $size);
    }

    /**
     * Beautify a wiki page id for the log
     *
     * The wiki page id will be transformed to a filename like string:
     * namespace separators become slashes and the result is passed through
     * utf8_encodeFN().
     *
     * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
     * @param string $path wiki page id
     * @return mixed|string
     */
    protected function prepareID($path) {
        $path = cleanID($path);
        $path = str_replace(':', '/', $path);
        $path = utf8_encodeFN($path);
        return $path;
    }

    /**
     * Check a value against one of the comma separated ban lists
     *
     * List entries are trimmed and empty entries are skipped. Without this,
     * an empty entry (e.g. caused by a trailing comma) would match every user
     * agent on PHP 8, where stristr() accepts an empty needle.
     *
     * @param string $value     the value to test (agent, IP or user name)
     * @param string $option    name of the plugin setting holding the list
     * @param bool   $substring true: case-insensitive substring match,
     *                          false: exact match
     * @return bool true if $value matches an entry of the list
     */
    protected function isBanned($value, $option, $substring = false) {
        $list = $this->getConf($option);
        if($list == "") return false;

        foreach(explode(',', (string) $list) as $entry) {
            $entry = trim($entry);
            if($entry === '') continue;

            if($substring) {
                if(stristr($value, $entry) !== false) return true;
            } elseif(strcmp($value, $entry) == 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Escape a client supplied value for use inside a quoted log field
     *
     * Double quotes and backslashes are prefixed with a backslash, control
     * characters are turned into C-style/octal escapes. This keeps a crafted
     * referer or user agent from breaking out of its field or its line.
     *
     * @param string $value raw value
     * @return string escaped value
     */
    protected function escapeLogField($value) {
        return addcslashes((string) $value, "\0..\37\"\\\177");
    }

    /**
     * Creates a log file entry and writes it to the log
     *
     * This function writes access information of the current page to a log
     * file. It uses the combined log file format that is also used by the
     * apache web server. A whole bunch of available log analysers could be
     * used to visualize the log.
     *
     * No entry is written when the client's user agent, IP or user name is
     * listed in the banned_agents, banned_ip or banned_users setting.
     *
     * combined log file format:
     *     <host> <rfc931> <user> [<timestamp>] "<request>" <error> <filesize>
     *               "<referer>" "<agent>"\n
     *
     * <host>      IP of the client host (we don't do reverse host lookups)
     * <rfc931>    remote user identification, always written as '-'
     * <user>      user id or '-' if not available
     * <timestamp> time in format [01/Dec/2005:22:19:12 +0200]
     * <request>   request method (e.g. GET or POST), requested page
     *             and protocol
     * <error>     status code, e.g. 200 (OK) or 404 (file not found)
     * <filesize>  size of the wiki page (only the bare text) or media file,
     *             0 if the size is unknown
     * <referer>   page that led to this one: the $referer argument or, if
     *             that is empty, a value derived from HTTP_REFERER
     * <agent>     identifying information that the client browser reports
     *             about itself
     *
     * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
     *
     * @param  string    $page     page name that was called
     * @param  int       $status   HTTP status code
     * @param  int|false $size     file size, false if it could not be determined
     * @param  string    $referer  predecessor of $page (which page links to $page).
     *                             If this is empty, the function tries to get
     *                             the referer from the web server (HTTP_REFERER)
     * @return bool false if the client is banned, otherwise the result of
     *              io_saveFile()
     */
    public function logAccess($page, $status, $size, $referer = '') {
        global $conf;

        $host      = clientIP(true);
        $user      = (isset($_SERVER['REMOTE_USER']) && $_SERVER['REMOTE_USER'] !== '') ? $_SERVER['REMOTE_USER'] : "-";
        $timestamp = date("[d/M/Y:H:i:s O]");
        $method    = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : "";
        $protocol  = isset($_SERVER['SERVER_PROTOCOL']) ? $_SERVER['SERVER_PROTOCOL'] : "";
        $agent     = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";

        // Nothing has to be written for banned agents, IPs or users
        if($this->isBanned($agent, 'banned_agents', true)) return false;
        if($this->isBanned($host, 'banned_ip')) return false;
        if($this->isBanned($user, 'banned_users')) return false;

        // Analyzing referer
        if($referer == "" && isset($_SERVER['HTTP_REFERER'])) {
            // a referer carrying an ?id= parameter is logged as prepared page id
            $cnt = preg_match('/\?id=((\w+\:*)+)/i', $_SERVER['HTTP_REFERER'], $match);
            if($cnt == 1) {
                $referer = $this->prepareID($match[1]);
            } else {
                $referer = $_SERVER['HTTP_REFERER'];
            }
        }

        // filesize() reports false for missing files; always log a number so
        // the line keeps all fields of the combined format
        $size = (int) $size;

        // referer and agent are client controlled and live in quoted fields
        $referer = $this->escapeLogField($referer);
        $agent   = $this->escapeLogField($agent);

        $logline = "$host - $user $timestamp \"$method $page $protocol\" $status $size \"$referer\" \"$agent\"\n";

        // determine log, relative paths resolve to meta dir
        $dir = dirname($this->getConf('accesslog'));
        $log = basename($this->getConf('accesslog'));
        if($dir == '.' || $dir == '' || !is_dir($dir)){
            $dir = fullpath($conf['metadir'].'/'.$dir);
        }

        return io_saveFile("$dir/$log", $logline, true);
    }

} // End of class
185
186