1<?php
2
3/**
4 * Basic antispam features for the DokuWiki Linkback Plugin.
5 *
6 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author     Gina Haeussge <osd@foosel.net>
8 * @link       http://wiki.foosel.net/snippets/dokuwiki/linkback
9 */
10
class action_plugin_linkback_antispam extends DokuWiki_Action_Plugin {

    /**
     * Register the event handlers.
     *
     * Hooks handle_linkback_received() in BEFORE the ACTION_LINKBACK_RECEIVED
     * event.
     *
     * @param Doku_Event_Handler $controller handler to register the hook with
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_LINKBACK_RECEIVED', 'BEFORE', $this, 'handle_linkback_received', array ());
    }

    /**
     * Handler for the ACTION_LINKBACK_RECEIVED event.
     *
     * Runs the enabled antispam checks in a fixed order (link count,
     * wordblock, host match, backlink presence). Every check appends a line
     * to $event->data['log']. A failed check sets $event->data['show'] to
     * false and either prevents the default action or, when the matching
     * "*_moderate" option is set, only logs that the linkback is moderated.
     *
     * A check is only executed when its "*_enable" option is set (the
     * expression short-circuits), so e.g. no DNS lookup happens for a
     * disabled host check. Note that the "ok, marked as ham" line is also
     * logged for disabled checks; this matches the historical log output.
     *
     * @param Doku_Event $event event whose data holds 'trackback_data', 'page' and 'target'
     * @param mixed      $param unused; the value given at hook registration
     */
    public function handle_linkback_received(Doku_Event $event, $param) {
        $linkback = $event->data['trackback_data'];
        $page = $event->data['page'];
        $target = $event->data['target'];

        $this->_apply_verdict(
            $event,
            $this->getConf('antispam_linkcount_enable') && !$this->_clean_linkcount($linkback['raw_excerpt']),
            'antispam_linkcount_moderate',
            "\tLinkcount exceeded, marked as spam",
            "\tLinkcount ok, marked as ham"
        );

        $this->_apply_verdict(
            $event,
            $this->getConf('antispam_wordblock_enable') && !$this->_clean_wordblock($linkback['raw_excerpt']),
            'antispam_wordblock_moderate',
            "\tWordblock active, marked as spam",
            "\tWordblock ok, marked as ham"
        );

        $this->_apply_verdict(
            $event,
            $this->getConf('antispam_host_enable') && !$this->_clean_host($linkback['url'], $linkback['submitter_ip']),
            'antispam_host_moderate',
            "\tHosts do not match, marked as spam",
            "\tHosts ok, marked as ham"
        );

        $this->_apply_verdict(
            $event,
            $this->getConf('antispam_link_enable') && !$this->_clean_link($target, $page, $linkback['type']),
            'antispam_link_moderate',
            "\tURL not contained in linking page, marked as spam",
            "\tURL found in linking page, marked as ham"
        );
    }

    /**
     * Record the outcome of a single antispam check on the event.
     *
     * @param Doku_Event $event       event being processed
     * @param bool       $isSpam      true when the check is enabled and failed
     * @param string     $moderateKey config key of the check's "moderate" option
     * @param string     $spamMessage log line written when the check failed
     * @param string     $hamMessage  log line written otherwise
     */
    private function _apply_verdict(Doku_Event $event, $isSpam, $moderateKey, $spamMessage, $hamMessage) {
        if (!$isSpam) {
            $event->data['log'][] = $hamMessage;
            return;
        }

        $event->data['log'][] = $spamMessage;
        $event->data['show'] = false;

        if ($this->getConf($moderateKey)) {
            $event->data['log'][] = "\t -> moderated";
        } else {
            $event->preventDefault();
        }
    }

    /**
     * Check against linkcount limit.
     *
     * @param string $excerpt raw excerpt of the linkback
     * @return bool false when the excerpt contains more <a>...</a> elements
     *              than the 'antispam_linkcount_max' option allows
     */
    public function _clean_linkcount($excerpt) {
        $regex = '!<a\s.*?</a>!is';

        // preg_match() stops at the first hit and can only return 0 or 1, so
        // it can never exceed a limit >= 1; preg_match_all() returns the
        // actual number of links.
        $linkcount = preg_match_all($regex, (string) $excerpt, $matches);

        return !($linkcount > (int) $this->getConf('antispam_linkcount_max'));
    }

    /**
     * Check against wordblock.
     *
     * Temporarily swaps the global $TEXT for the excerpt because
     * checkwordblock() is called without arguments here, then restores it.
     *
     * @param string $excerpt raw excerpt of the linkback
     * @return bool negation of checkwordblock()'s result (presumably true
     *              when no blocked word was found)
     */
    public function _clean_wordblock($excerpt) {
        global $TEXT;

        $otext = $TEXT;
        $TEXT = $excerpt;
        $retval = checkwordblock();
        $TEXT = $otext;

        return !$retval;
    }

    /**
     * Check whether source host matches requesting host.
     *
     * @param string $sourceUri   URL of the page that claims to link here
     * @param string $remote_addr IP address the linkback was submitted from
     * @return bool true when the URL's host resolves to $remote_addr; false
     *              otherwise, including when no host can be extracted
     */
    public function _clean_host($sourceUri, $remote_addr) {
        $host = parse_url((string) $sourceUri, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            // Malformed or host-less URL: nothing to verify against, so the
            // hosts cannot be considered matching.
            return false;
        }

        // gethostbyname() returns the host name unchanged when resolution
        // fails, which then simply does not equal the submitter's IP.
        $source_addr = gethostbyname($host);

        return $source_addr === (string) $remote_addr;
    }

    /**
     * Check whether linking page contains link to us.
     *
     * Only used for trackbacks (pingbacks get this treatment right on arrival
     * for excerpt extraction anyway...)
     *
     * When the target URL itself is not found and 'ping_internal' is enabled
     * for a target below DOKU_URL, the page is searched again for the wl()
     * link of the ID taken from PATH_INFO.
     *
     * @param string $targetUri URL of the linked-to page
     * @param array  $page      fetched linking page; its 'body' entry is searched
     * @param string $type      linkback type; 'pingback' always passes
     * @return bool true when a link was found (or the type is 'pingback')
     */
    public function _clean_link($targetUri, $page, $type) {
        if ($type == 'pingback')
            return true;

        $body = isset($page['body']) ? (string) $page['body'] : '';

        if ($this->_body_links_to($body, $targetUri))
            return true;

        // Strict prefix test; the former strstr(...) == $targetUri comparison
        // was loosely typed and also matched '' and '0'.
        $isInternal = (strpos((string) $targetUri, DOKU_URL) === 0);
        if (!$this->getConf('ping_internal') || !$isInternal)
            return false;

        $id = isset($_SERVER['PATH_INFO']) ? (string) substr($_SERVER['PATH_INFO'], 1) : '';

        return $this->_body_links_to($body, wl($id));
    }

    /**
     * Tell whether $body references $url, either as the double-quoted href of
     * an <a> element or as a bare, whitespace-delimited URL.
     *
     * @param string $body HTML to search
     * @param string $url  URL to look for (matched literally, case-insensitively)
     * @return bool
     */
    private function _body_links_to($body, $url) {
        $searchurl = preg_quote($url, '!');

        $anchorRegex = '!<a[^>]+?href="' . $searchurl . '"[^>]*?>(.*?)</a>!is';
        $plainRegex = '!\s(' . $searchurl . ')\s!is';

        return preg_match($anchorRegex, $body) || preg_match($plainRegex, $body);
    }
}
136