<?php

/**
 * Basic antispam features for the DokuWiki Linkback Plugin.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Gina Haeussge <osd@foosel.net>
 * @link http://wiki.foosel.net/snippets/dokuwiki/linkback
 */

class action_plugin_linkback_antispam extends DokuWiki_Action_Plugin {

    /**
     * Register the event handlers.
     *
     * Hooks handle_linkback_received() into the BEFORE phase of the
     * ACTION_LINKBACK_RECEIVED event.
     *
     * @param Doku_Event_Handler $controller controller to register the hook with
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_LINKBACK_RECEIVED', 'BEFORE', $this, 'handle_linkback_received', array ());
    }

    /**
     * Handler for the ACTION_LINKBACK_RECEIVED event.
     *
     * Runs the link count, wordblock, host and link checks in that order.
     * Each check only runs when its "antispam_*_enable" option is set. A
     * failed check sets $event->data['show'] to false and either prevents
     * the default action or, when the matching "antispam_*_moderate" option
     * is set, only logs that the linkback is moderated.
     *
     * Reads the 'trackback_data', 'page' and 'target' entries of
     * $event->data and appends to its 'log' entry.
     *
     * @param Doku_Event $event the event being handled
     * @param mixed      $param data passed at registration (unused)
     */
    public function handle_linkback_received(Doku_Event $event, $param) {
        $linkback = $event->data['trackback_data'];
        $page = $event->data['page'];
        $target = $event->data['target'];

        // The && short-circuit keeps a disabled check from being executed at all.
        $isSpam = $this->getConf('antispam_linkcount_enable')
            && !$this->_clean_linkcount($linkback['raw_excerpt']);
        $this->_record_verdict(
            $event,
            $isSpam,
            'antispam_linkcount_moderate',
            "\tLinkcount exceeded, marked as spam",
            "\tLinkcount ok, marked as ham"
        );

        $isSpam = $this->getConf('antispam_wordblock_enable')
            && !$this->_clean_wordblock($linkback['raw_excerpt']);
        $this->_record_verdict(
            $event,
            $isSpam,
            'antispam_wordblock_moderate',
            "\tWordblock active, marked as spam",
            "\tWordblock ok, marked as ham"
        );

        $isSpam = $this->getConf('antispam_host_enable')
            && !$this->_clean_host($linkback['url'], $linkback['submitter_ip']);
        $this->_record_verdict(
            $event,
            $isSpam,
            'antispam_host_moderate',
            "\tHosts do not match, marked as spam",
            "\tHosts ok, marked as ham"
        );

        $isSpam = $this->getConf('antispam_link_enable')
            && !$this->_clean_link($target, $page, $linkback['type']);
        $this->_record_verdict(
            $event,
            $isSpam,
            'antispam_link_moderate',
            "\tURL not contained in linking page, marked as spam",
            "\tURL found in linking page, marked as ham"
        );
    }

    /**
     * Log the outcome of one antispam check and apply it to the event.
     *
     * The ham message is logged whenever $isSpam is false, which includes
     * the case where the check is disabled in the configuration.
     *
     * @param Doku_Event $event       event whose data is updated
     * @param bool       $isSpam      true when the check was enabled and failed
     * @param string     $moderateKey config key deciding between moderation and rejection
     * @param string     $spamMessage log line written when the check failed
     * @param string     $hamMessage  log line written otherwise
     */
    private function _record_verdict(Doku_Event $event, $isSpam, $moderateKey, $spamMessage, $hamMessage) {
        if (!$isSpam) {
            $event->data['log'][] = $hamMessage;
            return;
        }

        $event->data['log'][] = $spamMessage;
        $event->data['show'] = false;
        if (!$this->getConf($moderateKey)) {
            $event->preventDefault();
        } else {
            $event->data['log'][] = "\t -> moderated";
        }
    }

    /**
     * Check against linkcount limit.
     *
     * @param string $excerpt excerpt of the linkback to inspect
     * @return bool false when the excerpt contains more <a>...</a> elements
     *              than the 'antispam_linkcount_max' setting allows
     */
    public function _clean_linkcount($excerpt) {
        $regex = '!<a\s.*?</a>!is';

        // preg_match() stops at the first hit and can only yield 0 or 1;
        // preg_match_all() returns the number of links actually present.
        $count = preg_match_all($regex, (string) $excerpt, $matches);

        // A PCRE failure yields false, which is counted as zero links here.
        return (int) $count <= (int) $this->getConf('antispam_linkcount_max');
    }

    /**
     * Check against wordblock.
     *
     * checkwordblock() is called with the global $TEXT temporarily replaced
     * by the excerpt; the previous value is restored afterwards.
     *
     * @param string $excerpt excerpt of the linkback to inspect
     * @return bool the negated result of checkwordblock()
     */
    public function _clean_wordblock($excerpt) {
        global $TEXT;

        $otext = $TEXT;
        $TEXT = $excerpt;
        $retval = checkwordblock();
        $TEXT = $otext;

        return !$retval;
    }

    /**
     * Check whether source host matches requesting host.
     *
     * @param string $sourceUri   URL of the linking page
     * @param string $remote_addr address the linkback was submitted from
     * @return bool true when the host of $sourceUri resolves to $remote_addr
     */
    public function _clean_host($sourceUri, $remote_addr) {
        $host = parse_url((string) $sourceUri, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            // Malformed URL or no host component: nothing to resolve.
            return false;
        }

        // gethostbyname() hands back the unmodified host name when the
        // lookup fails, so a failed lookup simply compares unequal below.
        // NOTE(review): gethostbyname() resolves IPv4 only; confirm whether
        // IPv6 submitters need to be supported.
        return gethostbyname($host) === $remote_addr;
    }

    /**
     * Check whether linking page contains link to us.
     *
     * Only used for trackbacks (pingbacks get this treatment right on arrival
     * for excerpt extraction anyway...)
     *
     * @param string $targetUri URL of the linkback target
     * @param array  $page      data of the linking page; its 'body' entry is searched
     * @param string $type      linkback type; 'pingback' is always accepted
     * @return bool true when a link to the target was found or no check is needed
     */
    public function _clean_link($targetUri, $page, $type) {
        if ($type == 'pingback')
            return true;

        $body = isset($page['body']) ? (string) $page['body'] : '';

        if ($this->_body_links_to($body, $targetUri))
            return true;

        // Fallback for internal pings: when the target starts with DOKU_URL,
        // also accept a link in the form produced by wl() for the page ID
        // taken from PATH_INFO.
        if ($this->getConf('ping_internal') && strpos((string) $targetUri, DOKU_URL) === 0) {
            $pathInfo = isset($_SERVER['PATH_INFO']) ? (string) $_SERVER['PATH_INFO'] : '';
            $id = (string) substr($pathInfo, 1);
            return $this->_body_links_to($body, wl($id));
        }

        return false;
    }

    /**
     * Tell whether $body references $url, either as the href of an <a>
     * element or as a bare URL delimited by whitespace on both sides.
     *
     * @param string $body markup to search
     * @param string $url  URL to look for
     * @return bool true when one of the two patterns matches
     */
    private function _body_links_to($body, $url) {
        $searchurl = preg_quote($url, '!');
        $anchorRegex = '!<a[^>]+?href="' . $searchurl . '"[^>]*?>(.*?)</a>!is';
        $plainRegex = '!\s(' . $searchurl . ')\s!is';

        return preg_match($anchorRegex, $body) || preg_match($plainRegex, $body);
    }
}