<?php
/**
 * robot404 action plugin.
 *
 * When the client looks like a search-engine robot, hidden pages and disabled
 * actions are answered with a bare "404 Not Found". For all other clients the
 * same pages are marked "noindex,nofollow" (robots meta tag and X-Robots-Tag
 * header), in case the robot detection missed a crawler.
 */
if(!defined('DOKU_INC')) die();
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
require_once(DOKU_PLUGIN.'action.php');

class action_plugin_robot404 extends DokuWiki_Action_Plugin {

    /**
     * Return the result of loading the info.txt file located next to this file.
     *
     * @return mixed whatever conf_loadfile() yields for info.txt
     */
    public function getInfo(){
        return conf_loadfile(dirname(__FILE__).'/info.txt');
    }

    /**
     * Register the event hooks, depending on whether the client is a robot.
     *
     * @param object $contr event handler offering register_hook()
     */
    public function register($contr){
        if(static::client_isrobot()){
            $contr->register_hook('ACTION_ACT_PREPROCESS','BEFORE',$this,'handle_action');
            return;
        }
        // In case our robot detection does not capture a robot, also hook into
        // the headers so we can add noindex,nofollow to these pages.
        $contr->register_hook('TPL_METAHEADER_OUTPUT','BEFORE',$this,'handle_header');
        $contr->register_hook('ACTION_HEADERS_SEND','BEFORE',$this,'handle_header');
    }

    /**
     * Add "noindex,nofollow" to the robots meta tag or to the HTTP headers
     * when the current page is hidden or the requested action is disabled.
     *
     * @param object $e event; ->name selects the branch, ->data is modified in place
     */
    public function handle_header(&$e){
        global $ACT;
        // Looking at $ACT is not enough (e.g. if the 'register' action is
        // disabled, ACT becomes 'show'), so the original 'do' parameter is
        // checked as well. It is absent on ordinary page views.
        $requested = isset($_REQUEST['do']) ? $_REQUEST['do'] : null;
        if(!$this->ishiddenpage() && !$this->isdisabledaction($ACT) && !$this->isdisabledaction($requested)) return;

        if($e->name == 'TPL_METAHEADER_OUTPUT'){
            $metas = (isset($e->data['meta']) && is_array($e->data['meta'])) ? $e->data['meta'] : [];
            $found = false;
            foreach($metas as $key => $entry){
                // Not every meta entry carries a 'name' (or is an array at all).
                if(!is_array($entry) || !isset($entry['name']) || $entry['name'] !== 'robots') continue;
                $current = isset($entry['content']) ? $entry['content'] : '';
                $e->data['meta'][$key]['content'] = $this->restrictRobotsContent($current);
                $found = true;
            }
            if(!$found) $e->data['meta'][] = ['name'=>'robots','content'=>'noindex,nofollow'];
        }
        elseif($e->name == 'ACTION_HEADERS_SEND'){
            $e->data[] = 'X-Robots-Tag: noindex,nofollow';
        }
    }

    /**
     * Rewrite a robots directive list so that it forbids indexing and following.
     *
     * 'index' and 'follow' are removed, any other directive is kept, and
     * 'noindex' and 'nofollow' are appended exactly once.
     *
     * @param mixed $content comma separated directives, e.g. 'index,follow'
     * @return string the restricted directive list
     */
    private function restrictRobotsContent($content){
        $replaced = ['index','follow','noindex','nofollow'];
        $kept = [];
        foreach(explode(',', is_scalar($content) ? (string)$content : '') as $directive){
            $directive = trim($directive); // tolerate 'index, follow'
            if($directive === '' || in_array(strtolower($directive), $replaced, true)) continue;
            $kept[] = $directive;
        }
        $kept[] = 'noindex';
        $kept[] = 'nofollow';
        return implode(',', $kept);
    }

    /**
     * Guess whether the current client is a search-engine robot.
     *
     * A truthy 'isrobot404' request parameter forces a positive answer.
     * Otherwise the User-Agent header is matched against known robot names.
     *
     * @return bool|string true when forced, the lower-cased matched name when
     *                     the User-Agent matches, false otherwise
     */
    public static function client_isrobot(){
        if(!empty($_REQUEST['isrobot404'])) return true;
        if(empty($_SERVER['HTTP_USER_AGENT'])) return false;

        $patterns = [
            '#(Google|msnbot|Yahoo|Rambler|AbachoBOT|accoona|AcioRobot|ASPSeek|CocoCrawler|Dumbot|FAST-WebCrawler|GeonaBot|Gigabot|Lycos|MSRBOT|Scooter|AltaVista|IDBot|eStyle|Scrubby|BaiDuSpider|Baiduspider)#i',
            // also use the list in: https://stackoverflow.com/questions/677419/how-to-detect-search-engine-bots-with-php
            '#(bot|crawl|slurp|spider|mediapartners)#i',
        ];
        foreach($patterns as $pattern){
            if(preg_match($pattern, $_SERVER['HTTP_USER_AGENT'], $m)) return strtolower($m[1]);
        }
        return false;
    }

    /**
     * Tell whether the current page ($ID) is hidden and the plugin's
     * 'hiddenpages' option is switched on.
     *
     * @return bool
     */
    public function ishiddenpage(){
        global $ID;
        return $this->getConf('hiddenpages') && isHiddenPage($ID);
    }

    /**
     * Tell whether an action is listed in the global or the plugin's
     * 'disableactions' setting.
     *
     * @param mixed $action action name, or a (possibly nested) array of them;
     *                      null and empty values are never considered disabled
     * @return bool
     */
    public function isdisabledaction($action){
        return count($this->matchDisabledActions($action)) > 0;
    }

    /**
     * Collect the names found in $action that are disabled.
     *
     * @param mixed $action see isdisabledaction()
     * @return string[] normalised names that appear in a disable list
     */
    private function matchDisabledActions($action){
        $disabled = $this->disabledActionNames();
        $matches = [];
        foreach($this->actionCandidates($action) as $name){
            if(in_array($name, $disabled, true)) $matches[] = $name;
        }
        return array_values(array_unique($matches));
    }

    /**
     * Turn a raw action value into a flat list of normalised candidate names.
     *
     * Array values are walked recursively. Array keys are included as well.
     * NOTE(review): DokuWiki appears to accept the action as an array key
     * (do[save]=...) and to lower-case action names - confirm against act_clean().
     *
     * @param mixed $action
     * @return string[] trimmed, lower-cased, non-empty names
     */
    private function actionCandidates($action){
        if(is_array($action)){
            $names = [];
            foreach($action as $key => $value){
                $names = array_merge($names, $this->actionCandidates($key), $this->actionCandidates($value));
            }
            return $names;
        }
        if(!is_scalar($action)) return []; // null (no action requested) or an object
        $name = strtolower(trim((string)$action));
        return $name === '' ? [] : [$name];
    }

    /**
     * Merge the global and the plugin's 'disableactions' settings.
     *
     * Each setting may be a comma separated string or an array. Entries are
     * trimmed and empty ones dropped, so an empty setting disables nothing.
     *
     * @return string[] lower-cased disabled action names
     */
    private function disabledActionNames(){
        global $conf;
        $sources = [
            isset($conf['disableactions']) ? $conf['disableactions'] : '',
            $this->getConf('disableactions'),
        ];
        $names = [];
        foreach($sources as $list){
            if(is_string($list)) $list = explode(',', $list);
            if(!is_array($list)) continue;
            foreach($list as $entry){
                if(!is_scalar($entry)) continue;
                $entry = strtolower(trim((string)$entry));
                if($entry !== '') $names[] = $entry;
            }
        }
        return $names;
    }

    /**
     * Answer robots with a 404 for hidden pages and disabled actions.
     *
     * Ends the request after sending the response; returns normally only when
     * neither condition applies.
     *
     * @param object $e event whose ->data holds the requested action
     */
    public function handle_action(&$e){
        $hidden = $this->ishiddenpage();
        $blocked = $hidden ? [] : $this->matchDisabledActions($e->data);
        if(!$hidden && !$blocked) return; // neither hidden nor a disabled action: nothing to do

        header('HTTP/1.0 404 Not Found');
        echo $hidden
            ? "Hidden page for robots."
            : "Disallowed action for robots: ".$this->escapeText(implode(',', $blocked));
        die();
    }

    /**
     * Escape text for output.
     *
     * esc() is not defined in this file; it is used when available so existing
     * output stays the same, otherwise PHP's htmlspecialchars() is used.
     *
     * @param string $text
     * @return string
     */
    private function escapeText($text){
        if(function_exists('esc')) return esc($text);
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
    }
}