<?php
/**
 * @author Myron Turner <turnermm02@shaw.ca>
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 */

/**
 * Admin plugin that scans wiki page files for external http(s) links,
 * checks each one with curl and lists the links that did not answer with
 * an accepted status code. Broken links found by a scan are serialized to
 * an "accumulator" meta file so that they can be re-checked later.
 */
class admin_plugin_xtern extends DokuWiki_Admin_Plugin {

    /** HTTP status strings that do_check() does not report as broken. */
    private static $accepted_status = array('200', '300', '301', '0');

    private $dnld = false;          // set by handle(): download the CA bundle
    private $check = false;         // set by handle(): run a link check
    private $wikiRoot;              // realpath of the pages directory (false if it does not exist)
    private $dir = null;            // namespace (colon separated) requested for the scan
    private $accumulator = null;    // file that stores the serialized broken-link list
    private $broken = array();      // page id => list of broken urls found during this scan
    private $review = false;        // set by handle(): re-check the stored broken links
    private $headers = array();     // page ids whose table header row was already printed
    private $debug_handle = null;   // optional debug log handle, see write_debug()

    /**
     * Name of the command selected in handle(). It used to be created
     * dynamically (deprecated since PHP 8.2); it is declared public so its
     * visibility is the same as that of the former dynamic property.
     */
    public $output = '';

    public function __construct() {
        $this->wikiRoot = realpath(DOKU_INC . 'data/pages');
        $this->accumulator = metaFN('xtern:accumulator', '.ser');
        // $this->debug_handle = fopen(DOKU_INC . 'xtern.txt', 'wb');
    }

    /**
     * Evaluate the submitted form and remember which action html() must run.
     */
    public function handle() {
        if (!isset($_REQUEST['cmd'])) return;   // first time - nothing to do

        $this->output = 'invalid';

        if (!checkSecurityToken()) return;
        if (!is_array($_REQUEST['cmd'])) return;

        switch (key($_REQUEST['cmd'])) {
            case 'check_links':
                $this->output = 'check_links';
                $this->check = true;
                // only a non-empty string is a usable namespace; an array
                // (dir[]=...) would break the string handling later on
                if (!empty($_REQUEST['dir']) && is_string($_REQUEST['dir'])) {
                    $this->dir = $_REQUEST['dir'];
                }
                break;
            case 'download':
                $this->output = 'download';
                $this->dnld = true;
                break;
            case 'review_links':
                $this->output = 'reviews';
                $this->review = true;
                break;
        }
    }

    /**
     * output appropriate html
     */
    public function html() {
        $max_time = $this->effective_max_time();

        $this->buttons($max_time);

        if ($this->check) {
            $this->check_links($max_time);
        } elseif ($this->dnld) {
            $this->downloadPem();
        } elseif ($this->review) {
            $this->review_links($max_time);
        }
    }

    /**
     * Scan the requested namespace (or the whole pages directory), check
     * every external link found and store the broken ones in the accumulator.
     *
     * @param int|string $max_time execution time limit in seconds
     */
    public function check_links($max_time) {
        set_time_limit((int) $max_time);
        $this->disable_ob();
        $this->headers = array();
        $this->buttons($max_time, $this->dir);

        $dir = $this->resolve_scan_dir();

        ptln('<div id="xtern_chklnk"><hr>');
        if ($dir === false) {
            // missing directory, or a namespace that points outside the
            // pages directory: nothing is scanned and the stored results
            // of an earlier scan are left untouched
            echo 'Invalid namespace: ' . $this->escape_html($this->dir) . '<br />';
            ptln('</div>' . NL);
            return;
        }

        echo 'Checking: ' . $this->escape_html($dir) . '<br />';
        usleep(300000);
        $site = $this->scanDirectories($dir);
        echo "Checking links\n<br />";
        echo "<table>\n";
        usleep(300000);
        foreach ($site as $data) {
            $handle = fopen($data['path'], "r");
            if ($handle) {
                $this->parse_dwfile($handle, $data['id'], $data['path']);
                fclose($handle);
            }
        }
        ptln("</table><b>DONE</b>");
        ptln('</div>' . NL);
        io_saveFile($this->accumulator, serialize($this->broken));
        // fclose($this->debug_handle);
    }

    /**
     * Re-check the broken links stored by the last check_links() run.
     *
     * @param int|string|null $max_time execution time limit in seconds;
     *                                  when null it is derived from the
     *                                  plugin and PHP configuration
     */
    public function review_links($max_time = null) {
        if ($max_time === null) {
            $max_time = $this->effective_max_time();
        }

        // the accumulator does not exist before the first scan
        $raw = io_readFile($this->accumulator, false);
        $reviews_ar = $raw ? unserialize($raw) : array();
        if (!is_array($reviews_ar)) {
            $reviews_ar = array();
        }

        set_time_limit((int) $max_time);
        $this->disable_ob();
        $this->buttons($max_time);

        ptln('<div id="xtern_review"><hr>');
        ptln('<table>');
        foreach ($reviews_ar as $id => $errors) {
            ptln('<tr><th>' . $this->escape_html($id) . '</th></tr>');
            if (!is_array($errors)) continue;

            foreach ($errors as $error) {
                $this->do_check($error, "", $id);
            }
        }
        ptln("</table><b>DONE</b>");
        ptln('</div>' . NL);
    }

    /**
     * Print the plugin's info header and its form.
     *
     * @param int|string $max_time shown below the form when not empty
     * @param string     $ns       value pre-filled into the namespace field
     */
    public function buttons($max_time = "", $ns = "") {
        global $ID;     // current page id, needed for the form's action url

        ptln('<div id="xtern_info">');
        echo $this->locale_xhtml('header');
        ptln('</div>');

        ptln('<div id="xtern_adminform">' . NL);
        ptln('<form action="' . wl($ID) . '" method="post">');
        // output hidden values to ensure dokuwiki will return back to this plugin
        ptln(' <input type="hidden" name="do" value="admin" />');
        ptln(' <input type="hidden" name="page" value="' . $this->getPluginName() . '" />');
        formSecurityToken();
        ptln(' <input type="submit" name="cmd[download]" class = "xtern_font" value="' . $this->getLang('btn_download') . '" />');
        ptln(' <input type="submit" name="cmd[check_links]" class = "xtern_font" value="' . $this->getLang('btn_check_links') . '" />');
        ptln(' <label><span class="xtern_font">' . $this->getLang('ns') . '</span> ');
        // $ns is request input: escape it before it goes into the attribute
        ptln(' <input type="textbox" name="dir" value="' . $this->escape_html($ns) . '" /></label> ');
        ptln(' <input type="submit" name="cmd[review_links]" class = "xtern_font" value="' . $this->getLang('btn_review') . '" />');
        ptln(' <input type="button" class = "xtern_info_but" value = "' . $this->getLang('info_show') . '">');
        ptln('</form>');
        if ($max_time) {
            ptln('<br />' . $this->getLang('max_time') . ": $max_time");
        }
        ptln('</div>');
    }

    /**
     * Build an html link that opens a wiki page in the editor.
     *
     * @param string $id  wiki page
     * @param string $url broken link address
     * @return string html anchor element
     */
    public function local_url($id, $url) {
        $id = trim($id, ':');
        $url = rawurlencode($url);
        $id = str_replace(array('"', "'"), '', $id);
        $id = $this->escape_html($id);
        return " <a href='" . DOKU_URL . "doku.php?id=$id&amp;xtern_url=$url&amp;do=edit' target = 'xtern_xtern' class='wikilink1'>$id</a>";
    }

    /**
     * Remember a broken link for the accumulator file.
     *
     * @param string $id  wiki page
     * @param string $url broken link address
     */
    public function add_broken($id, $url) {
        $id = trim($id, ':');
        if (!isset($this->broken[$id])) {
            $this->broken[$id] = array();
        }
        $this->broken[$id][] = $url;
    }

    /**
     * Read a page file line by line and check every http(s) url in it.
     * Lines inside <code> and <file> blocks and lines with a url wrapped in
     * <nowiki> are skipped.
     *
     * The first parameter used to carry a default value although required
     * parameters follow it; such a default can never be used and is
     * deprecated since PHP 8.0, so it was dropped.
     *
     * @param resource $handle open file handle of the page
     * @param string   $id     wiki page id
     * @param string   $path   path of the page file (not used here)
     */
    public function parse_dwfile($handle, $id, $path) {
        $in_code = false;
        $in_file = false;
        $lineno = 0;

        while (($buffer = fgets($handle)) !== false) {
            $lineno++;

            if ($in_code) {
                if (!preg_match('#<\/code>#', $buffer)) continue;
                $in_code = false;
            }
            if ($in_file) {
                if (!preg_match('#\<\/file>#', $buffer)) continue;
                $in_file = false;
            }

            // an opening tag starts a skipped block unless the block is
            // closed again on the very same line
            if (preg_match('#^\s*\<code.*?>#', $buffer)) {
                $in_code = !preg_match('#<\/code>#', $buffer);
                continue;
            }
            if (preg_match('#^\s*\<file.*?>#', $buffer)) {
                $in_file = !preg_match('#\<\/file>#', $buffer);
                continue;
            }

            if (preg_match('#\<nowiki>.*?https?:\/\/.*?\<\/nowiki\>#', $buffer)) {
                continue;
            }

            if (!preg_match('#\[?(https?://\S+)\]?#', $buffer, $matches)) {
                continue;
            }

            preg_match_all('#https?://\S+#', $buffer, $submatches);
            if (count($submatches[0]) > 1) {
                foreach ($submatches[0] as $link) {
                    // strip trailing markup/punctuation from each url
                    $link = preg_replace('#[^\w\#\?\/]+$#m', "", $link);
                    $this->do_check($link, $lineno, $id);
                }
            } else {
                $this->do_check($matches[1], $lineno, $id);
            }
        }
    }

    /**
     * Check one url and print a table row when it is broken.
     *
     * @param string     $url    address to check; a "|title" suffix is cut off
     * @param int|string $lineno line of the page the url was found on; when
     *                           empty (review mode) the url is not added to
     *                           the broken list again
     * @param string     $id     wiki page id
     * @return string|null empty string when the url is skipped, else nothing
     */
    public function do_check($url, $lineno = "", $id = "") {
        $url = trim($url, ' )(\\;:-!"\'.,');
        $parts = explode('|', $url, 2);     // drop the link title, if any
        $url = $parts[0];
        $header = $id ? '<tr><th>' . $this->escape_html($id) . '</th></tr>' : "";

        if (strpos($url, '{{') !== false || strpos($url, '}}') !== false) {
            // keep the complete address, scheme included, so that it can
            // really be requested
            if (!preg_match('#\{\{(https?://.*?)\}\}#', $url, $submatches)) {
                return "";
            }
            $url = $submatches[1];
        }
        $url = trim($url, ']');

        $status = $this->link_check($url);
        if (in_array((string) $status, self::$accepted_status, true)) {
            return;
        }

        $link = $this->local_url($id, $url);
        if (strlen($url) > 1024) {
            $status = "414";
        }
        if ($lineno) {
            $this->add_broken($id, $url);
            if (!isset($this->headers[$id])) {
                ptln($header);
                $this->headers[$id] = 1;
            }
        }

        // shorten very long urls for display only; the full url was stored above
        $shown = strlen($url) > 512 ? substr($url, 0, 512) . '. . .' : $url;
        // the url comes from page text and the status may hold curl's error
        // message, so both are escaped before they are printed
        $shown = $this->escape_html($shown);

        ptln('<tr><td>');
        echo $this->escape_html($status) . ": $link:\n<br />";
        usleep(300000);
        if ($lineno) {
            echo ' line' . " $lineno: $shown" . "\n";
        } else {
            echo " <u>" . $this->getLang('bad_link') . ":</u> $shown\n";
        }
        ptln('</td></tr>');
        usleep(300000);
    }

    /**
     * Request the headers of a url with curl.
     *
     * @param string $url
     * @return string the http status code, or a text starting with
     *                "Curl Erro: " when curl reported an error
     */
    public function link_check($url) {
        $url = trim($url, ' "\'');
        $url = html_entity_decode($url);

        $ch = curl_init($url);
        if ($ch === false) {
            return "Curl Erro: " . "curl_init failed";
        }
        // curl --remote-name --time-cond cacert.pem https://curl.haxx.se/ca/cacert.pem
        if ($this->getConf('ca_required')) {
            curl_setopt($ch, CURLOPT_CAINFO, __DIR__ . "/ca/cacert.pem");
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        curl_setopt($ch, CURLOPT_NOBODY, 1);    // just fetch headers
        curl_exec($ch);

        // collect everything that is needed before the handle is released,
        // so that it is closed on the error path as well
        $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curl_errno = curl_errno($ch);
        $curl_error = $curl_errno ? curl_error($ch) : '';
        curl_close($ch);

        if ($curl_errno) {
            return "Curl Erro: " . $curl_errno . "--" . $curl_error;
        }
        return trim("$httpcode");
    }

    /* https://stackoverflow.com/questions/1281140/run-process-with-realtime-output-in-php/5956708#5956708 */
    /**
     * Switch off output buffering and compression so that results show up
     * in the browser while the check is still running.
     */
    public function disable_ob() {
        // Turn off output buffering
        ini_set('output_buffering', 'off');
        // Turn off PHP output compression
        // ini_set('zlib.output_compression', false);
        // Implicitly flush the buffer(s)
        ini_set('implicit_flush', true);
        ob_implicit_flush(true);
        // Clear, and turn off output buffering
        while (ob_get_level() > 0) {
            // Get the current level
            $level = ob_get_level();
            // End the buffering
            ob_end_clean();
            // If the current level has not changed, abort
            if (ob_get_level() == $level) break;
        }
        // Disable apache output buffering/compression
        if (function_exists('apache_setenv')) {
            apache_setenv('no-gzip', '1');
            apache_setenv('dont-vary', '1');
        }
    }

    /* http://php.net/manual/en/function.scandir.php#80057 */
    /**
     * Recursively collect the readable .txt files below a directory.
     *
     * @param string $rootDir directory to scan
     * @param array  $allData entries collected so far (used by the recursion)
     * @return array list of array('path' => ..., 'file' => ..., 'id' => ...)
     */
    public function scanDirectories($rootDir, $allData = array()) {
        // set filenames invisible if you want
        $invisibleFileNames = array(".", "..", ".htaccess", ".htpasswd");

        if (!is_dir($rootDir) || !is_readable($rootDir)) {
            return $allData;
        }
        $dirContent = scandir($rootDir);
        if ($dirContent === false) {
            return $allData;
        }

        foreach ($dirContent as $content) {
            if (in_array($content, $invisibleFileNames, true)) continue;

            $path = $rootDir . '/' . $content;

            if (is_dir($path)) {
                // recursive call to open the sub directory
                if (is_readable($path)) {
                    $allData = $this->scanDirectories($path, $allData);
                }
                continue;
            }

            // only page files are of interest
            if (pathinfo($path, PATHINFO_EXTENSION) != 'txt') continue;

            if (is_file($path) && is_readable($path)) {
                $allData[] = array(
                    'path' => $path,
                    'file' => $content,
                    'id'   => $this->path_to_id($path),
                );
            }
        }
        return $allData;
    }

    /**
     * Download the CA certificate bundle into the plugin's ca directory,
     * the place link_check() reads it from.
     */
    public function downloadPem() {
        @set_time_limit(60);
        $SavePath = __DIR__ . '/ca/cacert.pem';
        $url = "https://curl.haxx.se/ca/cacert.pem";
        io_makeFileDir($SavePath);

        $http = new DokuHTTPClient();
        $http->max_bodysize = 32777216;
        $http->timeout = 120;
        $http->keep_alive = false;

        $data = $http->get($url);
        if (!$data) {
            $this->say('download failed', $url);
            return;
        }

        $fp = @fopen($SavePath, 'wb');
        if ($fp === false) {
            $this->say('write_fail', $SavePath);
            return;
        }
        $written = fwrite($fp, $data);
        fclose($fp);    // closed before the result is examined, so a failed write cannot leak the handle
        if ($written === false || $written < strlen($data)) {
            $this->say('write_fail', $SavePath);
            return;
        }
        $this->say('file_saved', $SavePath);
    }

    /**
     * Print "<first argument>: <second argument>" and flush the output.
     * Missing arguments are printed as empty strings.
     */
    public function say() {
        $args = array_pad(func_get_args(), 2, '');
        echo vsprintf("%s: %s\n", $args);
        // ob_flush() raises a notice when no buffer is active
        if (ob_get_level() > 0) {
            ob_flush();
        }
    }

    /**
     * Append a line to the debug log. Does nothing unless the debug handle
     * has been opened (see the commented out line in the constructor).
     *
     * @param string $data
     */
    public function write_debug($data) {
        if (!$this->debug_handle) return;
        fwrite($this->debug_handle, "$data\n");
    }

    /**
     * Escape a value for use in html text or in a quoted attribute.
     */
    private function escape_html($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * The larger one of the plugin's max_time setting and PHP's
     * max_execution_time.
     */
    private function effective_max_time() {
        return max((int) $this->getConf('max_time'), (int) ini_get('max_execution_time'));
    }

    /**
     * Turn the requested namespace into a directory inside the pages root.
     *
     * @return string|false the directory, or false when it does not exist
     *                      or lies outside of the pages root
     */
    private function resolve_scan_dir() {
        if ($this->wikiRoot === false) {
            return false;
        }
        if (!isset($this->dir)) {
            return $this->wikiRoot;
        }

        $relative = str_replace(':', '/', trim($this->dir, ':'));
        // realpath() resolves "." and ".." segments, which makes the
        // containment test below reliable
        $real = realpath($this->wikiRoot . '/' . $relative);
        if ($real === false || !is_dir($real)) {
            return false;
        }
        if ($real !== $this->wikiRoot
            && strpos($real, $this->wikiRoot . DIRECTORY_SEPARATOR) !== 0) {
            return false;
        }
        return $real;
    }

    /**
     * Convert the path of a page file into a colon separated page id: the
     * pages root prefix and the .txt suffix are removed and directory
     * separators become colons.
     */
    private function path_to_id($path) {
        $root = (string) $this->wikiRoot;
        if ($root !== '' && strpos($path, $root) === 0) {
            $path = substr($path, strlen($root));
        }
        if (substr($path, -4) === '.txt') {
            $path = substr($path, 0, -4);
        }
        return str_replace(array('/', '\\'), ':', $path);
    }
}