1<?php
2/**
3 *   @author Myron Turner <turnermm02@shaw.ca>
4 *   @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
5*/
/**
 * Admin component of the xtern plugin: scans wiki page files for external
 * http(s) links, probes each one with cURL and lists the links that do not
 * answer with an accepted status. Broken links found by a scan are stored in
 * a serialized "accumulator" file so they can be re-checked later.
 */
class admin_plugin_xtern extends DokuWiki_Admin_Plugin {
    /** @var bool set by handle() when the download button was pressed */
    private $dnld = false;
    /** @var bool set by handle() when the check-links button was pressed */
    private $check = false;
    /** @var string|false real path of DOKU_INC/data/pages (false if it cannot be resolved) */
    private $wikiRoot;
    /** @var string|null namespace typed into the form, colon separated */
    private $dir = NULL;
    /** @var string path of the serialized list of broken links */
    private $accumulator = null;
    /** @var array page id => list of broken urls collected during a scan */
    private $broken = array();
    /** @var bool set by handle() when the review button was pressed */
    private $review = false;
    /** @var array page ids whose table header row has already been printed */
    private $headers;
    /** @var resource|null debug log handle; never opened in this class */
    private $debug_handle = null;
    /**
     * @var string|null name of the command accepted by handle(); declared
     *      explicitly because dynamic properties are deprecated since PHP 8.2
     */
    public $output = null;

    public function __construct() {
        $this->wikiRoot = realpath(DOKU_INC . 'data/pages');
        $this->accumulator = metaFN('xtern:accumulator', '.ser');
    }

    /**
     * Read the submitted command and remember which action html() must run.
     * Nothing is selected unless the security token is valid.
     */
    public function handle() {
        if (!isset($_REQUEST['cmd'])) return;   // first time - nothing to do

        $this->output = 'invalid';

        if (!checkSecurityToken()) return;
        if (!is_array($_REQUEST['cmd'])) return;

        switch (key($_REQUEST['cmd'])) {
            case 'check_links':
                $this->output = 'check_links';
                $this->check = true;
                // only a plain string is usable as a namespace; dir[]=x would break trim() later
                if (!empty($_REQUEST['dir']) && is_string($_REQUEST['dir'])) {
                    $this->dir = $_REQUEST['dir'];
                }
                break;
            case 'download':
                $this->output = 'download';
                $this->dnld = true;
                break;
            case 'review_links':
                $this->output = 'reviews';
                $this->review = true;
                break;
        }
    }

    /**
     * Output the form and run the action selected by handle().
     */
    public function html() {
        $max_time = $this->max_exec_time();

        $this->buttons($max_time);

        if ($this->check) {
            $this->check_links($max_time);
        } elseif ($this->dnld) {
            $this->downloadPem();
        } elseif ($this->review) {
            $this->review_links($max_time);
        }
    }

    /**
     * Scan every page file below the requested namespace (or the whole pages
     * directory), print the broken links as they are found and store them in
     * the accumulator file.
     *
     * @param int|string $max_time execution time limit in seconds
     */
    public function check_links($max_time) {
        set_time_limit((int) $max_time);
        $this->disable_ob();
        $this->headers = array();
        // disable_ob() discards buffered output, so the form is printed again
        $this->buttons($max_time, $this->dir);

        $dir = $this->resolve_scan_dir();
        ptln('<div id="xtern_chklnk"><hr>');
        if ($dir === false) {
            // leave the stored results untouched when there is nothing to scan
            echo 'Cannot check: ' . $this->esc_html($this->dir) . " is not a namespace directory<br />";
            ptln('</div>' . NL);
            return;
        }

        echo 'Checking: ' . $this->esc_html($dir) . '<br />';
        usleep(300000);
        $site = $this->scanDirectories($dir);
        echo "Checking links\n<br />";
        echo "<table>\n";
        usleep(300000);
        foreach ($site as $data) {
            $handle = fopen($data['path'], "r");
            if ($handle) {
                $this->parse_dwfile($handle, $data['id'], $data['path']);
                fclose($handle);
            }
        }
        ptln("</table><b>DONE</b>");
        ptln('</div>' . NL);
        io_saveFile($this->accumulator, serialize($this->broken));
    }

    /**
     * Re-check the links stored by the last scan and print those still broken.
     *
     * @param int|string|null $max_time execution time limit in seconds; when
     *        omitted the same value html() uses is computed
     */
    public function review_links($max_time = null) {
        if ($max_time === null) {
            $max_time = $this->max_exec_time();
        }

        $stored = io_readFile($this->accumulator, false);
        $reviews_ar = ($stored === '' || $stored === false) ? array() : unserialize($stored);
        if (!is_array($reviews_ar)) {
            $reviews_ar = array();   // missing or corrupt accumulator
        }

        set_time_limit((int) $max_time);
        $this->disable_ob();
        $this->buttons($max_time);

        ptln('<div id="xtern_review"><hr>');
        ptln('<table>');
        foreach ($reviews_ar as $id => $errors) {
            ptln('<tr><th>' . $this->esc_html($id) . '</th></tr>');
            foreach ((array) $errors as $error) {
                // empty line number: do_check() reports without re-recording the link
                $this->do_check($error, "", $id);
            }
        }
        ptln("</table><b>DONE</b>");
        ptln('</div>' . NL);
    }

    /**
     * Print the plugin header text and the form with the three command buttons.
     *
     * @param int|string $max_time shown below the form when not empty
     * @param string     $ns       value pre-filled into the namespace field
     */
    public function buttons($max_time = "", $ns = "") {
        global $ID;   // wl() needs the current page id

        ptln('<div id="xtern_info">');
        echo $this->locale_xhtml('header');
        ptln('</div>');
        ptln('<div id="xtern_adminform">' . NL);
        ptln('<form action="' . wl($ID) . '" method="post">');
        // output hidden values to ensure dokuwiki will return back to this plugin
        ptln('  <input type="hidden" name="do"   value="admin" />');
        ptln('  <input type="hidden" name="page" value="' . $this->getPluginName() . '" />');
        formSecurityToken();
        ptln('  <input type="submit" name="cmd[download]" class  = "xtern_font" value="' . $this->getLang('btn_download') . '" />');
        ptln('&nbsp;  <input type="submit" name="cmd[check_links]" class  = "xtern_font" value="' . $this->getLang('btn_check_links') . '" />');
        ptln('  <label><span class="xtern_font">' . $this->getLang('ns') . '</span> ');
        // $ns comes from the request, so it must be escaped inside the attribute
        ptln(' <input type="textbox" name="dir"  value="' . $this->esc_html($ns) . '" /></label>&nbsp;');
        ptln('&nbsp;  <input type="submit" name="cmd[review_links]" class  = "xtern_font" value="' . $this->getLang('btn_review') . '" />');
        ptln('&nbsp;  <input type="button"  class = "xtern_info_but" value = "' . $this->getLang('info_show') . '">');
        ptln('</form>');
        if ($max_time) {
            ptln('<br />' . $this->getLang('max_time') . ":  $max_time");
        }
        ptln('</div>');
    }

    /**
     * Build an edit link to the page that contains a broken link.
     *
     * @param string $id  wiki page
     * @param string $url broken link address, passed on as xtern_url
     * @return string html anchor, preceded by one space
     */
    public function local_url($id, $url) {
        $id = trim($id, ':');
        $id = str_replace(array('"', "'"), '', $id);
        $href = DOKU_URL . 'doku.php?id=' . $id . '&xtern_url=' . rawurlencode($url) . '&do=edit';
        return " <a href='" . $this->esc_html($href) . "' target = 'xtern_xtern' class='wikilink1'>"
            . $this->esc_html($id) . "</a>";
    }

    /**
     * Record a broken url under its page id.
     *
     * @param string $id  wiki page
     * @param string $url broken link address
     */
    public function add_broken($id, $url) {
        $id = trim($id, ':');
        if (!isset($this->broken[$id])) {
            $this->broken[$id] = array();
        }
        $this->broken[$id][] = $url;
    }

    /**
     * Read a page file line by line and check every http(s) url found outside
     * <code>/<file> blocks and outside one-line <nowiki> spans.
     *
     * @param resource $handle open file handle of the page
     * @param string   $id     page id used in the report
     * @param string   $path   file path (not used by this method)
     */
    public function parse_dwfile($handle, $id, $path) {
        $in_code = false;
        $in_file = false;
        $lineno = 0;
        while (($buffer = fgets($handle)) !== false) {
            $lineno++;
            if ($in_code) {
                if (preg_match("#<\/code>#", $buffer)) {
                    $in_code = false;
                }
                else continue;
            }
            if ($in_file) {
                if (preg_match("#\<\/file>#", $buffer)) {
                    $in_file = false;
                }
                else continue;
            }
            if (preg_match("#^\s*\<code.*?>#", $buffer)) {
                // a block opened and closed on the same line must not swallow the following lines
                $in_code = !preg_match("#<\/code>#", $buffer);
                continue;
            }
            if (preg_match("#^\s*\<file.*?>#", $buffer)) {
                $in_file = !preg_match("#\<\/file>#", $buffer);
                continue;
            }
            if (preg_match("#\<nowiki>#", $buffer)) {
                if (preg_match('#\<nowiki>.*?https?:\/\/.*?\<\/nowiki\>#', $buffer)) {
                    continue;
                }
            }
            if (!preg_match("#\[?(https?://\S+)\]?#", $buffer, $matches)) {
                continue;
            }

            preg_match_all("#https?://\S+#", $buffer, $submatches);
            if (count($submatches[0]) > 1) {
                foreach ($submatches[0] as $link) {
                    // drop trailing punctuation that \S+ picked up
                    $link = preg_replace("#[^\w\#\?\/]+$#m", "", $link);
                    $this->do_check($link, $lineno, $id);
                }
            }
            else {
                $this->do_check($matches[1], $lineno, $id);
            }
        }
    }

    /**
     * Probe one url and print a table row when it is considered broken.
     * Statuses 200, 300, 301 and 0 are accepted silently.
     *
     * @param string     $url    candidate url as cut from the page text
     * @param int|string $lineno line number in the page; when empty the link
     *                           is reported but not added to the broken list
     * @param string     $id     page id
     * @return string|null "" when a {{...}} fragment holds no url, otherwise null
     */
    public function do_check($url, $lineno = "", $id = "") {
        $url = trim($url, ' )(\\;:-!"\'.,');
        // keep only the part before a link title: url|title
        $parts = explode('|', $url, 2);
        $url = $parts[0];
        $header = $id ? '<tr><th>' . $this->esc_html($id) . '</th></tr>' : "";

        if (strpos($url, '{{') !== false || strpos($url, '}}') !== false) {
            // keep the scheme so the extracted address is still a usable url
            if (preg_match('#\{\{(https?://.*?)\}\}#', $url, $submatches)) {
                $url = $submatches[1];
            }
            else return "";
        }
        $url = trim($url, ']');

        $status = $this->link_check($url);
        if (in_array($status, array('200', '300', '301', '0'), true)) {
            return;
        }

        $link = $this->local_url($id, $url);
        if (strlen($url) > 1024) {
            $status = "414";
        }
        if ($lineno) {
            $this->add_broken($id, $url);
            if (!isset($this->headers[$id])) {
                ptln($header);
                $this->headers[$id] = 1;
            }
        }

        // show at most 512 bytes of the url; the full url is what gets recorded
        $shown = strlen($url) > 512 ? substr($url, 0, 512) . '.  .  .' : $url;
        $shown = $this->esc_html($shown);

        ptln('<tr><td>');
        echo $this->esc_html($status) . ":  $link:\n<br />";
        usleep(300000);
        if ($lineno) {
            echo '&nbsp;&nbsp;&nbsp;&nbsp;line' . " $lineno:&nbsp;$shown" . "\n";
        }
        else {
            echo "&nbsp;&nbsp;&nbsp;&nbsp;<u>" . $this->getLang('bad_link') . ":</u> $shown\n";
        }
        ptln('</td></tr>');
        usleep(300000);
    }

    /**
     * Request the headers of a url with cURL.
     *
     * @param string $url
     * @return string http status code, or "Curl Erro: <errno>--<message>" on a cURL failure
     */
    public function link_check($url) {
        $url = html_entity_decode(trim($url, ' "\''));
        $ch = curl_init($url);
        if ($ch === false) {
            return "Curl Erro: " . CURLE_FAILED_INIT . "--curl_init failed";
        }
        // curl --remote-name --time-cond cacert.pem https://curl.haxx.se/ca/cacert.pem
        if ($this->getConf('ca_required')) {
            curl_setopt($ch, CURLOPT_CAINFO, $this->ca_bundle_path());
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        curl_setopt($ch, CURLOPT_NOBODY, 1); //just fetch headers
        curl_exec($ch);

        // collect everything before closing so the handle is released on every path
        $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curl_errno = curl_errno($ch);
        $curl_error = curl_error($ch);
        curl_close($ch);

        if ($curl_errno) {
            return "Curl Erro: " . $curl_errno . "--" . $curl_error;
        }
        return trim("$httpcode");
    }

    /**
     * Switch off output buffering so progress is sent while the scan runs.
     * Note that this discards whatever is still held in the buffers.
     *
     * @link https://stackoverflow.com/questions/1281140/run-process-with-realtime-output-in-php/5956708#5956708
     */
    public function disable_ob() {
        ini_set('output_buffering', 'off');
        ini_set('implicit_flush', true);
        ob_implicit_flush(true);
        // clear and close every buffer level; stop if a level refuses to close
        while (($level = ob_get_level()) > 0) {
            ob_end_clean();
            if (ob_get_level() == $level) break;
        }
        // disable apache output buffering/compression
        if (function_exists('apache_setenv')) {
            apache_setenv('no-gzip', '1');
            apache_setenv('dont-vary', '1');
        }
    }

    /**
     * Recursively collect the readable .txt files below a directory.
     *
     * @link http://php.net/manual/en/function.scandir.php#80057
     * @param string $rootDir directory to scan
     * @param array  $allData entries collected so far (used by the recursion)
     * @return array list of array('path' => ..., 'file' => ..., 'id' => ...)
     */
    public function scanDirectories($rootDir, $allData = array()) {
        $invisibleFileNames = array(".", "..", ".htaccess", ".htpasswd");

        $dirContent = scandir($rootDir);
        if ($dirContent === false) {
            return $allData;   // unreadable or missing directory
        }

        foreach ($dirContent as $content) {
            if (in_array($content, $invisibleFileNames, true)) continue;

            $path = $rootDir . '/' . $content;
            if (is_dir($path)) {
                if (is_readable($path)) {
                    $allData = $this->scanDirectories($path, $allData);
                }
                continue;
            }
            if (pathinfo($path, PATHINFO_EXTENSION) != 'txt') continue;
            if (!is_file($path) || !is_readable($path)) continue;

            // page id = path relative to the pages root, without the extension
            $ns = $path;
            if (is_string($this->wikiRoot) && $this->wikiRoot !== '' && strpos($path, $this->wikiRoot) === 0) {
                $ns = substr($path, strlen($this->wikiRoot));
            }
            $ns = substr($ns, 0, -strlen('.txt'));
            $ns = str_replace(array('/', '\\'), ':', $ns);
            $allData[] = array('path' => $path, 'file' => $content, 'id' => $ns);
        }
        return $allData;
    }

    /**
     * Download the CA bundle from curl.haxx.se into the plugin's ca directory
     * and report the result through say().
     */
    public function downloadPem() {
        @set_time_limit(60);
        $SavePath = $this->ca_bundle_path();
        $url = "https://curl.haxx.se/ca/cacert.pem";
        io_makeFileDir($SavePath);

        $http = new DokuHTTPClient();
        $http->max_bodysize = 32777216;
        $http->timeout = 120;
        $http->keep_alive = false;

        $data = $http->get($url);
        if (!$data) {
            $this->say('download failed', $url);
            return;
        }

        // failure is reported below, so the warning itself is suppressed
        $fp = @fopen($SavePath, 'wb');
        if ($fp === false) {
            $this->say('write_fail', $SavePath);
            return;
        }
        $written = fwrite($fp, $data);
        fclose($fp);   // close before reporting so the handle is never leaked
        if ($written === false || $written !== strlen($data)) {
            $this->say('write_fail', $SavePath);
            return;
        }
        $this->say('file_saved', $SavePath);
    }

    /**
     * Print "<first>:  <second>" from the first two arguments and flush the
     * output buffer when one is active.
     */
    public function say() {
        // vsprintf needs exactly two values for the two placeholders
        $args = array_pad(func_get_args(), 2, '');
        echo vsprintf("%s:  %s\n", $args);
        if (ob_get_level() > 0) {
            ob_flush();
        }
    }

    /**
     * Append a line to the debug log. Does nothing while no debug handle is
     * open, which is always the case in this class.
     *
     * @param string $data
     */
    public function write_debug($data) {
        if (!$this->debug_handle) return;
        fwrite($this->debug_handle, "$data\n");
    }

    /**
     * Larger of the configured max_time and PHP's max_execution_time.
     *
     * @return int seconds
     */
    private function max_exec_time() {
        return max((int) $this->getConf('max_time'), (int) ini_get('max_execution_time'));
    }

    /**
     * Location of the CA bundle used by link_check() and written by downloadPem().
     *
     * @return string
     */
    private function ca_bundle_path() {
        return __DIR__ . '/ca/cacert.pem';
    }

    /**
     * Escape a value for output in html text or attributes.
     *
     * @param mixed $text
     * @return string
     */
    private function esc_html($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Translate the requested namespace into a directory below the pages root.
     *
     * @return string|false directory to scan; false when the pages root is
     *         unknown, the namespace contains "." or ".." segments, or the
     *         directory does not exist
     */
    private function resolve_scan_dir() {
        if (!is_string($this->wikiRoot) || $this->wikiRoot === '') {
            return false;
        }
        if (!isset($this->dir)) {
            return is_dir($this->wikiRoot) ? $this->wikiRoot : false;
        }

        $ns = trim($this->dir, ':');
        if (strpos($ns, "\0") !== false) {
            return false;
        }
        // the namespace is user input: never let it climb out of the pages root
        foreach (preg_split('#[:/\\\\]#', $ns) as $segment) {
            if ($segment === '.' || $segment === '..') {
                return false;
            }
        }

        $dir = $this->wikiRoot . '/' . str_replace(':', '/', $ns);
        return is_dir($dir) ? $dir : false;
    }
}