1<?php
2if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
3require_once (DOKU_INC . 'inc/utf8.php');
4require_once (DOKU_INC . 'inc/pageutils.php');
5require_once (DOKU_INC . 'inc/io.php');
6require_once (DOKU_INC . 'conf/dokuwiki.php');
// Driver: walk every readable file below data/pages and report broken
// external links found in it.
//
// $wikiRoot is read by scanDirectories() through "global" to turn file paths
// into page ids. The declaration below has no effect at file scope; it only
// matters if this script is included from inside a function.
global  $wikiRoot;
$dir = realpath(DOKU_INC . 'data/pages');
if ($dir === false) {
    // realpath() returns false for a missing or inaccessible directory;
    // stop here instead of handing an empty path to scandir()
    echo "Cannot resolve page directory: " . DOKU_INC . "data/pages\n";
    exit(1);
}
$wikiRoot = $dir;
echo "$dir\n";
$site = scanDirectories($dir);

foreach ($site as $data) {
    $handle = fopen($data['path'], "r");
    // fopen() yields false when the file cannot be opened; such files are skipped
    if ($handle) {
        parse_dwfile($handle, $data['id'], $data['path']);
        fclose($handle);
    }
}
21
/**
 * Reads an open wiki page line by line, checks every external
 * [[http://...]] or [[https://...]] link with link_check() and reports the
 * links whose status is not 200, 300 or 301.
 *
 * Each failing link is echoed as "<status>:  <id>:", a newline, a tab and
 * the URL.
 *
 * @param resource $handle open, readable stream; it is read to the end and
 *                         left open for the caller to close
 * @param string   $id     page identifier printed next to each failing link
 * @param string   $path   file path of the page (not used by this function)
 * @return void
 */
function parse_dwfile($handle, $id, $path) {
    // fgets() returns false at end of file or on a read error, ending the loop
    while (($buffer = fgets($handle)) !== false) {
        // one line may contain several links, so collect all of them
        if (!preg_match_all("#\[\[(https?://.*?)\]\]#", $buffer, $matches)) {
            continue;
        }
        foreach ($matches[1] as $target) {
            // keep only the URL, dropping an optional "|label" part
            $parts = explode('|', $target, 2);
            $url = $parts[0];
            $status = link_check($url);
            if ($status != "200" && $status != "300" && $status != "301") {
                echo $status . ":  $id:\n\t";
                echo $url . "\n";
            }
        }
    }
}
36
/**
 * Requests a URL with cURL and reports the outcome as a status string.
 *
 * Surrounding spaces and quotes are trimmed from the URL and HTML entities
 * are decoded before the request. Up to 5 redirects are followed and the
 * whole transfer is limited to 10 seconds.
 *
 * @param string $url link target as found in the page text
 * @return string the HTTP status code reported by cURL after the transfer,
 *                or "500:  <message>" when the handle cannot be created or
 *                cURL reports an error
 */
function link_check($url) {
    $url = trim($url, ' "\'' );
    $url = html_entity_decode($url);
    $ch = curl_init($url);
    if ($ch === false) {
        // curl_init() returns false on failure; report it like any other cURL error
        return "500:  could not initialise cURL";
    }
    // curl --remote-name --time-cond cacert.pem https://curl.haxx.se/ca/cacert.pem
    //curl_setopt($ch, CURLOPT_CAINFO, __DIR__ . "/certs/cacert.pem");
    //curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    // return the body from curl_exec() instead of printing it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // the response body is not needed, only the transfer result
    curl_exec($ch);
    if (curl_errno($ch)) {
        $result = "500:  " . curl_error($ch);
    } else {
        $result = trim((string) curl_getinfo($ch, CURLINFO_HTTP_CODE));
    }
    // release the handle on both the error and the success path
    curl_close($ch);
    return $result;
}
57/* http://php.net/manual/en/function.scandir.php#80057 */
/**
 * Recursively collects every readable file below a directory.
 *
 * Entries named ".", "..", ".htaccess" and ".htpasswd" are skipped, as are
 * unreadable files and directories. For each file an id is built from its
 * path: the leading global $wikiRoot prefix is removed (when the path starts
 * with it), a trailing ".txt" is removed, and "/" and "\" become ":".
 *
 * @param string $rootDir directory to scan
 * @param array  $allData entries collected so far (used by the recursion)
 * @return array list of array('path' => ..., 'file' => ..., 'id' => ...)
 */
function scanDirectories($rootDir, $allData=array()) {
    global $wikiRoot;
    // names that are never reported
    $invisibleFileNames = array(".", "..", ".htaccess", ".htpasswd");

    $dirContent = scandir($rootDir);
    if ($dirContent === false) {
        // directory could not be read; keep what has been collected so far
        return $allData;
    }

    foreach ($dirContent as $content) {
        if (in_array($content, $invisibleFileNames, true)) {
            continue;
        }
        $path = $rootDir . '/' . $content;
        if (!is_readable($path)) {
            continue;
        }
        if (is_file($path)) {
            $ns = $path;
            // strip the wiki root only where it is the leading part of the path
            if (is_string($wikiRoot) && $wikiRoot !== '' && strpos($path, $wikiRoot) === 0) {
                $ns = substr($path, strlen($wikiRoot));
            }
            // remove the page extension only at the end of the name
            if (substr($ns, -4) === '.txt') {
                $ns = substr($ns, 0, -4);
            }
            // both kinds of path separator become namespace separators
            $ns = str_replace(array('/', '\\'), ':', $ns);
            $allData[] = array('path'=>$path, 'file'=>$content, 'id'=>$ns);
        } elseif (is_dir($path)) {
            // descend into the sub-directory, carrying the results along
            $allData = scanDirectories($path, $allData);
        }
    }
    return $allData;
}
84?>
85