<?php
/**
 * External link checker for DokuWiki page files.
 *
 * Walks every readable file below DOKU_INC/data/pages, extracts the
 * http/https targets written as [[url]] or [[url|title]], requests each one
 * with cURL and prints those whose final status is not 200, 300 or 301.
 *
 * Output format, one entry per failing link:
 *   <status>: <page id>:
 *   <TAB><url>
 */
if (!defined('DOKU_INC')) define('DOKU_INC', dirname(__FILE__) . '/../../');
require_once(DOKU_INC . 'inc/utf8.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/io.php');
require_once(DOKU_INC . 'conf/dokuwiki.php');

// scanDirectories() reads this global to turn absolute paths into page ids.
global $wikiRoot;

$dir = realpath(DOKU_INC . 'data/pages');
if ($dir === false) {
    // realpath() returns false when the directory does not exist; passing
    // that on to scandir() would only produce warnings or a ValueError.
    echo "Pages directory not found: " . DOKU_INC . "data/pages\n";
    exit(1);
}
$wikiRoot = $dir;
echo "$dir\n";

$site = scanDirectories($dir);

foreach ($site as $data) {
    $handle = fopen($data['path'], 'r');
    if ($handle) {
        parse_dwfile($handle, $data['id'], $data['path']);
        fclose($handle);
    }
}

/**
 * Check every external link found in an open page file and report failures.
 *
 * Reads the stream line by line. Each [[http(s)://...]] occurrence on a line
 * is checked (not only the first one); the part after an optional '|' is the
 * link title and is ignored.
 *
 * @param resource $handle open, readable file handle for the page
 * @param string   $id     page id printed next to a failing link
 * @param string   $path   file system path of the page (currently unused)
 * @return void
 */
function parse_dwfile($handle, $id, $path) {
    // Statuses that are treated as "link is fine"; link_check() returns strings.
    $accepted = array('200', '300', '301');

    // fgets() returns false at EOF or on error, which ends the loop without
    // processing a bogus final "line".
    while (($buffer = fgets($handle)) !== false) {
        if (!preg_match_all('#\[\[(https?://.*?)\]\]#', $buffer, $matches)) {
            continue;
        }
        foreach ($matches[1] as $target) {
            // Split "url|title" once; a link without a title yields one element.
            $parts = explode('|', $target, 2);
            $url = $parts[0];

            $status = link_check($url);
            if (!in_array($status, $accepted, true)) {
                echo $status . ": $id:\n\t";
                echo $url . "\n";
            }
        }
    }
}

/**
 * Request a URL and return the resulting HTTP status.
 *
 * Surrounding spaces and quotes are trimmed and HTML entities are decoded
 * before the request. Up to 5 redirects are followed and the request is
 * aborted after 10 seconds.
 *
 * @param string $url URL to request
 * @return string the HTTP status code as a string, or "500: <message>" when
 *                cURL itself reports an error
 */
function link_check($url) {
    $url = trim($url, ' "\'');
    $url = html_entity_decode($url);

    $ch = curl_init($url);
    if ($ch === false) {
        return "500: curl_init failed";
    }

    // To verify against a specific CA bundle, fetch one with
    //   curl --remote-name --time-cond cacert.pem https://curl.haxx.se/ca/cacert.pem
    // and enable:
    //   curl_setopt($ch, CURLOPT_CAINFO, __DIR__ . "/certs/cacert.pem");
    //   curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // keep the body out of our output
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

    // Capture the error before closing so the handle is released on both paths.
    $error = curl_errno($ch) ? curl_error($ch) : null;
    curl_close($ch);

    if ($error !== null) {
        return "500: " . $error;
    }
    return trim("$httpcode");
}

/**
 * Recursively collect all readable files below a directory.
 *
 * Based on http://php.net/manual/en/function.scandir.php#80057
 *
 * Each entry of the result is an array with:
 *   'path' => full path of the file,
 *   'file' => file name,
 *   'id'   => path relative to the global $wikiRoot with directory
 *             separators replaced by ':' and a trailing '.txt' removed
 *             (so ids start with ':').
 *
 * @param string $rootDir directory to scan
 * @param array  $allData entries collected so far (used by the recursion)
 * @return array $allData extended with the entries found below $rootDir
 */
function scanDirectories($rootDir, $allData = array()) {
    global $wikiRoot;

    // Names that are never reported or descended into.
    $invisibleFileNames = array(".", "..", ".htaccess", ".htpasswd");

    $dirContent = scandir($rootDir);
    if ($dirContent === false) {
        // Unreadable or vanished directory: keep what has been collected.
        return $allData;
    }

    foreach ($dirContent as $content) {
        if (in_array($content, $invisibleFileNames, true)) {
            continue;
        }
        $path = $rootDir . '/' . $content;

        if (is_file($path) && is_readable($path)) {
            // Strip the wiki root only where it is the leading part of the path.
            $ns = $path;
            if (is_string($wikiRoot) && $wikiRoot !== '' && strpos($path, $wikiRoot) === 0) {
                $ns = substr($path, strlen($wikiRoot));
            }
            // Remove the page extension only at the end of the name, then turn
            // both kinds of directory separator into namespace separators.
            $ns = preg_replace('/\.txt$/', '', $ns);
            $ns = str_replace(array('/', '\\'), ':', $ns);

            $allData[] = array('path' => $path, 'file' => $content, 'id' => $ns);
        } elseif (is_dir($path) && is_readable($path)) {
            // Descend into the sub-directory, accumulating into the same list.
            $allData = scanDirectories($path, $allData);
        }
    }
    return $allData;
}