<?php
/**
 * Helper component of the orphanswanted plugin.
 *
 * Walks the page directory, counts internal links between pages and renders
 * HTML tables of orphaned pages (exist, no incoming links), wanted pages
 * (linked, but missing) and valid pages (exist and are linked).
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     <dae@douglasedmunds.com>
 * @author     Andy Webber <dokuwiki at andywebber dot com>
 * @author     Federico Ariel Castagnini
 * @author     Cyrille37 <cyrille37@gmail.com>
 * @author     Matthias Schulte <dokuwiki@lupo49.de>
 * @author     Rik Blok <rik dot blok at ubc dot ca>
 * @author     Christian Paul <christian at chrpaul dot de>
 */
// must be run within Dokuwiki
if(!defined('DOKU_INC')) die();
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');

require_once(DOKU_INC.'inc/search.php');

class helper_plugin_orphanswanted extends DokuWiki_Plugin {

    /**
     * Callback for search(): registers one page file in $data and counts its outgoing links.
     *
     * $data maps a lowercased page id to array('exists' => bool, 'links' => int).
     *
     * NOTE(review): the links of a page are counted (and the
     * PLUGIN_ORPHANS_WANTED_PROCESS_PAGE event is fired) before the ACL of the
     * page itself is checked, so links found on pages the current user cannot
     * read still contribute to the counters. Kept as-is; confirm this is intended.
     *
     * @param array  $data result collector, modified in place
     * @param string $base base directory handed over by search() (only passed through)
     * @param string $file path of the current file, relative to the data directory
     * @param string $type 'd' for directories; anything else is handled as a file
     * @param int    $lvl  recursion level handed over by search() (only passed through)
     * @param array  $opts search options handed over by search() (only passed through)
     * @return bool true for directories, non-.txt files and recorded pages;
     *              false when the current user may not read the page
     */
    public function orph_callback_search_wanted(&$data, $base, $file, $type, $lvl, $opts) {
        if($type == 'd') {
            return true; // recurse all directories, but we don't store namespaces
        }

        if(!preg_match("/.*\.txt$/", $file)) {
            // Ignore everything but TXT
            return true;
        }

        // search the body of the file for links
        $this->orph_Check_InternalLinks($data, $base, $file, $type, $lvl, $opts);

        // let other plugins contribute to / adjust the collected data
        $eventData = array(
            'data' => &$data,
            'file' => $file
        );
        trigger_event('PLUGIN_ORPHANS_WANTED_PROCESS_PAGE', $eventData);

        // id of this file, always lowercase so it matches the lowercased link targets
        $id = utf8_strtolower(pathID($file));

        // check ACL
        if(auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }

        if(isset($data[$id])) {
            // a link to this page was seen earlier; now we know the page exists
            $data[$id]['exists'] = true;
        } else {
            $data[$id] = array('exists' => true, 'links' => 0);
        }
        return true;
    }

    /**
     * Counts one incoming link for page id $link.
     *
     * @param array  $data result collector, modified in place
     * @param string $link page id the link points to
     * @return void
     */
    public function orph_handle_link(&$data, $link) {
        global $conf;

        if(isset($data[$link])) {
            // known target (either the file or an earlier link): count the link
            $data[$link]['links']++;
        } else {
            // only found a link so far, not the file
            $data[$link] = array(
                'exists' => false,
                'links'  => 1
            );
        }

        if(!empty($conf['allowdebug'])) {
            echo "<p>-- New count for link <b>" . hsc($link) . "</b>: " . $data[$link]['links'] . "</p>\n";
        }
    }

    /**
     * Search for internal wiki links in page $file and count them in $data.
     *
     * Content of blocks that do not render links (nowiki, %%, php, html, code,
     * file and indented preformatted lines) is removed before links are extracted.
     *
     * @param array  $data result collector, modified in place
     * @param string $base unused here
     * @param string $file path of the page file, relative to $conf['datadir']
     * @param string $type unused here
     * @param int    $lvl  unused here
     * @param array  $opts unused here
     * @return void
     */
    public function orph_Check_InternalLinks(&$data, $base, $file, $type, $lvl, $opts) {
        global $conf;

        if(!defined('LINK_PATTERN')) define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%');

        if(!preg_match("/.*\.txt$/", $file)) {
            return;
        }

        $debug     = !empty($conf['allowdebug']);
        $currentID = pathID($file);
        $currentNS = getNS($currentID);

        if($debug) echo sprintf("<p><b>%s</b>: %s</p>\n", hsc($file), hsc($currentID));

        // warnings are suppressed so an unreadable file does not leak into the page output
        $body = @file_get_contents($conf['datadir'] . $file);
        if($body === false) {
            return; // nothing readable, hence no links to count
        }

        // ignores entries in blocks that ignore links
        $ignoredBlocks = array(
            '@<nowiki>.*?<\/nowiki>@su',
            '@%%.*?%%@su',
            '@<php>.*?</php>@su',
            '@<PHP>.*?</PHP>@su',
            '@<html>.*?</html>@su',
            '@<HTML>.*?</HTML>@su',
            '@^( {2,}|\t)[^\*\- ].*?$@mu',
            '@<code[^>]*?>.*?<\/code>@su',
            '@<file[^>]*?>.*?<\/file>@su'
        );
        foreach($ignoredBlocks as $pattern) {
            $stripped = preg_replace($pattern, '', $body);
            // preg_replace() yields NULL on a PCRE error (e.g. invalid UTF-8 with
            // the u modifier); keep the previous text instead of losing every link
            if($stripped !== null) {
                $body = $stripped;
            }
        }

        $links = array();
        preg_match_all(LINK_PATTERN, $body, $links);

        foreach($links[1] as $link) {
            // $link is raw page content here, so it must be escaped before echoing
            if($debug) echo sprintf("--- Checking %s<br />\n", hsc($link));

            if($this->orph_is_internal_link($link)) {
                // remove parameters
                $link = preg_replace('/\?.*/', '', $link);

                // $link is handed over by name and used as the target page id
                // afterwards; presumably resolve_pageid() rewrites it relative
                // to $currentNS — confirm against DokuWiki core
                $pageExists = false;
                resolve_pageid($currentNS, $link, $pageExists);
                if($debug) echo sprintf("---- link='%s' %s ", hsc($link), $pageExists ? 'EXISTS' : 'MISS');

                // there IS an id and the target should be visible to the user
                if(strlen(ltrim($link)) > 0 && auth_quickaclcheck($link) >= AUTH_READ) {
                    if($debug) echo ' A_LINK';

                    $link = utf8_strtolower($link);
                    $this->orph_handle_link($data, $link);
                } else {
                    if($debug) echo ' EMPTY_OR_FORBIDDEN';
                }
            } else {
                if($debug) echo ' NOT_INTERNAL';
            }

            if($debug) echo "<br />\n";
        } // end of foreach link
    }

    /**
     * Tells whether a raw link target looks like a link to a wiki page.
     *
     * @param string $link link target as captured by LINK_PATTERN
     * @return bool false for empty targets, interwiki links, Windows shares,
     *              external URLs, e-mail addresses and in-page anchors
     */
    protected function orph_is_internal_link($link) {
        if(strlen(ltrim($link)) == 0) return false;
        // Interwiki
        if(preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u', $link)) return false;
        // Windows Share
        if(preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u', $link)) return false;
        // external link (accepts all protocols)
        if(preg_match('#^([a-z0-9\-\.+]+?)://#i', $link)) return false;
        // E-Mail (pattern is defined in inc/mail.php)
        if(preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>', $link)) return false;
        // inside page link (html anchor)
        if(preg_match('!^#.+!', $link)) return false;

        return true;
    }

    /**
     * Runs the page search and returns the collected link data.
     *
     * @return array page id => array('exists' => bool, 'links' => int)
     */
    protected function orph_collect_data() {
        global $conf, $ID;

        $data = array();
        search($data, $conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID)));

        return $data;
    }

    // three choices
    // $params_array used to extract excluded namespaces for report
    // orphans = orph_report_table($data, true, false, $params_array);
    // wanted  = orph_report_table($data, false, true, $params_array);
    // valid   = orph_report_table($data, true, true, $params_array);

    /**
     * Table of pages that exist but are not linked from anywhere.
     *
     * @param array $params_array see orph_report_table()
     * @return string HTML
     */
    public function orphan_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, false, $params_array, 'orphan');
    }

    /**
     * Table of pages that are linked but do not exist.
     *
     * @param array $params_array see orph_report_table()
     * @return string HTML
     */
    public function wanted_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), false, true, $params_array, 'wanted');
    }

    /**
     * Table of pages that exist and are linked.
     *
     * @param array $params_array see orph_report_table()
     * @return string HTML
     */
    public function valid_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, true, $params_array, 'valid');
    }

    /**
     * All three tables (orphans, wanted, valid) built from a single search run.
     *
     * @param array $params_array see orph_report_table()
     * @return string HTML
     */
    public function all_pages($params_array) {
        $data = $this->orph_collect_data();

        $result  = "</p><p>Orphans</p><p>";
        $result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');
        $result .= "</p><p>Wanted</p><p>";
        $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
        $result .= "</p><p>Valid</p><p>";
        $result .= $this->orph_report_table($data, true, true, $params_array, 'valid');

        return $result;
    }

    /**
     * Renders the pages of $data that match the exists/has-links combination as an HTML table.
     *
     * Rows are ordered by link count (descending) and then by page id, except
     * for the 'orphan' report which has no link column and is ordered by id only.
     * The output starts with '</p>' and ends with '<p>' because it is emitted
     * inside an open paragraph.
     *
     * @param array       $data         page id => array('exists' => bool, 'links' => int)
     * @param bool        $page_exists  list existing (true) or missing (false) pages
     * @param bool        $has_links    list pages with (true) or without (false) incoming links
     * @param array       $params_array index 1 is read as the list of namespaces to include
     *                                  (empty: all), index 2 as the list of namespaces to
     *                                  exclude; index 0 is not used here
     * @param string|null $caller       'orphan' suppresses the link column and link sorting
     * @return string HTML
     */
    public function orph_report_table($data, $page_exists, $has_links, $params_array, $caller = null) {
        global $conf;
        $debug = !empty($conf['allowdebug']);

        // Fetch pages which shouldn't be listed (semicolon separated ids)
        $ignoredPages = array();
        $ignoredConf  = (string) $this->getConf('ignoredpages');
        if($ignoredConf !== '') {
            $ignoredPages = array_filter(array_map('trim', explode(';', $ignoredConf)), 'strlen');
        }

        $show_heading = $page_exists && !empty($conf['useheading']);
        $show_links   = ($caller != "orphan");

        // a missing or malformed entry means "no restriction"
        $include_array = $this->orph_namespace_list($params_array, 1);
        $exclude_array = $this->orph_namespace_list($params_array, 2);

        // pick the entries that belong into this report
        $ids        = array();
        $linkCounts = array();
        foreach($data as $id => $item) {
            $exists = !empty($item['exists']);
            $links  = isset($item['links']) ? (int) $item['links'] : 0;
            if($exists != (bool) $page_exists || ($links != 0) != (bool) $has_links) continue;

            // numeric page names come back as integer array keys
            $ids[]        = (string) $id;
            $linkCounts[] = $links;
        }

        if($ids) {
            if($show_links) {
                // most linked first, equal counts by namespace and name
                array_multisort($linkCounts, SORT_DESC, SORT_NUMERIC, $ids, SORT_ASC, SORT_STRING);
            } else {
                // orphans have no links to sort by: namespace and name only
                array_multisort($ids, SORT_ASC, SORT_STRING, $linkCounts);
            }
        }

        // for valid html - need to close the <p> that is fed before this
        $output  = '</p>';
        $output .= '<table class="inline"><tr><th> # </th><th> ID </th>'
                    . ($show_heading ? '<th>Title</th>' : '')
                    . ($show_links ? '<th>Links</th>' : '')
                    . '</tr>'
                    . "\n";

        $count = 1;
        foreach($ids as $index => $id) {
            // namespace part of the id with a trailing colon:
            // "page" => ":", "ns:sub:page" => "ns:sub:"
            $lastColon      = strrpos($id, ':');
            $page_namespace = ($lastColon === false ? '' : substr($id, 0, $lastColon)) . ':';

            // an empty inclusion list shows all namespaces
            $show_it = empty($include_array)
                || $this->orph_namespace_matches($page_namespace, $include_array);

            if(in_array($id, $ignoredPages, true)) {
                if($debug) echo "Skipped page (global ignored): " . hsc($id) . "<br />";
                $show_it = false;
            } elseif(isHiddenPage($id)) {
                if($debug) echo "Skipped page (global hidden): " . hsc($id) . "<br />";
                $show_it = false;
            } elseif($show_it && $this->orph_namespace_matches($page_namespace, $exclude_array)) {
                // blocked by exclusion list
                $show_it = false;
            }

            if(!$show_it) continue;

            $output .= "<tr><td>$count</td><td><a href=\"" . wl($id)
                        . "\" class=\"" . ($page_exists ? "wikilink1" : "wikilink2") . "\" >"
                        . hsc($id) . '</a></td>'
                        . ($show_heading ? '<td>' . hsc(p_get_first_heading($id)) . '</td>' : '');

            if($show_links) { // Skip "link" column if user wants orphan pages only
                $output .= '<td>' . $linkCounts[$index]
                            . ($has_links ? " : <a href=\"" . wl($id, 'do=backlink')
                            . "\" class=\"wikilink1\">Show backlinks</a>" : '') . "</td>";
            }
            $output .= "</tr>\n";
            $count++;
        }

        $output .= "</table>\n";
        // for valid html - need to reopen a <p>
        $output .= '<p>';

        return $output;
    }

    /**
     * Returns entry $index of $params_array if it is an array, an empty array otherwise.
     *
     * @param mixed $params_array parameter list handed to the report methods
     * @param int   $index        position to read
     * @return array
     */
    protected function orph_namespace_list($params_array, $index) {
        if(is_array($params_array) && isset($params_array[$index]) && is_array($params_array[$index])) {
            return $params_array[$index];
        }
        return array();
    }

    /**
     * Tells whether $page_namespace starts with one of the given namespaces.
     *
     * @param string $page_namespace namespace of a page with a trailing colon
     * @param array  $namespaces     namespaces without a trailing colon
     * @return bool
     */
    protected function orph_namespace_matches($page_namespace, $namespaces) {
        foreach($namespaces as $namespace) {
            // the trailing colon keeps "foo" from matching "foobar:"
            // need === because strpos() returns false when there is no match
            if(strpos($page_namespace, $namespace . ':') === 0) {
                return true;
            }
        }
        return false;
    }
}