<?php
/**
 * Helper component of the DokuWiki "orphanswanted" plugin.
 *
 * Provides the page scan and the HTML report tables for orphaned, wanted
 * and valid pages.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     <dae@douglasedmunds.com>
 * @author     Andy Webber <dokuwiki at andywebber dot com>
 * @author     Federico Ariel Castagnini
 * @author     Cyrille37 <cyrille37@gmail.com>
 * @author     Matthias Schulte <dokuwiki@lupo49.de>
 * @author     Rik Blok <rik dot blok at ubc dot ca>
 * @author     Christian Paul <christian at chrpaul dot de>
 */
// must be run within DokuWiki: abort when the file is requested directly
if(!defined('DOKU_INC')) die();
// fall back to the default plugin directory when DOKU_PLUGIN has not been defined yet
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');

// search() is used below to walk the page directory
require_once(DOKU_INC.'inc/search.php');

class helper_plugin_orphanswanted extends DokuWiki_Plugin {

    /**
     * Callback for search(): registers one page file and the links found in it.
     *
     * $data is filled with entries of the form
     *   lowercase page id => array('exists' => bool, 'links' => int)
     * where 'exists' tells whether a page file was found and 'links' counts the
     * internal links pointing at that id.
     *
     * @param array  $data  result array, modified in place
     * @param string $base  base directory of the search (only passed through)
     * @param string $file  path of the current file, relative to the base directory
     * @param string $type  'd' for directories, anything else is treated as a file
     * @param int    $lvl   recursion level (only passed through)
     * @param array  $opts  search options (only passed through)
     * @return bool  true for directories (so that they are recursed into), for
     *               non-.txt files and for registered pages; false when the
     *               current user may not read the page
     */
    function orph_callback_search_wanted(&$data, $base, $file, $type, $lvl, $opts) {

        if($type == 'd') {
            return true; // recurse all directories, but we don't store namespaces
        }

        if(!preg_match("/.*\.txt$/", $file)) {
            // Ignore everything but TXT
            return true;
        }

        // count the links contained in the body of this page
        $this->orph_Check_InternalLinks($data, $base, $file, $type, $lvl, $opts);

        // give other plugins the chance to add to or adjust the collected data
        $eventData = array(
            'data' => &$data,
            'file' => $file
        );
        trigger_event('PLUGIN_ORPHANS_WANTED_PROCESS_PAGE', $eventData);

        // id of this file, always stored in lowercase
        $id = utf8_strtolower(pathID($file));

        // pages the current user cannot read are not registered as existing
        if(auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }

        if(isset($data[$id])) {
            // a link to this page was seen earlier: keep its count, note that the file exists
            $data[$id]['exists'] = true;
        } else {
            $data[$id] = array('exists' => true, 'links' => 0);
        }
        return true;
    }

    /**
     * Count one link to $link in $data.
     *
     * An id that has no entry yet is created with 'exists' => false, because so
     * far only a link to it has been seen and not the page file itself.
     *
     * @param array  $data  result array (see orph_callback_search_wanted()), modified in place
     * @param string $link  id of the link target
     */
    function orph_handle_link(&$data, $link) {
        global $conf;

        if(isset($data[$link])) {
            $data[$link]['links']++;
        } else {
            $data[$link] = array(
                'exists' => false,  // Only found a link, not the file
                'links'  => 1
            );
        }

        if(!empty($conf['allowdebug'])) {
            echo "<p>-- New count for link <b>" . hsc((string) $link) . "</b>: " . $data[$link]['links'] . "</p>\n";
        }
    }

    /**
     * Search for internal wiki links in page $file and count them in $data.
     *
     * Text inside blocks that do not render links (nowiki, %%, php, html, code,
     * file and indented preformatted lines) is removed before the scan.
     * Interwiki links, Windows shares, external URLs, e-mail addresses and
     * pure anchors are not counted.
     *
     * Defines the global constant LINK_PATTERN on first use.
     *
     * @param array  $data  result array (see orph_callback_search_wanted()), modified in place
     * @param string $base  unused
     * @param string $file  path of the page file, relative to $conf['datadir']
     * @param string $type  unused
     * @param int    $lvl   unused
     * @param array  $opts  unused
     */
    function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts ) {
        global $conf;

        if (!defined('LINK_PATTERN')) define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%');

        if(!preg_match("/.*\.txt$/", $file)) {
            return;
        }

        $debug = !empty($conf['allowdebug']);

        $currentID = pathID($file);
        $currentNS = getNS($currentID);

        if($debug) printf("<p><b>%s</b>: %s</p>\n", hsc((string) $file), hsc((string) $currentID));

        // errors are suppressed on purpose; an unreadable file simply contributes no links
        $body = @file_get_contents($conf['datadir'] . $file);
        if($body === false) {
            return;
        }

        // ignores entries in blocks that ignore links
        $ignoredBlocks = array(
            '@<nowiki>.*?<\/nowiki>@su',
            '@%%.*?%%@su',
            '@<php>.*?</php>@su',
            '@<PHP>.*?</PHP>@su',
            '@<html>.*?</html>@su',
            '@<HTML>.*?</HTML>@su',
            '@^( {2,}|\t)[^\*\- ].*?$@mu',
            '@<code[^>]*?>.*?<\/code>@su',
            '@<file[^>]*?>.*?<\/file>@su'
        );
        foreach($ignoredBlocks as $pattern) {
            $stripped = preg_replace($pattern, '', $body);
            // preg_replace() yields null on a PCRE error (e.g. invalid UTF-8 with
            // the /u modifier); keep the previous text rather than losing the page
            if($stripped !== null) {
                $body = $stripped;
            }
        }

        $links = array();
        preg_match_all(LINK_PATTERN, $body, $links);
        if(empty($links[1])) {
            return;
        }

        foreach($links[1] as $link) {
            if($debug) printf("--- Checking %s<br />\n", hsc($link));

            if($this->orph_is_internal_link($link)) {
                # remove parameters
                $link = preg_replace('/\?.*/', '', $link);

                // resolve_pageid() rewrites $link in place
                $pageExists = false;
                resolve_pageid($currentNS, $link, $pageExists);
                if($debug) printf("---- link='%s' %s ", hsc((string) $link), $pageExists ? 'EXISTS' : 'MISS');

                // count the link only when an id is left and the user may read the target
                if(strlen(ltrim($link)) > 0 && auth_quickaclcheck($link) >= AUTH_READ) {
                    if($debug) echo ' A_LINK';

                    $link = utf8_strtolower($link);
                    $this->orph_handle_link($data, $link);
                } else {
                    if($debug) echo ' EMPTY_OR_FORBIDDEN';
                }
            } else {
                if($debug) echo ' NOT_INTERNAL';
            }

            if($debug) echo "<br />\n";
        }
    }

    /**
     * Tell whether a raw link target (first part of a [[...]] match) is a
     * non-empty link to a wiki page.
     *
     * @param string $link raw link target as found in the page text
     * @return bool false for empty targets, interwiki links, Windows shares,
     *              external URLs, e-mail addresses and pure anchors
     */
    protected function orph_is_internal_link($link) {
        if(strlen(ltrim($link)) == 0) return false;
        if(preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u', $link)) return false; // Interwiki
        if(preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u', $link)) return false; // Windows Share
        if(preg_match('#^([a-z0-9\-\.+]+?)://#i', $link)) return false; // external link (accepts all protocols)
        if(preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>', $link)) return false; // E-Mail (pattern is defined in inc/mail.php)
        if(preg_match('!^#.+!', $link)) return false; // inside page link (html anchor)
        return true;
    }

    /**
     * Walk the whole page directory and collect existence and link counts.
     *
     * @return array page id => array('exists' => bool, 'links' => int)
     */
    protected function orph_collect_data() {
        global $conf, $ID;

        $data = array();
        search($data, $conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID)));
        return $data;
    }

    /**
     * Report of pages that exist but are not linked from anywhere.
     *
     * @param array $params_array index 1: included namespaces, index 2: excluded namespaces
     * @return string HTML
     */
    function orphan_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, false, $params_array, 'orphan');
    }

    /**
     * Report of pages that are linked to but do not exist.
     *
     * @param array $params_array index 1: included namespaces, index 2: excluded namespaces
     * @return string HTML
     */
    function wanted_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), false, true, $params_array, 'wanted');
    }

    /**
     * Report of pages that exist and are linked to.
     *
     * @param array $params_array index 1: included namespaces, index 2: excluded namespaces
     * @return string HTML
     */
    function valid_pages($params_array) {
        return $this->orph_report_table($this->orph_collect_data(), true, true, $params_array, 'valid');
    }

    /**
     * All three reports (orphans, wanted, valid), built from a single scan.
     *
     * @param array $params_array index 1: included namespaces, index 2: excluded namespaces
     * @return string HTML
     */
    function all_pages($params_array) {
        $data = $this->orph_collect_data();

        $result  = "</p><p>Orphans</p><p>";
        $result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');
        $result .= "</p><p>Wanted</p><p>";
        $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
        $result .= "</p><p>Valid</p><p>";
        $result .= $this->orph_report_table($data, true, true, $params_array, 'valid');

        return $result;
    }

    /**
     * Tell whether $page_namespace lies in (or below) one of the given namespaces.
     *
     * @param string $page_namespace namespace of a page with a trailing ':'
     * @param array  $namespaces     namespaces without trailing ':'
     * @return bool
     */
    protected function orph_in_namespaces($page_namespace, $namespaces) {
        foreach($namespaces as $namespace) {
            // the trailing ':' makes sure only whole namespace names match;
            // === 0 because only a match at the very beginning counts
            if(strpos($page_namespace, $namespace . ':') === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Render the collected data as an HTML table.
     *
     * The combination of $page_exists and $has_links selects the report:
     *   orphans: $page_exists = true,  $has_links = false
     *   wanted:  $page_exists = false, $has_links = true
     *   valid:   $page_exists = true,  $has_links = true
     *
     * Pages listed in the 'ignoredpages' setting (separated by ';'), hidden
     * pages and pages outside the requested namespaces are left out.
     *
     * @param array       $data         page id => array('exists' => bool, 'links' => int)
     * @param bool        $page_exists  list entries whose 'exists' flag equals this value
     * @param bool        $has_links    list entries that have (true) or do not have (false) links
     * @param array       $params_array index 1: namespaces to include (empty = all),
     *                                  index 2: namespaces to exclude; a missing or
     *                                  non-array entry is treated as an empty list
     * @param string|null $caller       'orphan' leaves out the "Links" column and the sort by link count
     * @return string HTML; starts with '</p>' and ends with '<p>' because it is
     *                embedded into an open paragraph
     */
    function orph_report_table($data, $page_exists, $has_links, $params_array, $caller = null) {
        global $conf;

        $debug = !empty($conf['allowdebug']);

        // Fetch pages which shouldn't be listed
        $ignoredPages = array();
        $ignoredConf  = (string) $this->getConf('ignoredpages');
        if($ignoredConf !== '') {
            $ignoredPages = array_map('trim', explode(';', $ignoredConf));
        }

        $show_heading = ($page_exists && $conf['useheading']);

        // $params_array[0] is not used here
        $include_array = (isset($params_array[1]) && is_array($params_array[1])) ? $params_array[1] : array();
        $exclude_array = (isset($params_array[2]) && is_array($params_array[2])) ? $params_array[2] : array();

        $count = 1;

        // for valid html - need to close the <p> that is feed before this
        $output  = '</p>';
        $output .= '<table class="inline"><tr><th> # </th><th> ID </th>'
                    . ($show_heading ? '<th>Title</th>' : '' )
                    . ($caller != "orphan" ? '<th>Links</th>' : '')
                    . '</tr>'
                    . "\n" ;

        // Sort by namespace and name
        ksort($data);

        // Sort descending by existing links.
        // This does not make sense for orphans since they don't have links.
        // NOTE(review): arsort() compares the entry arrays as a whole ('exists'
        // first, then 'links'); the rows of one report share the same 'exists'
        // value, so they end up ordered by link count as long as entries only
        // hold these two keys.
        if($caller != "orphan") {
            arsort($data);
        }

        foreach($data as $id => $item) {
            if($item['exists'] != $page_exists) continue;
            if(($item['links'] <> 0) != $has_links) continue;

            // $id looks like this: page, namespace:page, or namespace:<subspaces>:page
            // drop the page name and keep the namespace with a trailing ':'
            $parts = explode(":", $id);
            array_pop($parts);
            $page_namespace = implode(":", $parts) . ':';

            // an empty inclusion list shows all namespaces
            $show_it = empty($include_array) || $this->orph_in_namespaces($page_namespace, $include_array);

            // array keys that look like integers are stored as int by PHP, so
            // compare as strings to match ids exactly
            if(in_array((string) $id, $ignoredPages, true)) {
                if($debug) echo "Skipped page (global ignored): " . hsc((string) $id) . "<br />";
                $show_it = false;
            } elseif(isHiddenPage($id)) {
                if($debug) echo "Skipped page (global hidden): " . hsc((string) $id) . "<br />";
                $show_it = false;
            } elseif($show_it && $this->orph_in_namespaces($page_namespace, $exclude_array)) {
                // blocked by the exclusion list
                $show_it = false;
            }

            if(!$show_it) continue;

            $output .=  "<tr><td>$count</td><td><a href=\"". wl($id)
                        . "\" class=\"" . ($page_exists ? "wikilink1" : "wikilink2") . "\" >"
                        . hsc((string) $id) .'</a></td>'
                        . ($show_heading ? '<td>' . hsc((string) p_get_first_heading($id)) .'</td>' : '' );

            if($caller != "orphan") { // Skip "link" column if user wants orphan pages only
                $output .= '<td>' . $item['links']
                            . ($has_links ? "&nbsp;:&nbsp;<a href=\"". wl($id, 'do=backlink')
                            . "\" class=\"wikilink1\">Show&nbsp;backlinks</a>" : '') . "</td>";
            }
            $output .= "</tr>\n";
            $count++;
        }

        $output .=  "</table>\n";
        //for valid html = need to reopen a <p>
        $output .= '<p>';

        return $output;
    }
}