1 #!/usr/bin/env php
2 <?php
3 
4 use dokuwiki\Utf8\Sort;
5 use dokuwiki\File\PageResolver;
6 use splitbrain\phpcli\CLI;
7 use splitbrain\phpcli\Options;
8 
// Point DOKU_INC at the parent of this script's directory (with a trailing slash),
// unless whoever loaded this file has already defined it.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// Must be defined before init.php is loaded below.
// NOTE(review): presumably tells the bootstrap not to start a session — confirm in inc/init.php.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
12 
/**
 * Find wanted pages
 *
 * Walks the page files (*.txt) below a namespace directory, parses every page and
 * collects all internal links whose target page does not exist. The result is printed
 * as a two column list, keyed either by the missing page or by the linking page.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory (namespace) to descend into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when set, only the first column of the result is printed */
    private $skip = false;

    /** @var string first column of the output: 'wanted' (missing page) or 'origin' (linking page) */
    private $sort = 'wanted';

    /** @var array<string, string[]> first column value => list of second column values */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Collect the wanted pages below the requested namespace and print them
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // Resolve a dummy page inside the requested namespace (or the root namespace)
        // and take its directory: that is where the namespace's page files live.
        $args = $options->getArgs();
        $probe = $args ? $args[0] . ':xxx' : 'xxx';
        $startdir = dirname(wikiFN($probe));

        $this->skip = (bool) $options->getOpt('skip');

        // Keep the documented default when --sort is not given and reject unknown values
        // loudly instead of silently treating them like 'wanted'.
        $sort = (string) $options->getOpt('sort', 'wanted');
        if ($sort !== 'wanted' && $sort !== 'origin') {
            $this->warning("Unknown sort mode '$sort', falling back to 'wanted'");
            $sort = 'wanted';
        }
        $this->sort = $sort;

        $this->info("searching $startdir");

        $this->result = [];
        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // the same link may occur several times on one page
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * '.' and '..', directories starting with an underscore and files not ending in
     * '.txt' are ignored. Other directories are namespaces, '.txt' files are pages.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            // directories with a leading underscore are not searched
            return strpos($entry, '_') === 0 ? self::DIR_CONTINUE : self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => path of the page file]
     * @throws DokuCLI_Exception when the directory does not exist or cannot be opened
     */
    protected function getPages($dir)
    {
        // Number of leading characters to cut from a file path before handing it to
        // pathID(): the data directory plus one character for the separator after it.
        static $trunclen = null;
        if ($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir']) + 1;
        }

        // NOTE(review): DokuCLI_Exception is neither declared nor imported in this file —
        // confirm it is still provided by the loaded DokuWiki code.
        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // A directory can exist but still be unreadable. Without this check readdir()
        // would be called with false, which fails hard on PHP 8 and loops forever on PHP 7.
        $dh = opendir($dir);
        if ($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page ('origin')
     * or by the missing page (any other mode).
     *
     * @param array $page array with page id ('id') and file path ('file')
     * @return void
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            // the file may have vanished or be unreadable; report it and go on
            $this->warning("Unable to read page file {$page['file']}, skipping");
            return;
        }

        $pid = $page['id'];
        $resolver = new PageResolver($pid);
        $byOrigin = ($this->sort === 'origin');

        foreach (p_get_instructions($text) as $ins) {
            // CamelCase links only count when that syntax is enabled in the configuration
            $isLink = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($byOrigin) {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
185 
// Script entry point: build the command line tool and hand control over to it
(new WantedPagesCLI())->run();
189