xref: /dokuwiki/bin/wantedpages.php (revision 919f6a873fa5f26bceba67b2fd997262aa68a3ea)
1#!/usr/bin/php
2<?php
3
4use splitbrain\phpcli\CLI;
5use splitbrain\phpcli\Options;
6
// Resolve the installation root (the parent of this script's directory, with a
// trailing slash) unless a DOKU_INC constant has already been defined.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOSESSION has to be defined before init.php is loaded below.
// NOTE(review): presumably init.php checks it to skip session handling - confirm.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
10
/**
 * Find wanted pages
 *
 * Command line tool that walks all page files below a namespace directory, parses
 * each page and reports internal links whose target page does not exist.
 */
class WantedPagesCLI extends CLI {

    /** dir_filter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() verdict: entry is a namespace directory that should be descended into */
    const DIR_NS = 2;
    /** dir_filter() verdict: entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var bool when true only the first column (the sort key) is printed */
    private $skip = false;
    /** @var string 'origin' groups the output by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first dimension (see $sort) => list of second dimension page ids */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * Registers the optional 'namespace' argument, the 'sort' option (-s) and the
     * 'skip' flag (-k).
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages
     * below the requested namespace, gathers their links to missing pages and prints
     * the sorted result to standard output.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // 'xxx' is only a placeholder page name: dirname() of its file path is the
        // directory of the namespace we want to scan.
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = (bool) $options->getOpt('skip');
        // Keep the property's default when --sort was not given on the command line.
        $sort = $options->getOpt('sort');
        if($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach($this->get_pages($startdir) as $page) {
            $this->internal_links($page);
        }

        // Page ids consisting only of digits become integer array keys. Comparing
        // everything as strings keeps the order independent of PHP's int/string
        // comparison rules.
        ksort($this->result, SORT_STRING);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
            } else {
                $subs = array_unique($subs);
                sort($subs, SORT_STRING);
                foreach($subs as $sub) {
                    printf("%-40s %s\n", $main, $sub);
                }
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Skips '.' and '..', directories whose name starts with an underscore and all
     * files that do not end in '.txt'.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(substr($entry, -4) === '.txt') {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // Length of the data directory path plus one separator character; this
        // prefix is cut off a file path before it is converted into a page id.
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions: fail here instead of passing false to readdir()
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $path = $dir . '/' . $entry;
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            } else if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Looks at 'internallink' instructions and, when $conf['camelcase'] is enabled,
     * at 'camelcaselink' instructions. Pages that cannot be read are reported with a
     * warning and skipped.
     *
     * @param array $page array with page id and file path
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            $this->warning('Unable to read ' . $page['file'] . ', skipping');
            return;
        }

        $instructions = p_get_instructions($text);
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $ins[1][0];
                // resolve_pageid() receives $mid and $exists and both are read back
                // afterwards (by-reference parameters).
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashes

                    if($this->sort == 'origin') {
                        $this->result[$pid][] = $mid;
                    } else {
                        $this->result[$mid][] = $pid;
                    }
                }
            }
        }
    }
}
183
// Main: create the command object and start it. run() is not defined in
// WantedPagesCLI itself, so the call resolves to a method inherited via the CLI
// base class.
$cli = new WantedPagesCLI();
$cli->run();
187