xref: /dokuwiki/bin/wantedpages.php (revision a00078f7c6131eb152c1f51a05a0753843638bd6)
1#!/usr/bin/php
2<?php
// DOKU_INC is the parent directory of the directory holding this script,
// resolved with realpath() and given a trailing slash. A value that has
// already been defined elsewhere is left untouched.
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
// NOTE(review): presumably tells inc/init.php not to start a session for this
// command line run - confirm against inc/init.php
define('NOSESSION', 1);
// Load the DokuWiki bootstrap file; it has to come after the defines above
require_once(DOKU_INC . 'inc/init.php');
6
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * parses every page and collects the internal links whose target page does
 * not exist, together with the pages that contain those links.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: entry is a sub directory (namespace) to descend into */
    const DIR_NS = 2;
    /** dir_filter() result: entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var mixed value of the "skip" option; when truthy only the first column is printed */
    private $skip = false;

    /** @var mixed value of the "sort" option; 'origin' keys the result by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first column entry => list of second column entries */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * Registers the optional "namespace" argument and the "sort" (-s) and
     * "skip" (-k) options.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the start directory, records their missing link targets and
     * prints the result sorted by its first column.
     *
     * @param DokuCLI_Options $options
     * @return void
     * @throws DokuCLI_Exception when a directory can not be read
     */
    protected function main(DokuCLI_Options $options) {

        // a dummy page id ("xxx") is resolved to a file name so that dirname()
        // yields the directory belonging to the requested (or root) namespace
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach($this->get_pages($startdir) as $page) {
            $this->internal_links($page);
        }

        ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                // only the first dimension is wanted
                print "$main\n";
                continue;
            }

            // the same link may occur several times on a page: list each pair once
            $subs = array_unique($subs);
            sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is no directory or can not be opened
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            // length of the data directory plus the one separator character
            // following it; used below to cut that prefix off the file path
            $trunclen = strlen($conf['datadir']) + 1;
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false when the directory can not be opened (for
        // example because of missing permissions); feeding that to readdir()
        // would never yield the terminating false
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // read everything first and release the handle before recursing, so no
        // handle stays open during the descent or when a nested call throws
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort option the result is keyed by the linking page
     * ('origin') or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     * @return void
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // the page file could not be read, so there is nothing to parse
            return;
        }

        $instructions = p_get_instructions($text);
        $pid = $page['id'];
        $cns = getNS($pid);

        foreach($instructions as $ins) {
            $islink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$islink) {
                continue;
            }

            $mid = $ins[1][0];
            // $mid and $exists are handed over by reference; start every link
            // with a clean existence flag
            $exists = false;
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
179
// Main: create the command line tool and start it. run() is not defined in
// WantedPagesCLI itself, it is inherited from the DokuCLI parent class.
$cli = new WantedPagesCLI();
$cli->run();
183