xref: /dokuwiki/bin/wantedpages.php (revision d63d2c63ef8ddb2c9a78727fdfe5a5d5fbd14cd0)
1#!/usr/bin/php
2<?php
// Point DOKU_INC at the parent directory of this script (with a trailing slash),
// unless the constant has already been defined.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}

// NOSESSION is defined before init.php is loaded
// (presumably so that no session is started for CLI runs - confirm in inc/init.php)
define('NOSESSION', 1);

require_once(DOKU_INC.'inc/init.php');
6
7/**
8 * Find wanted pages
9 */
/**
 * Find wanted pages
 *
 * Walks the page files (*.txt) below a namespace directory, parses each page
 * and prints every internal link whose target page does not exist.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: skip this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: entry is a directory that should be descended into */
    const DIR_NS       = 2;
    /** dir_filter() result: entry is a page file */
    const DIR_PAGE     = 3;

    /** @var bool when true, each result is reported as "page id => wanted page" */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run.
     * Prints the sorted, de-duplicated list of wanted pages, one per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        global $argv;

        // resolve a dummy page id to find the directory of the requested namespace
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        // 'show-pages' is accepted as first or second command line argument.
        // Only look at the positions that really exist, so that running the
        // script with fewer arguments does not raise undefined offset notices.
        $leading_args = is_array($argv) ? array_slice($argv, 1, 2) : array();
        if(in_array('show-pages', $leading_args, true)) {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            // append instead of array_merge() to avoid re-copying the whole list per page
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Returns DIR_NS for directories (except those whose name starts with an
     * underscore), DIR_PAGE for entries ending in '.txt' and DIR_CONTINUE for
     * everything else, including '.' and '..'.
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function get_pages($dir) {
        // number of leading characters to cut from a file path to get the part
        // below the data directory; the appended ':' only accounts for one
        // extra character (the path separator following the data directory)
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions: without this check readdir() would be
            // called on an invalid handle
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            } else if($status == self::DIR_NS) {
                // sub namespace: append all of its pages
                foreach($this->get_pages($dir.'/'.$entry) as $subpage) {
                    $pages[] = $subpage;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($dir.'/'.$entry, $trunclen)),
                    'file' => $dir.'/'.$entry,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Depending on the show-pages mode an entry is either the wanted page id
     * or the string "linking page id => wanted page id".
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        $pid          = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $ins[1][0];
                // $mid and $exists are passed to resolve_pageid() and read back afterwards
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashs
                    if($this->show_pages) {
                        $links[] = "$pid => $mid";
                    } else {
                        $links[] = $mid;
                    }
                }
            }
        }
        return $links;
    }
}
163
// Entry point: create the command line tool and hand control over to it
$wanted_pages_cli = new WantedPagesCLI();
$wanted_pages_cli->run();