xref: /dokuwiki/bin/wantedpages.php (revision 7c3df9b4f9d2f084b649907bf1f6c3a71454cf9d)
1#!/usr/bin/php
2<?php
// Locate the DokuWiki installation root (parent of this bin/ directory)
// unless the caller has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}

// Must be defined before init.php is loaded: command line runs need no session.
define('NOSESSION', 1);

require_once(DOKU_INC.'inc/init.php');
6
/**
 * Find wanted pages
 *
 * Command line tool: walks the directory of a namespace, parses every page
 * file found below it and prints the targets of all internal links that
 * point to pages which do not exist.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() verdict: skip this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() verdict: entry is a namespace directory, descend into it */
    const DIR_NS       = 2;
    /** dir_filter() verdict: entry is a page file */
    const DIR_PAGE     = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * Sets the help text and registers the single, optional 'namespace'
     * argument.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * missing link targets of every page below the start directory and prints
     * them sorted, one per line, without duplicates.
     *
     * @param DokuCLI_Options $options
     * @return void
     * @throws DokuCLI_Exception when a directory can not be read
     */
    protected function main(DokuCLI_Options $options) {

        // The file name of a dummy page 'xxx' is looked up only to obtain the
        // directory that holds the pages of the requested (or root) namespace.
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            // append instead of array_merge(): avoids recopying the whole
            // accumulated list for every single page
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies one directory entry:
     *  - '.' and '..' are skipped
     *  - directories whose name starts with an underscore are skipped
     *  - any other directory is treated as a namespace
     *  - files ending in '.txt' are treated as pages
     *  - everything else is skipped
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * Each returned element is an array with the keys 'id' (page ID derived
     * from the path relative to $conf['datadir']) and 'file' (full path).
     *
     * @param string $dir
     * @return array
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function get_pages($dir) {
        // Number of leading characters to cut from a full path to make it
        // relative to the data directory: the datadir itself plus one
        // character for the separator that follows it.
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false when the directory exists but can not be
        // opened (e.g. missing permissions); passing that on to readdir()
        // would loop forever or raise a TypeError depending on PHP version.
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // Read all entries first and release the handle before recursing, so
        // no handle is kept open per nesting level or leaked on an exception.
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            } else if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($dir.'/'.$entry));
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($dir.'/'.$entry, $trunclen)),
                    'file' => $dir.'/'.$entry,
                );
            }
        }
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Considers 'internallink' instructions and, when $conf['camelcase'] is
     * enabled, 'camelcaselink' instructions. The result may contain the same
     * target more than once.
     *
     * @param array $page array with page id and file path
     * @return array link targets (without '#' section part) that do not exist
     */
    public function internal_links($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        foreach($instructions as $ins) {
            if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $ins[1][0];
                // resolve_pageid() is expected to update $mid and $exists in
                // place, resolving the link against the page's own namespace
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashes
                    $links[] = $mid;
                }
            }
        }
        return $links;
    }
}
150
// Entry point: create the command line tool and start it through the
// inherited run() method.
$cli = new WantedPagesCLI();
$cli->run();