xref: /dokuwiki/bin/wantedpages.php (revision 16ac1f4e01580b4679a076a8f895536b1e7d712c)
1#!/usr/bin/php
2<?php
// Locate the DokuWiki installation root (the parent of this script's
// directory) unless the caller has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}

// Defined before the framework is loaded; command line runs need no session.
define('NOSESSION', 1);

require_once(DOKU_INC.'inc/init.php');
6
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * parses every page and prints the ids of all internal link targets that do
 * not exist yet, one per line.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: the entry is to be skipped */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: the entry is a directory that is descended into */
    const DIR_NS       = 2;
    /** dir_filter() result: the entry is a page file (name ends in .txt) */
    const DIR_PAGE     = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * missing link targets of every page below the start directory and prints
     * them sorted and without duplicates.
     *
     * @param DokuCLI_Options $options
     * @return void
     * @throws DokuCLI_Exception when a directory cannot be read (raised by get_pages())
     */
    protected function main(DokuCLI_Options $options) {

        // The start directory is derived from the file name of a dummy page
        // id ('xxx') inside the requested namespace, or in the root namespace
        // when no argument was given.
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->info("searching $startdir");

        // Append link by link instead of array_merge()-ing inside the loop,
        // which would copy the whole accumulated list for every page.
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Classify a single directory entry
     *
     * @param string $entry    name of the entry as returned by readdir()
     * @param string $basepath directory that contains the entry
     * @return int DIR_NS for directories whose name does not start with an
     *             underscore, DIR_PAGE for non-directories whose name ends in
     *             .txt, DIR_CONTINUE for everything else (including . and ..)
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories with a leading underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Recursively collect all page files below a directory
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (result of pathID() on
     *               the path with the data directory prefix cut off) and
     *               'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            // Number of leading characters to cut from a file path: the data
            // directory plus one more character. The ':' is only used to add
            // that one character (presumably the separator that follows the
            // data directory in every path handled here).
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions).
        // Passing that on to readdir() would never yield the false that ends
        // the loop below, so bail out explicitly.
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        try {
            while(false !== ($entry = readdir($dh))) {
                $status = $this->dir_filter($entry, $dir);
                if($status == self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir.'/'.$entry;
                if($status == self::DIR_NS) {
                    // append the sub namespace's pages without re-copying $pages
                    foreach($this->get_pages($path) as $subpage) {
                        $pages[] = $subpage;
                    }
                } else {
                    $pages[] = array(
                        'id'   => pathID(substr($path, $trunclen)),
                        'file' => $path,
                    );
                }
            }
        } catch(Exception $e) {
            // do not leak the directory handle when a sub directory fails
            closedir($dh);
            throw $e;
        }
        closedir($dh);

        return $pages;
    }

    /**
     * Find the internal links of a page that point to non-existing pages
     *
     * @param array $page page description with the keys 'id' and 'file', as
     *                    produced by get_pages()
     * @return array list of resolved link targets (any part from the first
     *               '#' on removed) that were reported as not existing; empty
     *               when the page file cannot be read
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // Unreadable file: PHP has already raised a warning and there is
            // nothing to parse, so this page contributes no links.
            return array();
        }

        $links  = array();
        $cns    = getNS($page['id']);
        $exists = false;
        foreach(p_get_instructions($text) as $ins) {
            // CamelCase links only count when that feature is switched on
            $is_link = $ins[0] == 'internallink'
                || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$is_link) {
                continue;
            }

            $mid = $ins[1][0];
            // $exists is only ever changed by this call, so it is evidently
            // filled by reference; $mid is presumably resolved in place
            // against the page's namespace as well.
            resolve_pageid($cns, $mid, $exists);
            if(!$exists) {
                list($mid) = explode('#', $mid, 2); //record pages without hashes
                $links[] = $mid;
            }
        }
        return $links;
    }
}
130
// Script entry point: instantiate the command line tool and hand control to
// its run() method.
$cli = new WantedPagesCLI();
$cli->run();