xref: /dokuwiki/bin/wantedpages.php (revision 64cebf712e669b1e84428bbdca2a5653751c93ed)
150b78159SElan Ruusamäe#!/usr/bin/php
21caeb00aSHarry Fuecks<?php
// Point DOKU_INC at the directory one level above this script, unless the
// including environment has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
}

// Must be defined before init.php is loaded.
// NOTE(review): presumably tells init.php not to start a session - confirm.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
61caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * collects every internal link whose target page does not exist and prints
 * the missing pages together with the pages that link to them.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() verdict: entry is a namespace directory to descend into */
    const DIR_NS = 2;
    /** dir_filter() verdict: entry is a page file */
    const DIR_PAGE = 3;

    /** @var mixed value of the 'skip' option; when truthy only the first column is printed */
    private $skip = false;

    /** @var string 'wanted' or 'origin'; decides which page id is used as the first column */
    private $sort = 'wanted';

    /** @var array first-column page id => list of second-column page ids */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Scans all
     * pages below the start directory, then prints the collected result
     * sorted by its first column.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {

        // wikiFN() is asked for a dummy page 'xxx' so that dirname() yields
        // the directory of the requested namespace (or of the root namespace)
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // only an explicit 'origin' switches the mode; everything else
        // (including an absent option) keeps the documented default
        $this->sort = ($options->getOpt('sort') === 'origin') ? 'origin' : 'wanted';

        $this->info("searching $startdir");

        foreach($this->get_pages($startdir) as $page) {
            $this->internal_links($page);
        }

        ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
                continue;
            }

            // the same link may occur several times on one page
            $subs = array_unique($subs);
            sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * '.' and '..' as well as directories whose name starts with an
     * underscore are ignored, other directories are namespaces, files
     * ending in '.txt' are pages, everything else is ignored.
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory that contains the entry
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // number of leading characters to cut from a file path before it is
        // handed to pathID(): the data directory plus one separator character
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // is_dir() succeeded but the directory could not be opened;
            // without this check readdir() below would be called on false
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (any other mode).
     *
     * @param array $page array with page id and file path
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // the file could not be read, so there is nothing to scan
            return;
        }

        $instructions = p_get_instructions($text);
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            $islink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$islink) {
                continue;
            }

            $mid = $ins[1][0];
            // $mid and $exists are handed over as variables and read back afterwards
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1791caeb00aSHarry Fuecks
// Main: create the command line tool and hand control over to it
$cli = new WantedPagesCLI();
$cli->run();
182e8bb93a5SAndreas Gohr$cli->run();
183