xref: /dokuwiki/bin/wantedpages.php (revision 0489c64b7de1b71fdd124114dd18525156f26327)
150b78159SElan Ruusamäe#!/usr/bin/php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
4cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
5cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
6*0489c64bSMoisés Braga Ribeirouse dokuwiki\Utf8\Sort;
7cbeaa4a0SAndreas Gohr
// Point DOKU_INC at the parent directory of this script (with a trailing slash),
// unless the constant has already been defined by the caller.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOTE(review): presumably tells the init code not to start a session - confirm in inc/init.php
define('NOSESSION', 1);

// Load the DokuWiki environment
require_once DOKU_INC . 'inc/init.php';
111caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses every page and reports
 * each internal link whose target does not exist, together with the linking page.
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: skip this directory entry */
    const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a namespace directory to descend into */
    const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    const DIR_PAGE = 3;

    /** @var bool|mixed value of the 'skip' option; when truthy only the first column is printed */
    private $skip = false;

    /** @var string 'origin' groups the output by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first dimension (wanted or origin page id) => list of second dimension ids */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages
     * below the start directory, gathers their missing links and prints the sorted
     * result to standard output.
     *
     * @param Options $options
     * @return void
     * @throws \splitbrain\phpcli\Exception when a directory cannot be read
     */
    protected function main(Options $options) {
        $args = $options->getArgs();

        // the placeholder page 'xxx' is handed to wikiFN() so that dirname() yields
        // the directory that should be scanned
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // keep the 'wanted' default when the option was not given on the command line
        $this->sort = $options->getOpt('sort', 'wanted');

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
                continue;
            }

            // a page may link to the same target several times
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }

        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path to the page file)
     * @throws \splitbrain\phpcli\Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir) {
        // length of the data directory path plus one separator character; used as the
        // offset at which the page's relative path starts inside a full file path
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions; do not hand a non-resource to readdir()
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dirFilter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($path));
            } else {
                $pages[] = array(
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page ('origin')
     * or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     * @return void
     */
    protected function internalLinks($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // report the unreadable file and carry on with the remaining pages
            $this->error('Unable to read file ' . $page['file']);
            return;
        }

        $instructions = p_get_instructions($text);
        $pid = $page['id'];
        $cns = getNS($pid);
        $exists = false;

        foreach($instructions as $ins) {
            $isLink = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$isLink) {
                continue;
            }

            $mid = $ins[1][0];
            // $mid and $exists are updated by the call
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1841caeb00aSHarry Fuecks
// Script entry point: create the command line tool and execute it
$cli = new WantedPagesCLI();
$cli->run();
188