xref: /dokuwiki/bin/wantedpages.php (revision cbfa4829d9bcd40d1cc3b9220fe78fa37c385c02)
1*cbfa4829SPhy#!/usr/bin/env php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
4cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
5cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
6cbeaa4a0SAndreas Gohr
// Point DOKU_INC at the directory one level above this script, unless the
// caller has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOTE(review): NOSESSION is presumably evaluated by inc/init.php to skip
// session handling for command line use - confirm against init.php.
define('NOSESSION', 1);

// Load the DokuWiki environment
require_once DOKU_INC . 'inc/init.php';
101caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks all page files below a namespace directory and
 * reports every internal link whose target page does not exist, together with
 * the page that contains the link.
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory (namespace) to descend into */
    const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var mixed value of the 'skip' option; when truthy only the first column is printed */
    private $skip = false;

    /** @var mixed value of the 'sort' option; 'origin' keys the result by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array collected links: first dimension => list of second dimension entries */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the start directory, records their links to missing pages
     * and prints the result sorted by its first dimension.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // resolve a dummy page id and take its directory to get the namespace directory
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                // only the first dimension was requested
                print "$main\n";
                continue;
            }

            // the same link may occur several times on a page, list each pair once
            $subs = array_unique($subs);
            sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * NOTE(review): DokuCLI_Exception is neither imported nor defined in this
     * file - confirm it is still available in the loaded environment.
     *
     * @param string $dir directory to scan
     * @return array list of pages, each an array with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function getPages($dir) {
        // length of the data directory prefix plus one separator character,
        // computed once and reused by all (recursive) calls
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // an existing directory may still be unreadable (e.g. permissions);
            // without this check readdir() would be called on a non-resource
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dirFilter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($path));
            } else {
                $pages[] = array(
                    // strip the data directory prefix before converting the path to an id
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort setting the result is keyed by the linking page
     * ('origin') or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            $isLink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$isLink) {
                continue;
            }

            $mid = $ins[1][0];
            // resolve_pageid() is expected to update $mid and $exists in place
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1831caeb00aSHarry Fuecks
// Entry point: create the command line tool and hand control to its run() method
$cli = new WantedPagesCLI();
$cli->run();
187