xref: /dokuwiki/bin/wantedpages.php (revision 8c7c53b0321a3cd3116b8d3b2ad27863a38dece7)
1cbfa4829SPhy#!/usr/bin/env php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
48c6be208SAndreas Gohruse dokuwiki\Utf8\Sort;
52cd6cc0aSAndreas Gohruse dokuwiki\File\PageResolver;
6cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
7cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
8cbeaa4a0SAndreas Gohr
// Use the parent of this script's directory as the DokuWiki base directory,
// unless DOKU_INC has already been defined
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
// NOTE(review): presumably tells the bootstrap not to start a session - confirm in inc/init.php
define('NOSESSION', 1);
require_once DOKU_INC . 'inc/init.php';
121caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that collects all page files below a namespace directory,
 * parses each of them and prints the link targets that do not exist together
 * with the pages linking to them.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a directory that is descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when set, only the first column (the sort dimension) is printed */
    private $skip = false;

    /** @var string 'wanted' groups the output by missing page, 'origin' by linking page */
    private $sort = 'wanted';

    /** @var array<string, string[]> first dimension => list of second dimension values */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Collect the wanted pages below the requested namespace and print them
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        $args = $options->getArgs();
        // the directory of the file wikiFN() reports for a dummy page 'xxx'
        // inside the namespace is used as the start directory
        $dummy = $args ? $args[0] . ':xxx' : 'xxx';
        $startdir = dirname(wikiFN($dummy));

        $this->skip = (bool) $options->getOpt('skip');
        // anything other than "origin" selects the default "wanted" grouping
        $this->sort = ($options->getOpt('sort') === 'origin') ? 'origin' : 'wanted';

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                print "$main\n";
                continue;
            }

            // a page may link to the same target several times
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not searched
            return (strpos($entry, '_') === 0) ? self::DIR_CONTINUE : self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * NOTE(review): DokuCLI_Exception is neither imported nor defined in this
     * file - confirm it is still provided by the code loaded from inc/init.php.
     *
     * @param string $dir directory to scan
     * @return array[] list of ['id' => page id, 'file' => path of the page file]
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        static $trunclen = null;
        if ($trunclen === null) {
            global $conf;
            // length of the data directory plus one separator character; this
            // prefix is cut off the file path before it is turned into an id
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions: do not hand a non-handle to readdir()
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        while (false !== ($entry = readdir($dh))) {
            $path = $dir . '/' . $entry;
            $status = $this->dirFilter($entry, $dir);

            if ($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($path));
            } elseif ($status === self::DIR_PAGE) {
                $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
            }
            // DIR_CONTINUE: nothing to do for this entry
        }
        closedir($dh);

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (default).
     *
     * @param array $page array with page id and file path
     * @return void
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            $this->warning("Unable to read page file {$page['file']}");
            return;
        }

        $pid = $page['id'];
        $resolver = new PageResolver($pid);

        foreach (p_get_instructions($text) as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            // record pages without hashes: the anchor is removed before the
            // existence check so that the check is made on the page id itself
            [$mid] = explode('#', $resolver->resolveId($ins[1][0]), 2);
            if (page_exists($mid)) {
                continue;
            }

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1811caeb00aSHarry Fuecks
// Script entry point: instantiate the command line tool and run it
$cli = new WantedPagesCLI();
$cli->run();
185