xref: /dokuwiki/bin/wantedpages.php (revision d4f83172d9533c4d84f450fe22ef630816b21d75)
1cbfa4829SPhy#!/usr/bin/env php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
48c6be208SAndreas Gohruse dokuwiki\Utf8\Sort;
52cd6cc0aSAndreas Gohruse dokuwiki\File\PageResolver;
6cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
7cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
8cbeaa4a0SAndreas Gohr
// Bootstrap DokuWiki: resolve the installation root (parent of this script's directory)
// unless the caller already defined it, flag that no session is wanted, then load the core.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
define('NOSESSION', 1);
require_once DOKU_INC . 'inc/init.php';
121caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory, parses
 * every page and reports each internal link whose target page does not exist.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a namespace directory that should be descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var mixed value of the 'skip' option; when truthy only the grouping key is printed */
    private $skip = false;
    /** @var mixed value of the 'sort' option; only 'origin' switches the grouping to the linking page */
    private $sort = 'wanted';

    /** @var array grouping key (page id) => list of related page ids, filled by internalLinks() */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages
     * below the start directory, records their missing link targets and prints the
     * sorted result to STDOUT.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        $args = $options->getArgs();
        // wikiFN() maps a page id to a file; the dummy page 'xxx' is only used to
        // obtain the directory of the requested (or root) namespace
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }
        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
            } else {
                // the same link may occur several times on one page
                $subs = array_unique($subs);
                Sort::sort($subs);
                foreach ($subs as $sub) {
                    printf("%-40s %s\n", $main, $sub);
                }
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory that contains the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not searched
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => path of the page file]
     * @throws \splitbrain\phpcli\Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // length of the data directory plus one character for the separator that
        // follows it; used to cut the data directory off a file path
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions; readdir() must not be called with false
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                } elseif ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($dir . '/' . $entry));
                } else {
                    $file = $dir . '/' . $entry;
                    $pages[] = ['id' => pathID(substr($file, $trunclen)), 'file' => $file];
                }
            }
        } finally {
            // release the handle even when a sub directory could not be read
            closedir($dh);
        }
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page ('origin')
     * or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     * @return void
     */
    protected function internalLinks($page)
    {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];
        foreach ($instructions as $ins) {
            // camel case links only count when the feature is enabled in the configuration
            if ($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $resolver->resolveId($ins[1][0]);
                if (!page_exists($mid)) {
                    [$mid] = explode('#', $mid); //record pages without hashes

                    if ($this->sort == 'origin') {
                        $this->result[$pid][] = $mid;
                    } else {
                        $this->result[$mid][] = $pid;
                    }
                }
            }
        }
    }
}
1851caeb00aSHarry Fuecks
// Entry point: instantiate the command line tool and hand control to it
$wantedPages = new WantedPagesCLI();
$wantedPages->run();
189