xref: /dokuwiki/bin/wantedpages.php (revision 26dfc2323f8f70cb69aac4c8c51bf7997809f2ca)
1cbfa4829SPhy#!/usr/bin/env php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
48c6be208SAndreas Gohruse dokuwiki\Utf8\Sort;
52cd6cc0aSAndreas Gohruse dokuwiki\File\PageResolver;
6cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
7cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
8cbeaa4a0SAndreas Gohr
// Resolve the DokuWiki base directory (one level above this script) unless it was defined already
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
// Must be defined before init.php is loaded
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');
121caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Walks all page files below a namespace directory, parses each one and prints
 * every internal link whose target page does not exist, together with the page
 * that contains the link.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a namespace directory to descend into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when true only the first column (the sort key) is printed */
    private $skip = false;
    /** @var string either 'wanted' (group by missing page) or 'origin' (group by linking page) */
    private $sort = 'wanted';

    /** @var array<string, string[]> primary page id => list of related page ids */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        $args = $options->getArgs();
        // Resolve a dummy page inside the requested namespace to obtain that namespace's directory
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = (bool)$options->getOpt('skip');
        // Anything other than an explicit 'origin' falls back to the default grouping by wanted page
        $this->sort = ($options->getOpt('sort') === 'origin') ? 'origin' : 'wanted';

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // The same link may occur several times on one page; list each pairing only once
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => path of the page file]
     * @throws \splitbrain\phpcli\Exception when the directory does not exist or cannot be opened
     */
    protected function getPages($dir)
    {
        // Length of the prefix that is cut off a file path before it is turned into a page id;
        // computed once and reused across the recursive calls
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            // the one extra character accounts for the separator following the data directory
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions; readdir() must not be called with a failed handle
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even if a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            $this->warning('Unable to read file ' . $page['file']);
            return;
        }

        $instructions = p_get_instructions($text);
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];

        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1861caeb00aSHarry Fuecks
// Script entry point: instantiate the command line tool and run it
$cli = new WantedPagesCLI();
$cli->run();
190