xref: /dokuwiki/bin/wantedpages.php (revision b1f206e1d439d693686c99955ab305e7ca94e760)
1cbfa4829SPhy#!/usr/bin/env php
21caeb00aSHarry Fuecks<?php
3cbeaa4a0SAndreas Gohr
48c6be208SAndreas Gohruse dokuwiki\Utf8\Sort;
52cd6cc0aSAndreas Gohruse dokuwiki\File\PageResolver;
6cbeaa4a0SAndreas Gohruse splitbrain\phpcli\CLI;
7cbeaa4a0SAndreas Gohruse splitbrain\phpcli\Options;
8cbeaa4a0SAndreas Gohr
// DOKU_INC is the parent directory of this script's directory (with a trailing slash),
// unless it has already been defined before this file is executed.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
// NOSESSION has to be defined before init.php is loaded
// (presumably it tells the bootstrap not to start a session - confirm in inc/init.php).
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');
121caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory, looks at the
 * internal links of every page found and prints the link targets that do not exist,
 * together with the pages that link to them.
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a sub directory that should be descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool|mixed value of the 'skip' option; when truthy only the first column is printed */
    private $skip = false;
    /** @var string 'origin' keys the result by linking page, any other value keys it by wanted page */
    private $sort = 'wanted';

    /** @var array collected links, first dimension => list of ids of the second dimension */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * Registers the optional 'namespace' argument and the 'sort' and 'skip' options.
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the missing link
     * targets of all pages below the start directory and prints them sorted.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // 'xxx' is only a placeholder page name, we are interested in the directory it would live in
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // keep the declared default when the option was not given
        $sort = $options->getOpt('sort');
        if ($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                print "$main\n";
                continue;
            }

            // one output line per distinct entry of the second dimension
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * '.' and '..', directories starting with an underscore and files not ending in
     * '.txt' are skipped.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of pages, each an array with the keys 'id' and 'file'
     * @throws \splitbrain\phpcli\Exception when $dir is not a directory or can not be opened
     */
    protected function getPages($dir) {
        // number of leading characters to cut from a file path to make it relative to the
        // data directory; the appended ':' only adds one character for the separator that follows
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        // DokuCLI_Exception is neither imported nor defined in this file, so the exception
        // class of the CLI framework this tool extends is used instead
        if (!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions: never pass false on to readdir()
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a sub directory could not be read
            closedir($dh);
        }
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Internal links are always inspected, camel case links only when the 'camelcase'
     * configuration setting is enabled. Depending on the sort mode the result is keyed
     * by the linking page or by the missing page.
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];
        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }
            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
1801caeb00aSHarry Fuecks
// Entry point: create the command line tool and hand control over to the CLI framework
$cli = new WantedPagesCLI();
$cli->run();
184