xref: /dokuwiki/bin/wantedpages.php (revision ffe3602c5b571dceb438bc1041c20f8f35a07d07)
#!/usr/bin/php
<?php
// Point DOKU_INC at the directory one level above this script, unless the
// caller has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}
// Flag defined before bootstrapping; presumably tells inc/init.php not to
// start a session for this command line run (verify against init.php).
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');
61caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory and
 * prints every internal link whose target page does not exist, one
 * "source page => wanted page" pair per line.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: entry is a sub directory (namespace) to descend into */
    const DIR_NS       = 2;
    /** dir_filter() result: entry is a page file (name ends in .txt) */
    const DIR_PAGE     = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * Registers a single optional argument: the namespace to search.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * missing link targets of all pages below the start directory and prints
     * them sorted and without duplicates.
     *
     * @param DokuCLI_Options $options
     * @return void
     * @throws DokuCLI_Exception when a directory can not be read (from get_pages())
     */
    protected function main(DokuCLI_Options $options) {

        // 'xxx' is only a placeholder page name: we want the directory that
        // wikiFN() would place a page of the requested namespace in
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->info("searching $startdir");

        // append link by link instead of array_merge() per page, which would
        // copy the whole accumulated list on every iteration
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry (no path)
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories starting with an underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * Each returned element is an array with the keys 'id' (result of
     * pathID() on the file path relative to the data directory) and 'file'
     * (path of the page file).
     *
     * @param string $dir
     * @return array
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function get_pages($dir) {
        // number of leading characters to strip from a file path: the data
        // directory plus one more character for the separator that follows it
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // is_dir() does not guarantee the directory can be opened
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // read the whole listing first and release the handle before
        // recursing: no handle stays open per nesting level and none is
        // leaked when a nested call throws
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status === self::DIR_NS) {
                foreach($this->get_pages($path) as $page) {
                    $pages[] = $page;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Every entry of the result is a string "<page id> => <link target>",
     * where the link target has any '#section' part removed.
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // PHP has already raised a warning for the unreadable file; there
            // is nothing to parse, so report no links for this page
            return array();
        }

        $instructions = p_get_instructions($text);
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        $pid          = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $ins[1][0];
                // $mid and $exists are read right after this call, so
                // resolve_pageid() is expected to update both by reference
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashs
                    $links[] = "$pid => $mid";
                }
            }
        }
        return $links;
    }
}
1511caeb00aSHarry Fuecks
// Main: create the command line tool and start it. run() is not defined in
// WantedPagesCLI itself, so it is expected to come from the DokuCLI parent.
$cli = new WantedPagesCLI();
$cli->run();