xref: /dokuwiki/bin/wantedpages.php (revision e8bb93a50f98b9389d6bdca6124744208cde7728)
150b78159SElan Ruusamäe#!/usr/bin/php
21caeb00aSHarry Fuecks<?php
// Point DOKU_INC at the directory one level above this script, unless a
// caller has already defined it.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
}

// NOTE(review): presumably tells the bootstrap not to start a session for
// this command line run - confirm against inc/init.php.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
61caeb00aSHarry Fuecks
/**
 * Command line tool that prints every page that is linked to from within
 * a namespace but does not exist yet.
 */
class WantedPagesCLI extends DokuCLI {

    /** Directory entry should be skipped */
    const DIR_CONTINUE = 1;
    /** Directory entry is a namespace (sub directory) to descend into */
    const DIR_NS       = 2;
    /** Directory entry is a page file (*.txt) */
    const DIR_PAGE     = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
                'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
                'namespace',
                'The namespace to lookup. Defaults to root namespace',
                false
        );
    }

    /**
     * Collect the wanted pages below the requested namespace and print them
     *
     * Arguments and options have been parsed when this is run. The result is
     * de-duplicated, sorted and written to standard output, one id per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {

        // 'xxx' is only a placeholder page name: we ask for the file of a
        // page inside the namespace and keep just its directory
        $probe    = $options->args ? $options->args[0] . ':xxx' : 'xxx';
        $startdir = dirname(wikiFN($probe));

        $this->info("searching $startdir");

        // append link by link instead of array_merge() in the loop, which
        // would copy the whole growing list for every page
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page . "\n";
        }
    }

    /**
     * Classify a single directory entry
     *
     * @param string $entry    name of the entry as returned by readdir()
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Recursively list all page files below the given directory
     *
     * @param string $dir directory to scan
     * @return array list of pages, each an array with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            // length of the data directory plus one more character;
            // presumably the '/' that follows it in the paths built below
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions: never hand a failed handle to readdir()
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // read all entries first so the handle is closed before recursing;
        // an exception from a sub directory then cannot leak it
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status === self::DIR_NS) {
                foreach($this->get_pages($path) as $page) {
                    $pages[] = $page;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        return $pages;
    }

    /**
     * Find the internal links of a page whose target does not exist
     *
     * Camel case links are only considered when $conf['camelcase'] is set.
     *
     * @param array $page page description with the keys 'id' and 'file'
     * @return array list of link targets, without any '#...' part
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // file_get_contents() has already raised a warning; there is
            // nothing to parse, so this page contributes no links
            return array();
        }

        $instructions = p_get_instructions($text);
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        foreach($instructions as $ins) {
            $is_link = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$is_link) {
                continue;
            }

            $mid = $ins[1][0];
            // $exists is checked right after the call, i.e. it is filled in
            // by resolve_pageid()
            resolve_pageid($cns, $mid, $exists);
            if(!$exists) {
                // record pages without hashes
                $parts   = explode('#', $mid, 2);
                $links[] = $parts[0];
            }
        }
        return $links;
    }
}
1271caeb00aSHarry Fuecks
// Script entry point: create the command line tool and hand control to it
$cli = new WantedPagesCLI();
$cli->run();