xref: /plugin/combo/cli.php (revision e8b2ff590c848541e718216df3a67061e98c1761)
1007225e5Sgerardnico<?php
2007225e5Sgerardnico/**
3007225e5Sgerardnico * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
4007225e5Sgerardnico *
5007225e5Sgerardnico * This source code is licensed under the GPL license found in the
6007225e5Sgerardnico * COPYING  file in the root directory of this source tree.
7007225e5Sgerardnico *
8007225e5Sgerardnico * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
9007225e5Sgerardnico * @author   ComboStrap <support@combostrap.com>
10007225e5Sgerardnico *
11007225e5Sgerardnico */
// Refuse to run when the file is not loaded through DokuWiki
if (!defined('DOKU_INC')) {
    die();
}
13007225e5Sgerardnico
14007225e5Sgerardnicouse ComboStrap\Analytics;
1571f916b9Sgerardnicouse ComboStrap\Page;
1671f916b9Sgerardnicouse ComboStrap\Sqlite;
17007225e5Sgerardnicouse splitbrain\phpcli\Options;
18007225e5Sgerardnico
/**
 * PluginUtility takes care of loading all the other dependencies
 */
require_once __DIR__ . '/class/PluginUtility.php';

/**
 * The 128M of memory of the server is not enough
 */
ini_set('memory_limit', '256M');
28007225e5Sgerardnico
/**
 * Class cli_plugin_combo
 *
 * Command line interface of the combo plugin. This is a DokuWiki cli plugin:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Commands:
 *   * `analytics` (default when no command is given): process the analytics
 *     of the pages and optionally export them as csv
 *   * `sync`: delete from the database the pages that do not exist anymore
 *     on the file system
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * Register the commands and the options
     *
     * NOTE(review): the `version` and `namespaces` options are registered but
     * never read by {@link cli_plugin_combo::main()} (the namespaces are taken
     * from the positional arguments). They are kept to not break existing
     * command lines.
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            'file'
        );
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c',
            false
        );
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd',
            false
        );
        // `depth` is read in main(): without this registration it could never be
        // set from the command line. No short flag because `d` is taken by `dry`.
        $options->registerOption(
            'depth',
            "Optional, the recursion depth, 0 (the default) for unlimited",
            null,
            'depth'
        );
        $options->registerCommand(self::SYNC, "Sync the database");

    }

    /**
     * The main entry
     *
     * The positional arguments are the namespaces to process (the root
     * namespace when none is given). Without command, the analytics command
     * is executed.
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown
     */
    protected function main(Options $options)
    {

        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) {
            $namespaces = array(''); //import from top
        }

        $cache = $options->getOpt('cache', false);
        $depth = (int)$options->getOpt('depth', 0);
        $dry = (bool)$options->getOpt('dry', false);

        $cmd = $options->getCmd();
        if ($cmd == "") {
            $cmd = self::ANALYTICS;
        }
        switch ($cmd) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                //if ($output == '-') $output = 'php://stdout';
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $this->syncPages($dry);
                break;
            default:
                throw new \RuntimeException("Combo: Command unknown (" . $cmd . ")");
        }

    }

    /**
     * Process the analytics of every page found in the namespaces
     * and write them optionally in a csv file (one line by page)
     *
     * @param array $namespaces the namespaces to scan
     * @param string|null $output the path of the csv file, no export if empty
     * @param bool $cache passed as is to the analytics processing
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        // The `finally` closes the csv file even if the processing of a page fails
        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    // the value written in this column is the `low` quality entry
                    // (it was wrongly labelled `score`)
                    'low'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            $totalNumberOfPages = count($pages);
            $pageCounter = 0;
            foreach ($pages as $page) {
                $id = $page['id'];

                $pageCounter++;
                echo "Processing the page {$id} ({$pageCounter} / {$totalNumberOfPages})\n";

                $data = Analytics::processAndGetDataAsArray($id, $cache);
                if (empty($fileHandle)) {
                    continue;
                }

                // The order of the values must follow the order of the header
                $statistics = $data[Analytics::STATISTICS];
                $headings = $statistics[Analytics::HEADERS_COUNT];
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                    'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                    'changes' => $statistics[Analytics::EDITS_COUNT],
                    'chars' => $statistics[Analytics::CHARS_COUNT],
                    'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                    'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS_COUNT],
                    'h1' => $headings[Analytics::H1],
                    'h2' => $headings['h2'],
                    'h3' => $headings['h3'],
                    'h4' => $headings['h4'],
                    'h5' => $headings['h5'],
                    'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                    'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                    'words' => $statistics[Analytics::WORDS_COUNT],
                    'low' => $data[Analytics::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        } finally {
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * For each namespace, the pages below it are searched and the page that
     * has the same id than the namespace is added if it exists.
     *
     * Side effect: the global user is set to a `cli` user in the `admin` group.
     *
     * @param array $namespaces the namespaces to scan, an empty string for the root
     * @param int $depth recursion depth. 0 for unlimited
     * @return array the pages in the format of the DokuWiki search function
     */
    private function findPages($namespaces = array(), $depth = 0)
    {

        global $conf;
        global $USERINFO;
        global $INPUT;

        $datadir = $conf['datadir'];

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        $USERINFO['grps'] = array('admin');
        $INPUT->server->set('REMOTE_USER', "cli");

        $searchOptions = array(
            'depth' => $depth,
            'listfiles' => true,
            'listdirs' => false,
            'pagesonly' => true,
            'skipacl' => true,
            'firsthead' => false,
            'meta' => false,
        );

        $pages = array();
        foreach ($namespaces as $ns) {

            search(
                $pages,
                $datadir,
                'search_universal',
                $searchOptions,
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $file = wikiFN($ns);
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize($file),
                    'mtime' => filemtime($file),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages that do not exist on the file system
     *
     * @param bool $dry when true, the missing pages are only reported, nothing is deleted
     * @throws \RuntimeException when the database is not available or when the query fails
     */
    private function syncPages($dry = false)
    {
        $sqlite = Sqlite::getSqlite();
        // NOTE(review): presumably no instance is returned when the sqlite plugin
        // is not available - to confirm against the Sqlite class
        if (!$sqlite) {
            throw new \RuntimeException("Combo: The sqlite database is not available, unable to sync the pages");
        }

        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $rows = $sqlite->res2arr($res);
        $sqlite->res_close($res);

        foreach ($rows as $row) {
            $id = $row['ID'];
            if (page_exists($id)) {
                continue;
            }
            if ($dry) {
                echo 'Page does not exist on the file system. Dry run, not deleted from the database (' . $id . ")\n";
                continue;
            }
            echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
            Page::createPageFromId($id)->deleteInDb();
        }

    }
}
279