xref: /plugin/combo/cli.php (revision e8b2ff590c848541e718216df3a67061e98c1761)
1<?php
2/**
3 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
4 *
5 * This source code is licensed under the GPL license found in the
6 * COPYING  file in the root directory of this source tree.
7 *
8 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
9 * @author   ComboStrap <support@combostrap.com>
10 *
11 */
// This file must only be executed from within DokuWiki
if (!defined('DOKU_INC')) {
    die();
}

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

/**
 * All the dependencies are loaded by the plugin utility
 */
require_once __DIR__ . '/class/PluginUtility.php';

/**
 * The 128M of memory of the server is not enough,
 * the limit is raised for this script
 */
ini_set('memory_limit', '256M');
28
/**
 * Class cli_plugin_combo
 *
 * Command line interface of the combo plugin.
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Commands:
 *   * `analytics` (used when no command is given): process the analytics of the pages
 *     found in the given namespaces and optionally export them as csv
 *   * `sync`: delete from the database the pages that do not exist on the file system
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * register options and arguments
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        // NOTE(review): `version` is registered but main() never reads it
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o', 'file');
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c', false);
        // NOTE(review): `dry` is registered but main() never reads it
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        // main() reads this option: it must be registered to be accepted.
        // No short form because `d` is already taken by `dry`
        $options->registerOption(
            'depth',
            "Optional, recursion depth. 0 (the default) for unlimited",
            null,
            'depth'
        );
        $options->registerCommand(self::SYNC, "Sync the database");

    }

    /**
     * The main entry
     *
     * Dispatch to the requested command. The analytics command is
     * executed when no command is given.
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown
     */
    protected function main(Options $options)
    {

        // The namespaces can be given as arguments and/or
        // via the `namespaces` option (separated by a comma or a whitespace)
        $requestedNamespaces = $options->getArgs();
        $namespacesOption = $options->getOpt('namespaces', '');
        if (is_string($namespacesOption) && $namespacesOption !== '') {
            $requestedNamespaces = array_merge(
                $requestedNamespaces,
                preg_split('/[\s,]+/', $namespacesOption, -1, PREG_SPLIT_NO_EMPTY)
            );
        }
        $namespaces = array_map('cleanID', $requestedNamespaces);
        if (!count($namespaces)) $namespaces = array(''); //import from top

        $cache = $options->getOpt('cache', false);
        // The options are received as string from the command line
        $depth = (int)$options->getOpt('depth', 0);
        $cmd = $options->getCmd();
        if ($cmd == "") {
            $cmd = self::ANALYTICS;
        }
        switch ($cmd) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $this->syncPages();
                break;
            default:
                throw new \RuntimeException("Combo: Command unknown (" . $cmd . ")");
        }


    }

    /**
     * Process the analytics of every page found in the namespaces
     * and, when an output is given, write one csv line by page.
     *
     * @param array $namespaces the namespaces to scan ('' for the root)
     * @param $output the path of the csv file to create (no export if empty)
     * @param bool $cache passed as is to {@link Analytics::processAndGetDataAsArray()}
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        // The `finally` closes the file even if the processing of a page throws
        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                // The labels follow the order of the values written below for each page
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    // the last value of a row is the `low` quality data (it was labeled `score`)
                    'low'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            $pageCounter = 0;
            $totalNumberOfPages = count($pages);
            // A foreach and not an array_shift that re-indexes the whole array at each iteration
            foreach ($pages as $page) {
                $id = $page['id'];

                $pageCounter++;
                echo "Processing the page {$id} ($pageCounter / $totalNumberOfPages)\n";

                // The processing occurs even if there is no csv export
                $data = Analytics::processAndGetDataAsArray($id, $cache);
                if (empty($fileHandle)) {
                    continue;
                }

                $statistics = $data[Analytics::STATISTICS];
                $headersCount = $statistics[Analytics::HEADERS_COUNT];
                // Only the values are written, the keys document the columns
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                    'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                    'changes' => $statistics[Analytics::EDITS_COUNT],
                    'chars' => $statistics[Analytics::CHARS_COUNT],
                    'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                    'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS_COUNT],
                    Analytics::H1 => $headersCount[Analytics::H1],
                    'h2' => $headersCount['h2'],
                    'h3' => $headersCount['h3'],
                    'h4' => $headersCount['h4'],
                    'h5' => $headersCount['h5'],
                    'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                    'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                    'words' => $statistics[Analytics::WORDS_COUNT],
                    'low' => $data[Analytics::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        } finally {
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * For each namespace, the pages below its directory are collected
     * and the page with the same id than the namespace is added if it exists.
     *
     * @param $namespaces the namespaces to scan ('' for the root)
     * @param $depth the `depth` option given to the search
     * @return array the pages, each one being an array with at least the `id` key
     */
    private function findPages($namespaces = array(), $depth = 0)
    {

        global $conf;
        $datadir = $conf['datadir'];

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $searchOptions = array(
            'depth' => $depth,
            'listfiles' => true,
            'listdirs' => false,
            'pagesonly' => true,
            'skipacl' => true,
            'firsthead' => false,
            'meta' => false,
        );

        $pages = array();
        foreach ($namespaces as $ns) {

            // The search appends the pages found to the $pages array
            search(
                $pages,
                $datadir,
                'search_universal',
                $searchOptions,
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $startPageFile = wikiFN($ns);
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize($startPageFile),
                    'mtime' => filemtime($startPageFile),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages
     * that do not exist on the file system
     *
     * @throws \RuntimeException when the database is not available or when the selection of the pages fails
     */
    private function syncPages()
    {
        $sqlite = Sqlite::getSqlite();
        // Guard in case the helper could not provide a connection
        if (!$sqlite) {
            throw new \RuntimeException("Combo: The sqlite database is not available");
        }
        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the page selection query");
        }
        $rows = $sqlite->res2arr($res);
        $sqlite->res_close($res);
        foreach ($rows as $row) {
            $id = $row['ID'];
            if (page_exists($id)) {
                continue;
            }
            echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
            Page::createPageFromId($id)->deleteInDb();
        }


    }
}
279