1<?php
2/**
3 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
4 *
5 * This source code is licensed under the GPL license found in the
6 * COPYING  file in the root directory of this source tree.
7 *
8 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
9 * @author   ComboStrap <support@combostrap.com>
10 *
11 */
12
13use ComboStrap\DatabasePageRow;
14use ComboStrap\Event;
15use ComboStrap\ExceptionBadSyntax;
16use ComboStrap\ExceptionCompile;
17use ComboStrap\ExceptionNotExists;
18use ComboStrap\ExceptionNotFound;
19use ComboStrap\ExceptionRuntime;
20use ComboStrap\ExecutionContext;
21use ComboStrap\FsWikiUtility;
22use ComboStrap\LogUtility;
23use ComboStrap\MarkupPath;
24use ComboStrap\Meta\Field\BacklinkCount;
25use ComboStrap\Meta\Field\PageH1;
26use ComboStrap\MetadataFrontmatterStore;
27use ComboStrap\Sqlite;
28use splitbrain\phpcli\Options;
29
30/**
 * All dependencies are loaded
32 */
33require_once(__DIR__ . '/vendor/autoload.php');
34
35/**
 * The server memory limit of 128M is not enough
37 */
38ini_set('memory_limit', '256M');
39
40
41/**
42 * Class cli_plugin_combo
43 *
44 * This is a cli:
45 * https://www.dokuwiki.org/devel:cli_plugins#example
46 *
47 * Usage:
48 *
49 * ```
50 * docker exec -ti $(CONTAINER) /bin/bash
51 * ```
52 * ```
53 * set animal=animal-directory-name
54 * php ./bin/plugin.php combo --help
55 * ```
56 * or via the IDE
57 *
58 *
59 * Example:
60 * https://www.dokuwiki.org/tips:grapher
61 *
62 */
63class cli_plugin_combo extends DokuWiki_CLI_Plugin
64{
65
    // Command names: registered in setup() and dispatched by the switch in main()
    const METADATA_TO_DATABASE = "metadata-to-database";
    const ANALYTICS = "analytics";
    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
    const SYNC = "sync";
    const PLUGINS_TO_UPDATE = "plugins-to-update";
    // Long option names: setup() registers them for the metadata-to-database command only
    const FORCE_OPTION = 'force';
    const PORT_OPTION = 'port';
    const HOST_OPTION = 'host';
    // Canonical name given to LogUtility::error() when the analytics of a page cannot be fetched
    const CANONICAL = "combo-cli";
75
76
77    /**
78     * register options and arguments
79     * @param Options $options
80     *
81     * Note the animal is set in {@link DokuWikiFarmCore::detectAnimal()}
82     * via the environment variable `animal` that is passed in the $_SERVER variable
83     */
84    protected function setup(Options $options)
85    {
86        $help = <<<EOF
87ComboStrap Administrative Commands
88
89
90Example:
91  * Replicate all pages into the database
92```bash
93php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :
94# or
95php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /
96```
97  * Replicate only the page `:namespace:my-page`
98```bash
99php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :namespace:my-page
100# or
101php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /namespace/my-page
102```
103
104Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
105```bash
106animal=animal-directory-name php ./bin/plugin.php combo
107```
108
109EOF;
110
111        $options->setHelp($help);
112        $options->registerOption('version', 'print version', 'v');
113        $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
114        $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
115        $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
116        $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
117        $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");
118        $options->registerArgument(
119            'path',
120            "The start path (a page or a directory). For all pages, type the root directory '/'",
121            false
122        );
123        $options->registerOption(
124            'output',
125            "Optional, where to store the analytical data as csv eg. a filename.",
126            'o',
127            true
128        );
129        $options->registerOption(
130            self::HOST_OPTION,
131            "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
132            null,
133            true,
134            self::METADATA_TO_DATABASE
135        );
136        $options->registerOption(
137            self::PORT_OPTION,
138            "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
139            null,
140            true,
141            self::METADATA_TO_DATABASE
142        );
143        $options->registerOption(
144            self::FORCE_OPTION,
145            "Replicate with force",
146            'f',
147            false,
148            self::METADATA_TO_DATABASE
149        );
150        $options->registerOption(
151            'dry',
152            "Optional, dry-run",
153            'd', false);
154
155
156    }
157
158    /**
159     * The main entry
160     * @param Options $options
161     * @throws ExceptionCompile
162     */
163    protected function main(Options $options)
164    {
165
166
167        if (isset($_REQUEST['animal'])) {
168            // on linux
169            echo "Animal detected: " . $_REQUEST['animal'] . "\n";
170        } else {
171            // on windows
172            echo "No Animal detected\n";
173            echo "Conf: " . DOKU_CONF . "\n";
174        }
175
176        $args = $options->getArgs();
177
178
179        $depth = $options->getOpt('depth', 0);
180        $cmd = $options->getCmd();
181
182        try {
183            switch ($cmd) {
184                case self::METADATA_TO_DATABASE:
185                    $startPath = $this->getStartPath($args);
186                    $force = $options->getOpt(self::FORCE_OPTION, false);
187                    $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
188                    if ($hostOptionValue === null) {
189                        fwrite(STDERR, "The host name is mandatory");
190                        return;
191                    }
192                    $_SERVER['HTTP_HOST'] = $hostOptionValue;
193                    $portOptionName = $options->getOpt(self::PORT_OPTION, null);
194                    if ($portOptionName === null) {
195                        fwrite(STDERR, "The host port is mandatory");
196                        return;
197                    }
198                    $_SERVER['SERVER_PORT'] = $portOptionName;
199                    $this->index($startPath, $force, $depth);
200                    break;
201                case self::METADATA_TO_FRONTMATTER:
202                    $startPath = $this->getStartPath($args);
203                    $this->frontmatter($startPath, $depth);
204                    break;
205                case self::ANALYTICS:
206                    $startPath = $this->getStartPath($args);
207                    $output = $options->getOpt('output', '');
208                    //if ($output == '-') $output = 'php://stdout';
209                    $this->analytics($startPath, $output, $depth);
210                    break;
211                case self::SYNC:
212                    $this->deleteNonExistingPageFromDatabase();
213                    break;
214                case self::PLUGINS_TO_UPDATE:
215                    /**
216                     * Endpoint:
217                     * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
218                     * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
219                     */
220                    $pluginList = plugin_list('', true);
221                    /* @var helper_plugin_extension_extension $extension */
222                    $extension = $this->loadHelper('extension_extension');
223                    foreach ($pluginList as $name) {
224                        $extension->setExtension($name);
225                        if ($extension->updateAvailable()) {
226                            echo "The extension $name should be updated";
227                        }
228                    }
229                    break;
230                default:
231                    if ($cmd !== "") {
232                        fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")");
233                    } else {
234                        echo $options->help();
235                    }
236                    exit(1);
237            }
238        } catch (\Exception $exception) {
239            fwrite(STDERR, "An internal error has occured. " . $exception->getMessage() . "\n" . $exception->getTraceAsString());
240            exit(1);
241        }
242
243
244    }
245
246    /**
247     * @param array $namespaces
248     * @param bool $rebuild
249     * @param int $depth recursion depth. 0 for unlimited
250     * @throws ExceptionCompile
251     */
252    private function index($namespaces = array(), $rebuild = false, $depth = 0)
253    {
254
255        /**
256         * Run as admin to overcome the fact that
257         * anonymous user cannot see all links and backlinks
258         */
259        global $USERINFO;
260        $USERINFO['grps'] = array('admin');
261        global $INPUT;
262        $INPUT->server->set('REMOTE_USER', "cli");
263
264        $pages = FsWikiUtility::getPages($namespaces, $depth);
265
266        $pageCounter = 0;
267        $totalNumberOfPages = sizeof($pages);
268        while ($pageArray = array_shift($pages)) {
269            $id = $pageArray['id'];
270            global $ID;
271            $ID = $id;
272            /**
273             * Indexing the page start the database replication
274             * See {@link action_plugin_combo_indexer}
275             */
276            $pageCounter++;
277            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
278            try {
279                /**
280                 * If the page does not need to be indexed, there is no run
281                 * and false is returned
282                 */
283                $indexedOrNot = idx_addPage($id, true, true);
284                if ($indexedOrNot) {
285                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
286                } else {
287                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
288                }
289            } catch (ExceptionRuntime $e) {
290                LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
291            } finally {
292                $executionContext->close();
293            }
294        }
295        /**
296         * Process all backlinks
297         */
298        echo "Processing Replication Request\n";
299        Event::dispatchEvent(PHP_INT_MAX);
300
301    }
302
303    private function analytics($namespaces = array(), $output = null, $depth = 0)
304    {
305
306        $fileHandle = null;
307        if (!empty($output)) {
308            $fileHandle = @fopen($output, 'w');
309            if (!$fileHandle) $this->fatal("Failed to open $output");
310        }
311
312        /**
313         * Run as admin to overcome the fact that
314         * anonymous user cannot see all links and backlinks
315         */
316        global $USERINFO;
317        $USERINFO['grps'] = array('admin');
318        global $INPUT;
319        $INPUT->server->set('REMOTE_USER', "cli");
320
321        $pages = FsWikiUtility::getPages($namespaces, $depth);
322
323
324        if (!empty($fileHandle)) {
325            $header = array(
326                'id',
327                'backlinks',
328                'broken_links',
329                'changes',
330                'chars',
331                'external_links',
332                'external_medias',
333                'h1',
334                'h2',
335                'h3',
336                'h4',
337                'h5',
338                'internal_links',
339                'internal_medias',
340                'words',
341                'score'
342            );
343            fwrite($fileHandle, implode(",", $header) . PHP_EOL);
344        }
345        $pageCounter = 0;
346        $totalNumberOfPages = sizeof($pages);
347        while ($pageArray = array_shift($pages)) {
348            $id = $pageArray['id'];
349            $page = MarkupPath::createMarkupFromId($id);
350
351
352            $pageCounter++;
353            /**
354             * Analytics
355             */
356            echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";
357            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
358            try {
359                $analyticsPath = $page->fetchAnalyticsPath();
360            } catch (ExceptionNotExists $e) {
361                LogUtility::error("The analytics document for the page ($page) was not found");
362                continue;
363            } catch (ExceptionCompile $e) {
364                LogUtility::error("Error when get the analytics.", self::CANONICAL, $e);
365                continue;
366            } finally {
367                $executionContext->close();
368            }
369
370            try {
371                $data = \ComboStrap\Json::createFromPath($analyticsPath)->toArray();
372            } catch (ExceptionBadSyntax $e) {
373                LogUtility::error("The analytics json of the page ($page) is not conform");
374                continue;
375            } catch (ExceptionNotFound|ExceptionNotExists $e) {
376                LogUtility::error("The analytics document ({$analyticsPath}) for the page ($page) was not found");
377                continue;
378            }
379
380            if (!empty($fileHandle)) {
381                $statistics = $data[renderer_plugin_combo_analytics::STATISTICS];
382                $row = array(
383                    'id' => $id,
384                    'backlinks' => $statistics[BacklinkCount::getPersistentName()],
385                    'broken_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_BROKEN_COUNT],
386                    'changes' => $statistics[renderer_plugin_combo_analytics::EDITS_COUNT],
387                    'chars' => $statistics[renderer_plugin_combo_analytics::CHAR_COUNT],
388                    'external_links' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_LINK_COUNT],
389                    'external_medias' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_MEDIA_COUNT],
390                    PageH1::PROPERTY_NAME => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT][PageH1::PROPERTY_NAME],
391                    'h2' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h2'],
392                    'h3' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h3'],
393                    'h4' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h4'],
394                    'h5' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h5'],
395                    'internal_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_COUNT],
396                    'internal_medias' => $statistics[renderer_plugin_combo_analytics::INTERNAL_MEDIA_COUNT],
397                    'words' => $statistics[renderer_plugin_combo_analytics::WORD_COUNT],
398                    'low' => $data[renderer_plugin_combo_analytics::QUALITY]['low']
399                );
400                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
401            }
402
403        }
404        if (!empty($fileHandle)) {
405            fclose($fileHandle);
406        }
407
408    }
409
410
411    /**
412     * @throws \ComboStrap\ExceptionSqliteNotAvailable
413     */
414    private function deleteNonExistingPageFromDatabase()
415    {
416        LogUtility::msg("Starting: Deleting non-existing page from database");
417        $sqlite = Sqlite::createOrGetSqlite();
418        $request = $sqlite
419            ->createRequest()
420            ->setQuery("select id as \"id\" from pages");
421        $rows = [];
422        try {
423            $rows = $request
424                ->execute()
425                ->getRows();
426        } catch (ExceptionCompile $e) {
427            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
428            return;
429        } finally {
430            $request->close();
431        }
432        $counter = 0;
433
434        foreach ($rows as $row) {
435            /**
436             * Context
437             * PHP Fatal error:  Allowed memory size of 268435456 bytes exhausted (tried to allocate 20480 bytes)
438             * in /opt/www/datacadamia.com/inc/ErrorHandler.php on line 102
439             */
440            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
441            try {
442                $counter++;
443                $id = $row['id'];
444                if (!page_exists($id)) {
445                    echo 'Page does not exist on the file system. Delete from the database (' . $id . ")\n";
446                    try {
447                        $dbRow = DatabasePageRow::getFromDokuWikiId($id);
448                        $dbRow->delete();
449                    } catch (ExceptionNotFound $e) {
450                        // ok
451                    }
452                }
453            } finally {
454                $executionContext->close();
455            }
456
457        }
458        LogUtility::msg("Sync finished ($counter pages checked)");
459
460    }
461
462    private function frontmatter($namespaces, $depth)
463    {
464        $pages = FsWikiUtility::getPages($namespaces, $depth);
465        $pageCounter = 0;
466        $totalNumberOfPages = sizeof($pages);
467        $pagesWithChanges = [];
468        $pagesWithError = [];
469        $pagesWithOthers = [];
470        $notChangedCounter = 0;
471        while ($pageArray = array_shift($pages)) {
472            $id = $pageArray['id'];
473            global $ID;
474            $ID = $id;
475            $page = MarkupPath::createMarkupFromId($id);
476            $pageCounter++;
477            LogUtility::msg("Processing page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
478            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
479            try {
480                $message = MetadataFrontmatterStore::createFromPage($page)
481                    ->sync();
482                switch ($message->getStatus()) {
483                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
484                        $notChangedCounter++;
485                        break;
486                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
487                        $pagesWithChanges[] = $id;
488                        break;
489                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
490                        $pagesWithError[$id] = $message->getPlainTextContent();
491                        break;
492                    default:
493                        $pagesWithOthers[$id] = $message->getPlainTextContent();
494                        break;
495
496                }
497            } catch (ExceptionCompile $e) {
498                $pagesWithError[$id] = $e->getMessage();
499            } finally {
500                $executionContext->close();
501            }
502
503        }
504
505        echo "\n";
506        echo "Result:\n";
507        echo "$notChangedCounter pages without any frontmatter modifications\n";
508
509        if (sizeof($pagesWithError) > 0) {
510            echo "\n";
511            echo "The following pages had errors\n";
512            $pageCounter = 0;
513            $totalNumberOfPages = sizeof($pagesWithError);
514            foreach ($pagesWithError as $id => $message) {
515                $pageCounter++;
516                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages): " . $message, LogUtility::LVL_MSG_ERROR);
517            }
518        } else {
519            echo "No error\n";
520        }
521
522        if (sizeof($pagesWithChanges) > 0) {
523            echo "\n";
524            echo "The following pages had changed:\n";
525            $pageCounter = 0;
526            $totalNumberOfPages = sizeof($pagesWithChanges);
527            foreach ($pagesWithChanges as $id) {
528                $pageCounter++;
529                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_ERROR);
530            }
531        } else {
532            echo "No changes\n";
533        }
534
535        if (sizeof($pagesWithOthers) > 0) {
536            echo "\n";
537            echo "The following pages had an other status";
538            $pageCounter = 0;
539            $totalNumberOfPages = sizeof($pagesWithOthers);
540            foreach ($pagesWithOthers as $id => $message) {
541                $pageCounter++;
542                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
543            }
544        }
545    }
546
547    private function getStartPath($args)
548    {
549        $sizeof = sizeof($args);
550        switch ($sizeof) {
551            case 0:
552                fwrite(STDERR, "The start path is mandatory and was not given");
553                exit(1);
554            case 1:
555                $startPath = $args[0];
556                if (!in_array($startPath, [":", "/"])) {
557                    // cleanId would return blank for a root
558                    $startPath = cleanID($startPath);
559                }
560                break;
561            default:
562                fwrite(STDERR, "Too much arguments given $sizeof");
563                exit(1);
564        }
565        return $startPath;
566    }
567}
568