xref: /plugin/combo/cli.php (revision 328b625d295860658952af40bd37dab2cf646c0b)
1<?php
2/**
3 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
4 *
5 * This source code is licensed under the GPL license found in the
6 * COPYING  file in the root directory of this source tree.
7 *
8 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
9 * @author   ComboStrap <support@combostrap.com>
10 *
11 */
12
13use ComboStrap\DatabasePageRow;
14use ComboStrap\Event;
15use ComboStrap\ExceptionBadSyntax;
16use ComboStrap\ExceptionCompile;
17use ComboStrap\ExceptionNotExists;
18use ComboStrap\ExceptionNotFound;
19use ComboStrap\ExceptionRuntime;
20use ComboStrap\ExceptionSqliteNotAvailable;
21use ComboStrap\ExecutionContext;
22use ComboStrap\FsWikiUtility;
23use ComboStrap\LogUtility;
24use ComboStrap\MarkupPath;
25use ComboStrap\Meta\Field\BacklinkCount;
26use ComboStrap\Meta\Field\PageH1;
27use ComboStrap\MetadataFrontmatterStore;
28use ComboStrap\Sqlite;
29use splitbrain\phpcli\Options;
30
/**
 * All dependencies are loaded
 */
34require_once(__DIR__ . '/vendor/autoload.php');
35
/**
 * The default memory limit of the server (128M) is not enough
 */
39ini_set('memory_limit', '256M');
40
41
42/**
43 * Class cli_plugin_combo
44 *
45 * This is a cli:
46 * https://www.dokuwiki.org/devel:cli_plugins#example
47 *
48 * Usage:
49 *
50 * ```
51 * docker exec -ti $(CONTAINER) /bin/bash
52 * ```
53 * ```
54 * set animal=animal-directory-name
55 * php ./bin/plugin.php combo --help
56 * ```
57 * or via the IDE
58 *
59 *
60 * Example:
61 * https://www.dokuwiki.org/tips:grapher
62 *
63 */
64class cli_plugin_combo extends DokuWiki_CLI_Plugin
65{
66
67    const METADATA_TO_DATABASE = "metadata-to-database";
68    const ANALYTICS = "analytics";
69    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
70    const BROKEN_LINKS = "broken-links";
71    const SYNC = "sync";
72    const PLUGINS_TO_UPDATE = "plugins-to-update";
73    const FORCE_OPTION = 'force';
74    const PORT_OPTION = 'port';
75    const HOST_OPTION = 'host';
76    const CANONICAL = "combo-cli";
77
78
79    /**
80     * register options and arguments
81     * @param Options $options
82     *
83     * Note the animal is set in {@link DokuWikiFarmCore::detectAnimal()}
84     * via the environment variable `animal` that is passed in the $_SERVER variable
85     */
86    protected function setup(Options $options)
87    {
88        $help = <<<EOF
89ComboStrap Administrative Commands
90
91
92Example:
93  * Replicate all pages into the database
94```bash
95php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :
96# or
97php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /
98```
99  * Replicate only the page `:namespace:my-page`
100```bash
101php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :namespace:my-page
102# or
103php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /namespace/my-page
104```
105
106Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
107```bash
108animal=animal-directory-name php ./bin/plugin.php combo
109```
110
111EOF;
112
113        /**
114         * Global Options
115         */
116        $options->setHelp($help);
117        $options->registerOption('version', 'print version', 'v');
118        /** @noinspection PhpRedundantOptionalArgumentInspection */
119        $options->registerOption(
120            'dry',
121            "Optional, dry-run",
122            'd', false);
123        $options->registerOption(
124            'output',
125            "Optional, where to store the analytical data as csv eg. a filename.",
126            'o',
127            true
128        );
129
130        /**
131         * Command without options
132         */
133        $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
134        $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
135        $options->registerCommand(self::BROKEN_LINKS, "Output Broken Links");
136
137
138        // Metadata to database command
139        $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
140        $options->registerOption(
141            self::HOST_OPTION,
142            "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
143            null,
144            true,
145            self::METADATA_TO_DATABASE
146        );
147        $options->registerOption(
148            self::PORT_OPTION,
149            "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
150            null,
151            true,
152            self::METADATA_TO_DATABASE
153        );
154        $options->registerOption(
155            self::FORCE_OPTION,
156            "Replicate with force",
157            'f',
158            false,
159            self::METADATA_TO_DATABASE
160        );
161        $startPathArgName = 'startPath';
162        $startPathHelpDescription = "The start path (a page or a directory). For all pages, type the root directory '/' or ':'";
163        $options->registerArgument(
164            $startPathArgName,
165            $startPathHelpDescription,
166            true,
167            self::METADATA_TO_DATABASE
168        );
169
170
171        // Metadata Command definition
172        $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
173        $options->registerArgument(
174            $startPathArgName,
175            $startPathHelpDescription,
176            true,
177            self::METADATA_TO_FRONTMATTER
178        );
179
180        // Sync Command Definition
181        $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");
182        $options->registerArgument(
183            $startPathArgName,
184            $startPathHelpDescription,
185            true,
186            self::SYNC
187        );
188
189    }
190
191    /**
192     * The main entry
193     * @param Options $options
194     */
195    protected function main(Options $options)
196    {
197
198
199        if (isset($_REQUEST['animal'])) {
200            // on linux
201            echo "Animal detected: " . $_REQUEST['animal'] . "\n";
202        } else {
203            // on windows
204            echo "No Animal detected\n";
205            echo "Conf: " . DOKU_CONF . "\n";
206        }
207
208        $args = $options->getArgs();
209
210
211        $depth = $options->getOpt('depth', 0);
212        $cmd = $options->getCmd();
213
214        try {
215            switch ($cmd) {
216                case self::METADATA_TO_DATABASE:
217                    $startPath = $this->getStartPath($args);
218                    $force = $options->getOpt(self::FORCE_OPTION, false);
219                    $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
220                    if ($hostOptionValue === null) {
221                        fwrite(STDERR, "The host name is mandatory");
222                        return;
223                    }
224                    $_SERVER['HTTP_HOST'] = $hostOptionValue;
225                    $portOptionName = $options->getOpt(self::PORT_OPTION, null);
226                    if ($portOptionName === null) {
227                        fwrite(STDERR, "The host port is mandatory");
228                        return;
229                    }
230                    $_SERVER['SERVER_PORT'] = $portOptionName;
231                    $this->index($startPath, $force, $depth);
232                    break;
233                case self::METADATA_TO_FRONTMATTER:
234                    $startPath = $this->getStartPath($args);
235                    $this->frontmatter($startPath, $depth);
236                    break;
237                case self::BROKEN_LINKS:
238                    $this->brokenLinks();
239                    break;
240                case self::ANALYTICS:
241                    $startPath = $this->getStartPath($args);
242                    $output = $options->getOpt('output', '');
243                    //if ($output == '-') $output = 'php://stdout';
244                    $this->analytics($startPath, $output, $depth);
245                    break;
246                case self::SYNC:
247                    $this->deleteNonExistingPageFromDatabase();
248                    break;
249                case self::PLUGINS_TO_UPDATE:
250                    $this->pluginToUpdate();
251                    break;
252                default:
253                    if ($cmd !== "") {
254                        fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")");
255                    } else {
256                        echo $options->help();
257                    }
258                    exit(1);
259            }
260        } catch (Exception $exception) {
261            fwrite(STDERR, "An internal error has occurred. " . $exception->getMessage() . "\n" . $exception->getTraceAsString());
262            exit(1);
263        }
264
265
266    }
267
268    /**
269     * @param array $namespaces
270     * @param bool $rebuild
271     * @param int $depth recursion depth. 0 for unlimited
272     * @throws ExceptionCompile
273     */
274    private function index($namespaces = array(), $rebuild = false, $depth = 0)
275    {
276
277        /**
278         * Run as admin to overcome the fact that
279         * anonymous user cannot see all links and backlinks
280         */
281        global $USERINFO;
282        $USERINFO['grps'] = array('admin');
283        global $INPUT;
284        $INPUT->server->set('REMOTE_USER', "cli");
285
286        $pages = FsWikiUtility::getPages($namespaces, $depth);
287
288        $pageCounter = 0;
289        $totalNumberOfPages = sizeof($pages);
290        while ($pageArray = array_shift($pages)) {
291            $id = $pageArray['id'];
292            global $ID;
293            $ID = $id;
294            /**
295             * Indexing the page start the database replication
296             * See {@link action_plugin_combo_indexer}
297             */
298            $pageCounter++;
299            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
300            try {
301                /**
302                 * If the page does not need to be indexed, there is no run
303                 * and false is returned
304                 */
305                $indexedOrNot = idx_addPage($id, true, true);
306                if ($indexedOrNot) {
307                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
308                } else {
309                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
310                }
311            } catch (ExceptionRuntime $e) {
312                LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
313            } finally {
314                $executionContext->close();
315            }
316        }
317        /**
318         * Process all backlinks
319         */
320        echo "Processing Replication Request\n";
321        Event::dispatchEvent(PHP_INT_MAX);
322
323    }
324
325    private function analytics($namespaces = array(), $output = null, $depth = 0)
326    {
327
328        $fileHandle = null;
329        if (!empty($output)) {
330            $fileHandle = @fopen($output, 'w');
331            if (!$fileHandle) $this->fatal("Failed to open $output");
332        }
333
334        /**
335         * Run as admin to overcome the fact that
336         * anonymous user cannot see all links and backlinks
337         */
338        global $USERINFO;
339        $USERINFO['grps'] = array('admin');
340        global $INPUT;
341        $INPUT->server->set('REMOTE_USER', "cli");
342
343        $pages = FsWikiUtility::getPages($namespaces, $depth);
344
345
346        if (!empty($fileHandle)) {
347            $header = array(
348                'id',
349                'backlinks',
350                'broken_links',
351                'changes',
352                'chars',
353                'external_links',
354                'external_medias',
355                'h1',
356                'h2',
357                'h3',
358                'h4',
359                'h5',
360                'internal_links',
361                'internal_medias',
362                'words',
363                'score'
364            );
365            fwrite($fileHandle, implode(",", $header) . PHP_EOL);
366        }
367        $pageCounter = 0;
368        $totalNumberOfPages = sizeof($pages);
369        while ($pageArray = array_shift($pages)) {
370            $id = $pageArray['id'];
371            $page = MarkupPath::createMarkupFromId($id);
372
373
374            $pageCounter++;
375            /**
376             * Analytics
377             */
378            echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";
379            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
380            try {
381                $analyticsPath = $page->fetchAnalyticsPath();
382            } catch (ExceptionNotExists $e) {
383                LogUtility::error("The analytics document for the page ($page) was not found");
384                continue;
385            } catch (ExceptionCompile $e) {
386                LogUtility::error("Error when get the analytics.", self::CANONICAL, $e);
387                continue;
388            } finally {
389                $executionContext->close();
390            }
391
392            try {
393                $data = \ComboStrap\Json::createFromPath($analyticsPath)->toArray();
394            } catch (ExceptionBadSyntax $e) {
395                LogUtility::error("The analytics json of the page ($page) is not conform");
396                continue;
397            } catch (ExceptionNotFound|ExceptionNotExists $e) {
398                LogUtility::error("The analytics document ({$analyticsPath}) for the page ($page) was not found");
399                continue;
400            }
401
402            if (!empty($fileHandle)) {
403                $statistics = $data[renderer_plugin_combo_analytics::STATISTICS];
404                $row = array(
405                    'id' => $id,
406                    'backlinks' => $statistics[BacklinkCount::getPersistentName()],
407                    'broken_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_BROKEN_COUNT],
408                    'changes' => $statistics[renderer_plugin_combo_analytics::EDITS_COUNT],
409                    'chars' => $statistics[renderer_plugin_combo_analytics::CHAR_COUNT],
410                    'external_links' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_LINK_COUNT],
411                    'external_medias' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_MEDIA_COUNT],
412                    PageH1::PROPERTY_NAME => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT][PageH1::PROPERTY_NAME],
413                    'h2' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h2'],
414                    'h3' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h3'],
415                    'h4' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h4'],
416                    'h5' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h5'],
417                    'internal_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_COUNT],
418                    'internal_medias' => $statistics[renderer_plugin_combo_analytics::INTERNAL_MEDIA_COUNT],
419                    'words' => $statistics[renderer_plugin_combo_analytics::WORD_COUNT],
420                    'low' => $data[renderer_plugin_combo_analytics::QUALITY]['low']
421                );
422                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
423            }
424
425        }
426        if (!empty($fileHandle)) {
427            fclose($fileHandle);
428        }
429
430    }
431
432
433    /**
434     * @throws ExceptionSqliteNotAvailable
435     */
436    private function deleteNonExistingPageFromDatabase()
437    {
438        LogUtility::msg("Starting: Deleting non-existing page from database");
439        $sqlite = Sqlite::createOrGetSqlite();
440        /** @noinspection SqlNoDataSourceInspection */
441        $request = $sqlite
442            ->createRequest()
443            ->setQuery("select id as \"id\" from pages");
444        $rows = [];
445        try {
446            $rows = $request
447                ->execute()
448                ->getRows();
449        } catch (ExceptionCompile $e) {
450            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
451            return;
452        } finally {
453            $request->close();
454        }
455        $counter = 0;
456
457        foreach ($rows as $row) {
458            /**
459             * Context
460             * PHP Fatal error:  Allowed memory size of 268435456 bytes exhausted (tried to allocate 20480 bytes)
461             * in /opt/www/datacadamia.com/inc/ErrorHandler.php on line 102
462             */
463            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
464            try {
465                $counter++;
466                $id = $row['id'];
467                if (!page_exists($id)) {
468                    echo 'Page does not exist on the file system. Delete from the database (' . $id . ")\n";
469                    try {
470                        $dbRow = DatabasePageRow::getFromDokuWikiId($id);
471                        $dbRow->delete();
472                    } catch (ExceptionNotFound $e) {
473                        // ok
474                    }
475                }
476            } finally {
477                $executionContext->close();
478            }
479
480        }
481        LogUtility::msg("Sync finished ($counter pages checked)");
482
483    }
484
485    private function frontmatter($namespaces, $depth)
486    {
487        $pages = FsWikiUtility::getPages($namespaces, $depth);
488        $pageCounter = 0;
489        $totalNumberOfPages = sizeof($pages);
490        $pagesWithChanges = [];
491        $pagesWithError = [];
492        $pagesWithOthers = [];
493        $notChangedCounter = 0;
494        while ($pageArray = array_shift($pages)) {
495            $id = $pageArray['id'];
496            global $ID;
497            $ID = $id;
498            $page = MarkupPath::createMarkupFromId($id);
499            $pageCounter++;
500            LogUtility::msg("Processing page $id ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
501            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
502            try {
503                $message = MetadataFrontmatterStore::createFromPage($page)
504                    ->sync();
505                switch ($message->getStatus()) {
506                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
507                        $notChangedCounter++;
508                        break;
509                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
510                        $pagesWithChanges[] = $id;
511                        break;
512                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
513                        $pagesWithError[$id] = $message->getPlainTextContent();
514                        break;
515                    default:
516                        $pagesWithOthers[$id] = $message->getPlainTextContent();
517                        break;
518
519                }
520            } catch (ExceptionCompile $e) {
521                $pagesWithError[$id] = $e->getMessage();
522            } finally {
523                $executionContext->close();
524            }
525
526        }
527
528        echo "\n";
529        echo "Result:\n";
530        echo "$notChangedCounter pages without any frontmatter modifications\n";
531
532        if (sizeof($pagesWithError) > 0) {
533            echo "\n";
534            echo "The following pages had errors\n";
535            $pageCounter = 0;
536            $totalNumberOfPages = sizeof($pagesWithError);
537            foreach ($pagesWithError as $id => $message) {
538                $pageCounter++;
539                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages): " . $message);
540            }
541        } else {
542            echo "No error\n";
543        }
544
545        if (sizeof($pagesWithChanges) > 0) {
546            echo "\n";
547            echo "The following pages had changed:\n";
548            $pageCounter = 0;
549            $totalNumberOfPages = sizeof($pagesWithChanges);
550            foreach ($pagesWithChanges as $id) {
551                $pageCounter++;
552                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages) ");
553            }
554        } else {
555            echo "No changes\n";
556        }
557
558        if (sizeof($pagesWithOthers) > 0) {
559            echo "\n";
560            echo "The following pages had an other status";
561            $pageCounter = 0;
562            $totalNumberOfPages = sizeof($pagesWithOthers);
563            foreach ($pagesWithOthers as $id => $message) {
564                $pageCounter++;
565                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
566            }
567        }
568    }
569
570    private function getStartPath($args)
571    {
572        $sizeof = sizeof($args);
573        switch ($sizeof) {
574            case 0:
575                fwrite(STDERR, "The start path is mandatory and was not given");
576                exit(1);
577            case 1:
578                $startPath = $args[0];
579                if (!in_array($startPath, [":", "/"])) {
580                    // cleanId would return blank for a root
581                    $startPath = cleanID($startPath);
582                }
583                break;
584            default:
585                fwrite(STDERR, "Too much arguments given $sizeof");
586                exit(1);
587        }
588        return $startPath;
589    }
590
591    /**
592     *
593     * Print the extension/plugin to update
594     *
595     * Note, there is also an Endpoint:
596     * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
597     * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
598     *
599     * @noinspection PhpUndefinedClassInspection
600     */
601    private function pluginToUpdate()
602    {
603
604        if (class_exists(Local::class)) {
605            /**
606             * Release 2025-05-14 "Librarian"
607             * https://www.dokuwiki.org/changes#release_2025-05-14_librarian
608             * https://www.patreon.com/posts/new-extension-116501986
609             * ./bin/plugin.php extension list
610             * @link lib/plugins/extension/cli.php
611             * Code based on https://github.com/giterlizzi/dokuwiki-template-bootstrap3/pull/617/files
612             */
613            try {
614                $extensions = (new Local())->getExtensions();
615                Repository::getInstance()->initExtensions(array_keys($extensions));
616                foreach ($extensions as $extension) {
617                    if ($extension->isEnabled() && $extension->isUpdateAvailable()) {
618                        echo "The extension {$extension->getDisplayName()} should be updated";
619                    }
620                }
621            } /** @noinspection PhpUndefinedClassInspection */ catch (ExtensionException $ignore) {
622                // Ignore the exception
623            }
624            return;
625        }
626
627
628        $pluginList = plugin_list('', true);
629        $extension = $this->loadHelper('extension_extension');
630        foreach ($pluginList as $name) {
631
632            /* @var helper_plugin_extension_extension $extension
633             * old extension manager until Kaos
634             */
635            $extension->setExtension($name);
636            /** @noinspection PhpUndefinedMethodInspection */
637            if ($extension->updateAvailable()) {
638                echo "The extension $name should be updated";
639            }
640        }
641
642
643    }
644
645    /**
646     * @return void
647     * Print the broken Links
648     * @throws ExceptionSqliteNotAvailable
649     */
650    private function brokenLinks()
651    {
652        LogUtility::msg("Broken Links Started");
653        $sqlite = Sqlite::createOrGetSqlite();
654        $request = $sqlite
655            ->createRequest()
656            ->setQuery("with validPages as (select path, analytics
657                     from pages
658                     where json_valid(analytics) = 1)
659select path,
660       json_extract(analytics, '$.statistics.internal_broken_link_count') as broken_link,
661       json_extract(analytics, '$.statistics.media.internal_broken_count') as broken_media
662from validPages
663where json_extract(analytics, '$.statistics.internal_broken_link_count') is not null
664   or json_extract(analytics, '$.statistics.media.internal_broken_count') != 0");
665        $rows = [];
666        try {
667            $rows = $request
668                ->execute()
669                ->getRows();
670        } catch (ExceptionCompile $e) {
671            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
672            return;
673        } finally {
674            $request->close();
675        }
676        if (count($rows) == 0) {
677            LogUtility::msg("No Broken Links");
678            exit();
679        }
680        LogUtility::msg("Broken Links:");
681        foreach ($rows as $row) {
682            $path = $row["path"];
683            $broken_link = $row["broken_link"];
684            $broken_media = $row["broken_media"];
685            echo "$path (Page: $broken_link, Media: $broken_media)    \n";
686        }
687        if (count($rows) != 0) {
688            exit(1);
689        }
690    }
691}
692