1 <?php
2 /**
3  * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
4  *
5  * This source code is licensed under the GPL license found in the
6  * COPYING  file in the root directory of this source tree.
7  *
8  * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
9  * @author   ComboStrap <support@combostrap.com>
10  *
11  */
12 
13 use ComboStrap\DatabasePageRow;
14 use ComboStrap\Event;
15 use ComboStrap\ExceptionBadSyntax;
16 use ComboStrap\ExceptionCompile;
17 use ComboStrap\ExceptionNotExists;
18 use ComboStrap\ExceptionNotFound;
19 use ComboStrap\ExceptionRuntime;
20 use ComboStrap\ExecutionContext;
21 use ComboStrap\FsWikiUtility;
22 use ComboStrap\LogUtility;
23 use ComboStrap\MarkupPath;
24 use ComboStrap\Meta\Field\BacklinkCount;
25 use ComboStrap\Meta\Field\PageH1;
26 use ComboStrap\MetadataFrontmatterStore;
27 use ComboStrap\Sqlite;
28 use splitbrain\phpcli\Options;
29 
/**
 * Load the autoloader located in the `vendor` directory next to this file
 * to make the dependencies available.
 */
require_once(__DIR__ . '/vendor/autoload.php');

/**
 * Raise the PHP memory limit to 256M for this process:
 * the 128M of the server were not enough.
 */
ini_set('memory_limit', '256M');
39 
40 
41 /**
42  * Class cli_plugin_combo
43  *
44  * This is a cli:
45  * https://www.dokuwiki.org/devel:cli_plugins#example
46  *
47  * Usage:
48  *
49  * ```
50  * docker exec -ti $(CONTAINER) /bin/bash
51  * ```
52  * ```
53  * set animal=animal-directory-name
54  * php ./bin/plugin.php combo --help
55  * ```
56  * or via the IDE
57  *
58  *
59  * Example:
60  * https://www.dokuwiki.org/tips:grapher
61  *
62  */
63 class cli_plugin_combo extends DokuWiki_CLI_Plugin
64 {
65 
    // Names of the commands: they are registered in setup() and dispatched in main()
    const METADATA_TO_DATABASE = "metadata-to-database";
    const ANALYTICS = "analytics";
    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
    const SYNC = "sync";
    const PLUGINS_TO_UPDATE = "plugins-to-update";
    // Names of the options that setup() registers for the `metadata-to-database` command
    const FORCE_OPTION = 'force';
    const PORT_OPTION = 'port';
    const HOST_OPTION = 'host';
    // Canonical name passed to LogUtility::error() by this class
    const CANONICAL = "combo-cli";
75 
76 
77     /**
78      * register options and arguments
79      * @param Options $options
80      *
81      * Note the animal is set in {@link DokuWikiFarmCore::detectAnimal()}
82      * via the environment variable `animal` that is passed in the $_SERVER variable
83      */
84     protected function setup(Options $options)
85     {
86         $help = <<<EOF
87 ComboStrap Administrative Commands
88 
89 
90 Example:
91   * Replicate all pages into the database
92 ```bash
93 php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :
94 # or
95 php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /
96 ```
97   * Replicate only the page `:namespace:my-page`
98 ```bash
99 php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 :namespace:my-page
100 # or
101 php ./bin/plugin.php combo metadata-to-database --host serverHostName  --port 80 /namespace/my-page
102 ```
103 
104 Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
105 ```bash
106 animal=animal-directory-name php ./bin/plugin.php combo
107 ```
108 
109 EOF;
110 
111         $options->setHelp($help);
112         $options->registerOption('version', 'print version', 'v');
113         $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
114         $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
115         $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
116         $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
117         $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");
118         $options->registerArgument(
119             'path',
120             "The start path (a page or a directory). For all pages, type the root directory '/'",
121             false
122         );
123         $options->registerOption(
124             'output',
125             "Optional, where to store the analytical data as csv eg. a filename.",
126             'o',
127             true
128         );
129         $options->registerOption(
130             self::HOST_OPTION,
131             "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
132             null,
133             true,
134             self::METADATA_TO_DATABASE
135         );
136         $options->registerOption(
137             self::PORT_OPTION,
138             "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
139             null,
140             true,
141             self::METADATA_TO_DATABASE
142         );
143         $options->registerOption(
144             self::FORCE_OPTION,
145             "Replicate with force",
146             'f',
147             false,
148             self::METADATA_TO_DATABASE
149         );
150         $options->registerOption(
151             'dry',
152             "Optional, dry-run",
153             'd', false);
154 
155 
156     }
157 
158     /**
159      * The main entry
160      * @param Options $options
161      * @throws ExceptionCompile
162      */
163     protected function main(Options $options)
164     {
165 
166 
167         if (isset($_REQUEST['animal'])) {
168             // on linux
169             echo "Animal detected: " . $_REQUEST['animal'] . "\n";
170         } else {
171             // on windows
172             echo "No Animal detected\n";
173             echo "Conf: " . DOKU_CONF . "\n";
174         }
175 
176         $args = $options->getArgs();
177 
178 
179         $depth = $options->getOpt('depth', 0);
180         $cmd = $options->getCmd();
181 
182         try {
183             switch ($cmd) {
184                 case self::METADATA_TO_DATABASE:
185                     $startPath = $this->getStartPath($args);
186                     $force = $options->getOpt(self::FORCE_OPTION, false);
187                     $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
188                     if ($hostOptionValue === null) {
189                         fwrite(STDERR, "The host name is mandatory");
190                         return;
191                     }
192                     $_SERVER['HTTP_HOST'] = $hostOptionValue;
193                     $portOptionName = $options->getOpt(self::PORT_OPTION, null);
194                     if ($portOptionName === null) {
195                         fwrite(STDERR, "The host port is mandatory");
196                         return;
197                     }
198                     $_SERVER['SERVER_PORT'] = $portOptionName;
199                     $this->index($startPath, $force, $depth);
200                     break;
201                 case self::METADATA_TO_FRONTMATTER:
202                     $startPath = $this->getStartPath($args);
203                     $this->frontmatter($startPath, $depth);
204                     break;
205                 case self::ANALYTICS:
206                     $startPath = $this->getStartPath($args);
207                     $output = $options->getOpt('output', '');
208                     //if ($output == '-') $output = 'php://stdout';
209                     $this->analytics($startPath, $output, $depth);
210                     break;
211                 case self::SYNC:
212                     $this->deleteNonExistingPageFromDatabase();
213                     break;
214                 case self::PLUGINS_TO_UPDATE:
215                     /**
216                      * Endpoint:
217                      * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
218                      * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
219                      */
220                     $pluginList = plugin_list('', true);
221                     /* @var helper_plugin_extension_extension $extension */
222                     $extension = $this->loadHelper('extension_extension');
223                     foreach ($pluginList as $name) {
224                         $extension->setExtension($name);
225                         if ($extension->updateAvailable()) {
226                             echo "The extension $name should be updated";
227                         }
228                     }
229                     break;
230                 default:
231                     if ($cmd !== "") {
232                         fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")");
233                     } else {
234                         echo $options->help();
235                     }
236                     exit(1);
237             }
238         } catch (\Exception $exception) {
239             fwrite(STDERR, "An internal error has occured. " . $exception->getMessage() . "\n" . $exception->getTraceAsString());
240             exit(1);
241         }
242 
243 
244     }
245 
246     /**
247      * @param array $namespaces
248      * @param bool $rebuild
249      * @param int $depth recursion depth. 0 for unlimited
250      * @throws ExceptionCompile
251      */
252     private function index($namespaces = array(), $rebuild = false, $depth = 0)
253     {
254 
255         /**
256          * Run as admin to overcome the fact that
257          * anonymous user cannot see all links and backlinks
258          */
259         global $USERINFO;
260         $USERINFO['grps'] = array('admin');
261         global $INPUT;
262         $INPUT->server->set('REMOTE_USER', "cli");
263 
264         $pages = FsWikiUtility::getPages($namespaces, $depth);
265 
266         $pageCounter = 0;
267         $totalNumberOfPages = sizeof($pages);
268         while ($pageArray = array_shift($pages)) {
269             $id = $pageArray['id'];
270             global $ID;
271             $ID = $id;
272             /**
273              * Indexing the page start the database replication
274              * See {@link action_plugin_combo_indexer}
275              */
276             $pageCounter++;
277             $executionContext = ExecutionContext::getActualOrCreateFromEnv();
278             try {
279                 /**
280                  * If the page does not need to be indexed, there is no run
281                  * and false is returned
282                  */
283                 $indexedOrNot = idx_addPage($id, true, true);
284                 if ($indexedOrNot) {
285                     LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
286                 } else {
287                     LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
288                 }
289             } catch (ExceptionRuntime $e) {
290                 LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
291             } finally {
292                 $executionContext->close();
293             }
294         }
295         /**
296          * Process all backlinks
297          */
298         echo "Processing Replication Request\n";
299         Event::dispatchEvent(PHP_INT_MAX);
300 
301     }
302 
303     private function analytics($namespaces = array(), $output = null, $depth = 0)
304     {
305 
306         $fileHandle = null;
307         if (!empty($output)) {
308             $fileHandle = @fopen($output, 'w');
309             if (!$fileHandle) $this->fatal("Failed to open $output");
310         }
311 
312         /**
313          * Run as admin to overcome the fact that
314          * anonymous user cannot see all links and backlinks
315          */
316         global $USERINFO;
317         $USERINFO['grps'] = array('admin');
318         global $INPUT;
319         $INPUT->server->set('REMOTE_USER', "cli");
320 
321         $pages = FsWikiUtility::getPages($namespaces, $depth);
322 
323 
324         if (!empty($fileHandle)) {
325             $header = array(
326                 'id',
327                 'backlinks',
328                 'broken_links',
329                 'changes',
330                 'chars',
331                 'external_links',
332                 'external_medias',
333                 'h1',
334                 'h2',
335                 'h3',
336                 'h4',
337                 'h5',
338                 'internal_links',
339                 'internal_medias',
340                 'words',
341                 'score'
342             );
343             fwrite($fileHandle, implode(",", $header) . PHP_EOL);
344         }
345         $pageCounter = 0;
346         $totalNumberOfPages = sizeof($pages);
347         while ($pageArray = array_shift($pages)) {
348             $id = $pageArray['id'];
349             $page = MarkupPath::createMarkupFromId($id);
350 
351 
352             $pageCounter++;
353             /**
354              * Analytics
355              */
356             echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";
357             $executionContext = ExecutionContext::getActualOrCreateFromEnv();
358             try {
359                 $analyticsPath = $page->fetchAnalyticsPath();
360             } catch (ExceptionNotExists $e) {
361                 LogUtility::error("The analytics document for the page ($page) was not found");
362                 continue;
363             } catch (ExceptionCompile $e) {
364                 LogUtility::error("Error when get the analytics.", self::CANONICAL, $e);
365                 continue;
366             } finally {
367                 $executionContext->close();
368             }
369 
370             try {
371                 $data = \ComboStrap\Json::createFromPath($analyticsPath)->toArray();
372             } catch (ExceptionBadSyntax $e) {
373                 LogUtility::error("The analytics json of the page ($page) is not conform");
374                 continue;
375             } catch (ExceptionNotFound|ExceptionNotExists $e) {
376                 LogUtility::error("The analytics document ({$analyticsPath}) for the page ($page) was not found");
377                 continue;
378             }
379 
380             if (!empty($fileHandle)) {
381                 $statistics = $data[renderer_plugin_combo_analytics::STATISTICS];
382                 $row = array(
383                     'id' => $id,
384                     'backlinks' => $statistics[BacklinkCount::getPersistentName()],
385                     'broken_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_BROKEN_COUNT],
386                     'changes' => $statistics[renderer_plugin_combo_analytics::EDITS_COUNT],
387                     'chars' => $statistics[renderer_plugin_combo_analytics::CHAR_COUNT],
388                     'external_links' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_LINK_COUNT],
389                     'external_medias' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_MEDIA_COUNT],
390                     PageH1::PROPERTY_NAME => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT][PageH1::PROPERTY_NAME],
391                     'h2' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h2'],
392                     'h3' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h3'],
393                     'h4' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h4'],
394                     'h5' => $statistics[renderer_plugin_combo_analytics::HEADING_COUNT]['h5'],
395                     'internal_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_COUNT],
396                     'internal_medias' => $statistics[renderer_plugin_combo_analytics::INTERNAL_MEDIA_COUNT],
397                     'words' => $statistics[renderer_plugin_combo_analytics::WORD_COUNT],
398                     'low' => $data[renderer_plugin_combo_analytics::QUALITY]['low']
399                 );
400                 fwrite($fileHandle, implode(",", $row) . PHP_EOL);
401             }
402 
403         }
404         if (!empty($fileHandle)) {
405             fclose($fileHandle);
406         }
407 
408     }
409 
410 
411     /**
412      * @throws \ComboStrap\ExceptionSqliteNotAvailable
413      */
414     private function deleteNonExistingPageFromDatabase()
415     {
416         LogUtility::msg("Starting: Deleting non-existing page from database");
417         $sqlite = Sqlite::createOrGetSqlite();
418         $request = $sqlite
419             ->createRequest()
420             ->setQuery("select id as \"id\" from pages");
421         $rows = [];
422         try {
423             $rows = $request
424                 ->execute()
425                 ->getRows();
426         } catch (ExceptionCompile $e) {
427             LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
428             return;
429         } finally {
430             $request->close();
431         }
432         $counter = 0;
433 
434         foreach ($rows as $row) {
435             /**
436              * Context
437              * PHP Fatal error:  Allowed memory size of 268435456 bytes exhausted (tried to allocate 20480 bytes)
438              * in /opt/www/datacadamia.com/inc/ErrorHandler.php on line 102
439              */
440             $executionContext = ExecutionContext::getActualOrCreateFromEnv();
441             try {
442                 $counter++;
443                 $id = $row['id'];
444                 if (!page_exists($id)) {
445                     echo 'Page does not exist on the file system. Delete from the database (' . $id . ")\n";
446                     try {
447                         $dbRow = DatabasePageRow::getFromDokuWikiId($id);
448                         $dbRow->delete();
449                     } catch (ExceptionNotFound $e) {
450                         // ok
451                     }
452                 }
453             } finally {
454                 $executionContext->close();
455             }
456 
457         }
458         LogUtility::msg("Sync finished ($counter pages checked)");
459 
460     }
461 
462     private function frontmatter($namespaces, $depth)
463     {
464         $pages = FsWikiUtility::getPages($namespaces, $depth);
465         $pageCounter = 0;
466         $totalNumberOfPages = sizeof($pages);
467         $pagesWithChanges = [];
468         $pagesWithError = [];
469         $pagesWithOthers = [];
470         $notChangedCounter = 0;
471         while ($pageArray = array_shift($pages)) {
472             $id = $pageArray['id'];
473             global $ID;
474             $ID = $id;
475             $page = MarkupPath::createMarkupFromId($id);
476             $pageCounter++;
477             LogUtility::msg("Processing page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
478             $executionContext = ExecutionContext::getActualOrCreateFromEnv();
479             try {
480                 $message = MetadataFrontmatterStore::createFromPage($page)
481                     ->sync();
482                 switch ($message->getStatus()) {
483                     case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
484                         $notChangedCounter++;
485                         break;
486                     case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
487                         $pagesWithChanges[] = $id;
488                         break;
489                     case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
490                         $pagesWithError[$id] = $message->getPlainTextContent();
491                         break;
492                     default:
493                         $pagesWithOthers[$id] = $message->getPlainTextContent();
494                         break;
495 
496                 }
497             } catch (ExceptionCompile $e) {
498                 $pagesWithError[$id] = $e->getMessage();
499             } finally {
500                 $executionContext->close();
501             }
502 
503         }
504 
505         echo "\n";
506         echo "Result:\n";
507         echo "$notChangedCounter pages without any frontmatter modifications\n";
508 
509         if (sizeof($pagesWithError) > 0) {
510             echo "\n";
511             echo "The following pages had errors\n";
512             $pageCounter = 0;
513             $totalNumberOfPages = sizeof($pagesWithError);
514             foreach ($pagesWithError as $id => $message) {
515                 $pageCounter++;
516                 LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages): " . $message, LogUtility::LVL_MSG_ERROR);
517             }
518         } else {
519             echo "No error\n";
520         }
521 
522         if (sizeof($pagesWithChanges) > 0) {
523             echo "\n";
524             echo "The following pages had changed:\n";
525             $pageCounter = 0;
526             $totalNumberOfPages = sizeof($pagesWithChanges);
527             foreach ($pagesWithChanges as $id) {
528                 $pageCounter++;
529                 LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_ERROR);
530             }
531         } else {
532             echo "No changes\n";
533         }
534 
535         if (sizeof($pagesWithOthers) > 0) {
536             echo "\n";
537             echo "The following pages had an other status";
538             $pageCounter = 0;
539             $totalNumberOfPages = sizeof($pagesWithOthers);
540             foreach ($pagesWithOthers as $id => $message) {
541                 $pageCounter++;
542                 LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
543             }
544         }
545     }
546 
547     private function getStartPath($args)
548     {
549         $sizeof = sizeof($args);
550         switch ($sizeof) {
551             case 0:
552                 fwrite(STDERR, "The start path is mandatory and was not given");
553                 exit(1);
554             case 1:
555                 $startPath = $args[0];
556                 if (!in_array($startPath, [":", "/"])) {
557                     // cleanId would return blank for a root
558                     $startPath = cleanID($startPath);
559                 }
560                 break;
561             default:
562                 fwrite(STDERR, "Too much arguments given $sizeof");
563                 exit(1);
564         }
565         return $startPath;
566     }
567 }
568