<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */

use ComboStrap\DatabasePageRow;
use ComboStrap\Event;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionCompile;
use ComboStrap\ExceptionNotExists;
use ComboStrap\ExceptionNotFound;
use ComboStrap\ExceptionRuntime;
use ComboStrap\ExceptionSqliteNotAvailable;
use ComboStrap\ExecutionContext;
use ComboStrap\FsWikiUtility;
use ComboStrap\LogUtility;
use ComboStrap\MarkupPath;
use ComboStrap\Meta\Field\BacklinkCount;
use ComboStrap\Meta\Field\PageH1;
use ComboStrap\MetadataFrontmatterStore;
use ComboStrap\Sqlite;
use dokuwiki\plugin\extension\Exception as ExtensionException;
use dokuwiki\plugin\extension\Local;
use dokuwiki\plugin\extension\Repository;
use splitbrain\phpcli\Options;

/**
 * All dependencies are loaded
 */
require_once(__DIR__ . '/vendor/autoload.php');

/**
 * The default memory of the server (128M) is not enough
 */
ini_set('memory_limit', '256M');


/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ```
 * ```
 * set animal=animal-directory-name
 * php ./bin/plugin.php combo --help
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{

    const METADATA_TO_DATABASE = "metadata-to-database";
    const ANALYTICS = "analytics";
    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
    const BROKEN_LINKS = "broken-links";
    const SYNC = "sync";
    const PLUGINS_TO_UPDATE = "plugins-to-update";
    const FORCE_OPTION = 'force';
    const PORT_OPTION = 'port';
    const HOST_OPTION = 'host';
    const CANONICAL = "combo-cli";


    /**
     * Register the help text, the global options, the commands and their arguments.
     *
     * @param Options $options
     *
     * Note the animal is set in {@link DokuWikiFarmCore::detectAnimal()}
     * via the environment variable `animal` that is passed in the $_SERVER variable
     */
    protected function setup(Options $options)
    {
        $help = <<<EOF
ComboStrap Administrative Commands


Example:
  * Replicate all pages into the database
```bash
php ./bin/plugin.php combo metadata-to-database --host serverHostName --port 80 :
# or
php ./bin/plugin.php combo metadata-to-database --host serverHostName --port 80 /
```
  * Replicate only the page `:namespace:my-page`
```bash
php ./bin/plugin.php combo metadata-to-database --host serverHostName --port 80 :namespace:my-page
# or
php ./bin/plugin.php combo metadata-to-database --host serverHostName --port 80 /namespace/my-page
```

Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
```bash
animal=animal-directory-name php ./bin/plugin.php combo
```

EOF;

        /**
         * Global Options
         */
        $options->setHelp($help);
        $options->registerOption('version', 'print version', 'v');
        /** @noinspection PhpRedundantOptionalArgumentInspection */
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            true
        );

        /**
         * The start path argument is shared by every command that walks pages
         */
        $startPathArgName = 'startPath';
        $startPathHelpDescription = "The start path (a page or a directory). For all pages, type the root directory '/' or ':'";

        /**
         * Analytics command
         * main() reads a start path for this command, so the argument is declared here
         * to keep the generated help and the argument check in line with the code.
         */
        $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
        $options->registerArgument(
            $startPathArgName,
            $startPathHelpDescription,
            true,
            self::ANALYTICS
        );

        /**
         * Commands without options
         */
        $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
        $options->registerCommand(self::BROKEN_LINKS, "Output Broken Links");


        // Metadata to database command
        $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
        $options->registerOption(
            self::HOST_OPTION,
            "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::PORT_OPTION,
            "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::FORCE_OPTION,
            "Replicate with force",
            'f',
            false,
            self::METADATA_TO_DATABASE
        );
        $options->registerArgument(
            $startPathArgName,
            $startPathHelpDescription,
            true,
            self::METADATA_TO_DATABASE
        );


        // Metadata Command definition
        $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
        $options->registerArgument(
            $startPathArgName,
            $startPathHelpDescription,
            true,
            self::METADATA_TO_FRONTMATTER
        );

        // Sync Command Definition
        // NOTE(review): main() does not read the start path for this command - confirm whether it is still needed
        $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");
        $options->registerArgument(
            $startPathArgName,
            $startPathHelpDescription,
            true,
            self::SYNC
        );

    }

    /**
     * The main entry: dispatch the parsed command to its implementation.
     *
     * Every failure path terminates the process with a non-zero exit status so that
     * calling scripts can detect it.
     *
     * @param Options $options
     */
    protected function main(Options $options)
    {


        if (isset($_REQUEST['animal'])) {
            // on linux
            echo "Animal detected: " . $_REQUEST['animal'] . "\n";
        } else {
            // on windows
            echo "No Animal detected\n";
            echo "Conf: " . DOKU_CONF . "\n";
        }

        $args = $options->getArgs();


        // NOTE(review): no 'depth' option is registered in setup(), so this presumably
        // always yields the default 0 - confirm
        $depth = $options->getOpt('depth', 0);
        $cmd = $options->getCmd();

        try {
            switch ($cmd) {
                case self::METADATA_TO_DATABASE:
                    $startPath = $this->getStartPath($args);
                    $force = $options->getOpt(self::FORCE_OPTION, false);
                    $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
                    if ($hostOptionValue === null) {
                        // a missing mandatory option is an error: do not exit with a success status
                        fwrite(STDERR, "The host name is mandatory\n");
                        exit(1);
                    }
                    $_SERVER['HTTP_HOST'] = $hostOptionValue;
                    $portOptionValue = $options->getOpt(self::PORT_OPTION, null);
                    if ($portOptionValue === null) {
                        fwrite(STDERR, "The host port is mandatory\n");
                        exit(1);
                    }
                    $_SERVER['SERVER_PORT'] = $portOptionValue;
                    $this->index($startPath, $force, $depth);
                    break;
                case self::METADATA_TO_FRONTMATTER:
                    $startPath = $this->getStartPath($args);
                    $this->frontmatter($startPath, $depth);
                    break;
                case self::BROKEN_LINKS:
                    $this->brokenLinks();
                    break;
                case self::ANALYTICS:
                    $startPath = $this->getStartPath($args);
                    $output = $options->getOpt('output', '');
                    //if ($output == '-') $output = 'php://stdout';
                    $this->analytics($startPath, $output, $depth);
                    break;
                case self::SYNC:
                    // php "$DOKUWIKI_HOME"/bin/indexer.php -q
                    $this->deleteNonExistingPageFromDatabase();
                    break;
                case self::PLUGINS_TO_UPDATE:
                    $this->pluginToUpdate();
                    break;
                default:
                    if ($cmd !== "") {
                        fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")");
                    } else {
                        echo $options->help();
                    }
                    exit(1);
            }
        } catch (Exception $exception) {
            fwrite(STDERR, "An internal error has occurred. " . $exception->getMessage() . "\n" . $exception->getTraceAsString());
            exit(1);
        }


    }

    /**
     * Index every page found below the start path, then dispatch the pending events.
     *
     * @param array|string $namespaces the start path handed to {@link FsWikiUtility::getPages()}
     * @param bool $rebuild NOTE(review): accepted but not forwarded; idx_addPage is always
     *                      called with `true, true` (presumably verbose and force) - confirm this is intended
     * @param int $depth recursion depth. 0 for unlimited
     * @throws ExceptionCompile
     */
    private function index($namespaces = array(), $rebuild = false, $depth = 0)
    {

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = FsWikiUtility::getPages($namespaces, $depth);

        $pageCounter = 0;
        $totalNumberOfPages = sizeof($pages);
        while ($pageArray = array_shift($pages)) {
            $id = $pageArray['id'];
            global $ID;
            $ID = $id;
            /**
             * Indexing the page start the database replication
             * See {@link action_plugin_combo_indexer}
             */
            $pageCounter++;
            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
            try {
                /**
                 * If the page does not need to be indexed, there is no run
                 * and false is returned
                 */
                $indexedOrNot = idx_addPage($id, true, true);
                if ($indexedOrNot) {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
                } else {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
                }
            } catch (ExceptionRuntime $e) {
                LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
            } finally {
                // the context is closed for every page, whatever the outcome
                $executionContext->close();
            }
        }
        /**
         * Process all backlinks
         */
        echo "Processing Replication Request\n";
        Event::dispatchEvent(PHP_INT_MAX);

    }

    /**
     * Fetch the analytics document of every page below the start path and,
     * when an output file is given, write one comma separated line per page.
     *
     * Pages whose analytics cannot be fetched or parsed are logged and skipped.
     *
     * @param array|string $namespaces the start path handed to {@link FsWikiUtility::getPages()}
     * @param string|null $output the file to write; nothing is written when empty
     * @param int $depth recursion depth handed to {@link FsWikiUtility::getPages()}
     */
    private function analytics($namespaces = array(), $output = null, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = FsWikiUtility::getPages($namespaces, $depth);


        if (!empty($fileHandle)) {
            // NOTE(review): the last header column is 'score' while the row below
            // writes the 'low' quality value - confirm which one is expected
            $header = array(
                'id',
                'backlinks',
                'broken_links',
                'changes',
                'chars',
                'external_links',
                'external_medias',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'internal_links',
                'internal_medias',
                'words',
                'score'
            );
            fwrite($fileHandle, implode(",", $header) . PHP_EOL);
        }
        $pageCounter = 0;
        $totalNumberOfPages = sizeof($pages);
        while ($pageArray = array_shift($pages)) {
            $id = $pageArray['id'];
            $page = MarkupPath::createMarkupFromId($id);


            $pageCounter++;
            /**
             * Analytics
             */
            echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";
            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
            try {
                $analyticsPath = $page->fetchAnalyticsPath();
            } catch (ExceptionNotExists $e) {
                LogUtility::error("The analytics document for the page ($page) was not found");
                continue;
            } catch (ExceptionCompile $e) {
                LogUtility::error("Error when get the analytics.", self::CANONICAL, $e);
                continue;
            } finally {
                // runs before the `continue` takes effect, so the context is always closed
                $executionContext->close();
            }

            try {
                $data = \ComboStrap\Json::createFromPath($analyticsPath)->toArray();
            } catch (ExceptionBadSyntax $e) {
                LogUtility::error("The analytics json of the page ($page) is not conform");
                continue;
            } catch (ExceptionNotFound|ExceptionNotExists $e) {
                LogUtility::error("The analytics document ({$analyticsPath}) for the page ($page) was not found");
                continue;
            }

            if (!empty($fileHandle)) {
                $statistics = $data[renderer_plugin_combo_analytics::STATISTICS];
                $headingCount = $statistics[renderer_plugin_combo_analytics::HEADING_COUNT];
                // only the values are written: their order must follow the header order
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[BacklinkCount::getPersistentName()],
                    'broken_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_BROKEN_COUNT],
                    'changes' => $statistics[renderer_plugin_combo_analytics::EDITS_COUNT],
                    'chars' => $statistics[renderer_plugin_combo_analytics::CHAR_COUNT],
                    'external_links' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_LINK_COUNT],
                    'external_medias' => $statistics[renderer_plugin_combo_analytics::EXTERNAL_MEDIA_COUNT],
                    PageH1::PROPERTY_NAME => $headingCount[PageH1::PROPERTY_NAME],
                    'h2' => $headingCount['h2'],
                    'h3' => $headingCount['h3'],
                    'h4' => $headingCount['h4'],
                    'h5' => $headingCount['h5'],
                    'internal_links' => $statistics[renderer_plugin_combo_analytics::INTERNAL_LINK_COUNT],
                    'internal_medias' => $statistics[renderer_plugin_combo_analytics::INTERNAL_MEDIA_COUNT],
                    'words' => $statistics[renderer_plugin_combo_analytics::WORD_COUNT],
                    'low' => $data[renderer_plugin_combo_analytics::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        }
        if (!empty($fileHandle)) {
            fclose($fileHandle);
        }

    }


    /**
     * Delete from the database every page id that no longer exists on the file system.
     *
     * @throws ExceptionSqliteNotAvailable
     */
    private function deleteNonExistingPageFromDatabase()
    {
        LogUtility::msg("Starting: Deleting non-existing page from database");
        $sqlite = Sqlite::createOrGetSqlite();
        /** @noinspection SqlNoDataSourceInspection */
        $request = $sqlite
            ->createRequest()
            ->setQuery("select id as \"id\" from pages");
        $rows = [];
        try {
            $rows = $request
                ->execute()
                ->getRows();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
            return;
        } finally {
            $request->close();
        }
        $counter = 0;

        foreach ($rows as $row) {
            /**
             * Context
             * One execution context is opened and closed per row
             * (presumably to keep the memory bounded - confirm), a previous run failed with:
             * PHP Fatal error:  Allowed memory size of 268435456 bytes exhausted (tried to allocate 20480 bytes)
             * in /opt/www/datacadamia.com/inc/ErrorHandler.php on line 102
             */
            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
            try {
                $counter++;
                $id = $row['id'];
                if (!page_exists($id)) {
                    echo 'Page does not exist on the file system. Delete from the database (' . $id . ")\n";
                    try {
                        $dbRow = DatabasePageRow::getFromDokuWikiId($id);
                        $dbRow->delete();
                    } catch (ExceptionNotFound $e) {
                        // ok, there is no row to delete
                    }
                }
            } finally {
                $executionContext->close();
            }

        }
        LogUtility::msg("Sync finished ($counter pages checked)");

    }

    /**
     * Synchronize the frontmatter of every page below the start path
     * and print a summary grouped by resulting status.
     *
     * @param array|string $namespaces the start path handed to {@link FsWikiUtility::getPages()}
     * @param int $depth recursion depth handed to {@link FsWikiUtility::getPages()}
     */
    private function frontmatter($namespaces, $depth)
    {
        $pages = FsWikiUtility::getPages($namespaces, $depth);
        $pageCounter = 0;
        $totalNumberOfPages = sizeof($pages);
        $pagesWithChanges = [];
        $pagesWithError = [];
        $pagesWithOthers = [];
        $notChangedCounter = 0;
        while ($pageArray = array_shift($pages)) {
            $id = $pageArray['id'];
            global $ID;
            $ID = $id;
            $page = MarkupPath::createMarkupFromId($id);
            $pageCounter++;
            LogUtility::msg("Processing page $id ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
            $executionContext = ExecutionContext::getActualOrCreateFromEnv();
            try {
                $message = MetadataFrontmatterStore::createFromPage($page)
                    ->sync();
                switch ($message->getStatus()) {
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
                        $notChangedCounter++;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
                        $pagesWithChanges[] = $id;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
                        $pagesWithError[$id] = $message->getPlainTextContent();
                        break;
                    default:
                        $pagesWithOthers[$id] = $message->getPlainTextContent();
                        break;

                }
            } catch (ExceptionCompile $e) {
                $pagesWithError[$id] = $e->getMessage();
            } finally {
                $executionContext->close();
            }

        }

        echo "\n";
        echo "Result:\n";
        echo "$notChangedCounter pages without any frontmatter modifications\n";

        if (sizeof($pagesWithError) > 0) {
            echo "\n";
            echo "The following pages had errors\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithError);
            foreach ($pagesWithError as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages): " . $message);
            }
        } else {
            echo "No error\n";
        }

        if (sizeof($pagesWithChanges) > 0) {
            echo "\n";
            echo "The following pages had changed:\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithChanges);
            foreach ($pagesWithChanges as $id) {
                $pageCounter++;
                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages) ");
            }
        } else {
            echo "No changes\n";
        }

        if (sizeof($pagesWithOthers) > 0) {
            echo "\n";
            // terminated by a newline like the other headings, so that the list starts on its own line
            echo "The following pages had an other status\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithOthers);
            foreach ($pagesWithOthers as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page $id ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
            }
        }
    }

    /**
     * Extract the start path from the command line arguments.
     *
     * Exactly one argument is expected, otherwise the process exits with status 1.
     * The root values `:` and `/` are returned untouched, any other value goes through cleanID.
     *
     * @param array $args the command line arguments
     * @return string the start path
     */
    private function getStartPath($args)
    {
        $sizeof = sizeof($args);
        switch ($sizeof) {
            case 0:
                fwrite(STDERR, "The start path is mandatory and was not given");
                exit(1);
            case 1:
                $startPath = $args[0];
                if (!in_array($startPath, [":", "/"], true)) {
                    // cleanId would return blank for a root
                    $startPath = cleanID($startPath);
                }
                break;
            default:
                fwrite(STDERR, "Too much arguments given $sizeof");
                exit(1);
        }
        return $startPath;
    }

    /**
     *
     * Print the extension/plugin to update
     *
     * Two extension manager generations are supported: when the class
     * `dokuwiki\plugin\extension\Local` can be loaded it is used,
     * otherwise the `extension_extension` helper is used.
     *
     * Note, there is also an Endpoint:
     * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
     * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
     *
     * @noinspection PhpUndefinedClassInspection
     */
    private function pluginToUpdate()
    {

        // Local is imported from the extension plugin namespace: an unqualified name
        // would resolve to a global class and this branch would never be taken
        if (class_exists(Local::class)) {
            /**
             * Release 2025-05-14 "Librarian"
             * https://www.dokuwiki.org/changes#release_2025-05-14_librarian
             * https://www.patreon.com/posts/new-extension-116501986
             * ./bin/plugin.php extension list
             * @link lib/plugins/extension/cli.php
             * Code based on https://github.com/giterlizzi/dokuwiki-template-bootstrap3/pull/617/files
             */
            try {
                $extensions = (new Local())->getExtensions();
                Repository::getInstance()->initExtensions(array_keys($extensions));
                foreach ($extensions as $extension) {
                    if ($extension->isEnabled() && $extension->isUpdateAvailable()) {
                        echo "The extension {$extension->getDisplayName()} should be updated\n";
                    }
                }
            } /** @noinspection PhpUndefinedClassInspection */ catch (ExtensionException $ignore) {
                // Ignore the exception
            }
            return;
        }


        $pluginList = plugin_list('', true);
        /* @var helper_plugin_extension_extension $extension
         * old extension manager until Kaos
         */
        $extension = $this->loadHelper('extension_extension');
        if ($extension === null) {
            // without the helper there is nothing to query
            fwrite(STDERR, "The extension helper (extension_extension) could not be loaded\n");
            return;
        }
        foreach ($pluginList as $name) {

            $extension->setExtension($name);
            /** @noinspection PhpUndefinedMethodInspection */
            if ($extension->updateAvailable()) {
                echo "The extension $name should be updated\n";
            }
        }


    }

    /**
     * Print the broken links recorded in the analytics column of the pages table.
     *
     * Terminates the process: exit status 0 when the query returns no row, 1 otherwise.
     * When the query itself fails, the error is logged and the method returns.
     *
     * @return void
     * @throws ExceptionSqliteNotAvailable
     */
    private function brokenLinks()
    {
        LogUtility::msg("Broken Links Started");
        $sqlite = Sqlite::createOrGetSqlite();
        // NOTE(review): the link count is tested with `is not null` while the media
        // count is tested with `!= 0` - confirm that the difference is intended
        $request = $sqlite
            ->createRequest()
            ->setQuery("with validPages as (select path, analytics
                     from pages
                     where json_valid(analytics) = 1)
select path,
       json_extract(analytics, '$.statistics.internal_broken_link_count') as broken_link,
       json_extract(analytics, '$.statistics.media.internal_broken_count') as broken_media
from validPages
where json_extract(analytics, '$.statistics.internal_broken_link_count') is not null
   or json_extract(analytics, '$.statistics.media.internal_broken_count') != 0");
        $rows = [];
        try {
            $rows = $request
                ->execute()
                ->getRows();
        } catch (ExceptionCompile $e) {
            LogUtility::msg("Error while getting the broken links. {$e->getMessage()}");
            return;
        } finally {
            $request->close();
        }
        if (count($rows) == 0) {
            LogUtility::msg("No Broken Links");
            exit();
        }
        LogUtility::msg("Broken Links:");
        foreach ($rows as $row) {
            $path = $row["path"];
            $broken_link = $row["broken_link"];
            $broken_media = $row["broken_media"];
            echo "$path (Page: $broken_link, Media: $broken_media) \n";
        }
        // at least one row was printed: report it through the exit status
        exit(1);
    }
}