<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\AnalyticsDocument;
use ComboStrap\BacklinkCount;
use ComboStrap\DatabasePageRow;
use ComboStrap\Event;
use ComboStrap\ExceptionCombo;
use ComboStrap\ExceptionComboRuntime;
use ComboStrap\FsWikiUtility;
use ComboStrap\LogUtility;
use ComboStrap\MetadataFrontmatterStore;
use ComboStrap\Page;
use ComboStrap\PageH1;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

/**
 * All dependencies are loaded in the plugin utility
 */
require_once(__DIR__ . '/ComboStrap/PluginUtility.php');

/**
 * The default server memory limit (128M) is not enough
 */
ini_set('memory_limit', '256M');


/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ```
 * ```
 * set animal=animal-directory-name
 * php ./bin/plugin.php combo --help
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{

    const METADATA_TO_DATABASE = "metadata-to-database";
    const ANALYTICS = "analytics";
    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
    const SYNC = "sync";
    const PLUGINS_TO_UPDATE = "plugins-to-update";
    const FORCE_OPTION = 'force';
    const PORT_OPTION = 'port';
    const HOST_OPTION = 'host';


    /**
     * Register the help text, the commands, the options and the arguments
     * of this command line.
     *
     * @param Options $options the option registry to fill
     */
    protected function setup(Options $options)
    {
        $help = <<<EOF
ComboStrap Administrative Commands


Example:
  * Replicate all pages into the database
```bash
php ./bin/plugin.php combo metadata-to-database :
# or
php ./bin/plugin.php combo metadata-to-database /
```
  * Replicate only the page `:namespace:my-page`
```bash
php ./bin/plugin.php combo metadata-to-database :namespace:my-page
# or
php ./bin/plugin.php combo metadata-to-database /namespace/my-page
```

Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
```bash
set animal=animal-directory-name
```

EOF;

        $options->setHelp($help);
        $options->registerOption('version', 'print version', 'v');

        // Commands
        $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
        $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
        $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
        $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
        $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");

        // Global argument and options
        $options->registerArgument(
            'path',
            "The start path (a page or a directory). For all pages, type the root directory '/'",
            false
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            true
        );

        // Options that apply only to the `metadata-to-database` command
        $options->registerOption(
            self::HOST_OPTION,
            "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::PORT_OPTION,
            "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::FORCE_OPTION,
            "Replicate with force",
            'f',
            false,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);


    }

    /**
     * The main entry: dispatch the command given on the command line.
     *
     * Exits with status 1 when a mandatory option is missing, when the
     * command is unknown or when no command was given (the help is printed).
     *
     * @param Options $options the parsed command line
     */
    protected function main(Options $options)
    {


        $args = $options->getArgs();


        // NOTE(review): no 'depth' option is registered in setup(), so this
        // presumably always yields the default (0) - confirm whether the option
        // should be registered.
        $depth = $options->getOpt('depth', 0);
        $cmd = $options->getCmd();
        switch ($cmd) {
            case self::METADATA_TO_DATABASE:
                $startPath = $this->getStartPath($args);
                $force = $options->getOpt(self::FORCE_OPTION, false);
                $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
                if ($hostOptionValue === null) {
                    fwrite(STDERR, "The host name is mandatory" . PHP_EOL);
                    // A missing mandatory option is a failure: do not end with a success status
                    exit(1);
                }
                $_SERVER['HTTP_HOST'] = $hostOptionValue;
                $portOptionValue = $options->getOpt(self::PORT_OPTION, null);
                if ($portOptionValue === null) {
                    fwrite(STDERR, "The host port is mandatory" . PHP_EOL);
                    exit(1);
                }
                $_SERVER['SERVER_PORT'] = $portOptionValue;
                $this->index($startPath, $force, $depth);
                break;
            case self::METADATA_TO_FRONTMATTER:
                $startPath = $this->getStartPath($args);
                $this->frontmatter($startPath, $depth);
                break;
            case self::ANALYTICS:
                $startPath = $this->getStartPath($args);
                $output = $options->getOpt('output', '');
                //if ($output == '-') $output = 'php://stdout';
                $this->analytics($startPath, $output, $depth);
                break;
            case self::SYNC:
                $this->deleteNonExistingPageFromDatabase();
                break;
            case self::PLUGINS_TO_UPDATE:
                /**
                 * Endpoint:
                 * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
                 * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
                 */
                $pluginList = plugin_list('', true);
                /* @var helper_plugin_extension_extension $extension */
                $extension = $this->loadHelper('extension_extension');
                foreach ($pluginList as $name) {
                    $extension->setExtension($name);
                    if ($extension->updateAvailable()) {
                        // One extension per line
                        echo "The extension $name should be updated\n";
                    }
                }
                break;
            default:
                if ($cmd !== "") {
                    fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")" . PHP_EOL);
                } else {
                    echo $options->help();
                }
                exit(1);
        }


    }

    /**
     * Index the pages found under the start path and dispatch the
     * replication events afterwards.
     *
     * NOTE(review): `$rebuild` is accepted but never read; the indexer is
     * always called with its third argument set to true. Confirm whether the
     * `--force` option should be forwarded instead.
     *
     * @param array|string $namespaces the start path (main() passes the value returned by getStartPath)
     * @param bool $rebuild currently unused
     * @param int $depth recursion depth. 0 for unlimited
     * @throws ExceptionCombo
     */
    private function index($namespaces = array(), $rebuild = false, $depth = 0)
    {

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = FsWikiUtility::getPages($namespaces, $depth);

        $pageCounter = 0;
        $totalNumberOfPages = sizeof($pages);
        foreach ($pages as $pageArray) {
            $id = $pageArray['id'];
            /**
             * Indexing the page start the database replication
             * See {@link action_plugin_combo_fulldatabasereplication}
             */
            $pageCounter++;
            try {
                /**
                 * If the page does not need to be indexed, there is no run
                 * and false is returned
                 */
                $indexedOrNot = idx_addPage($id, true, true);
                if ($indexedOrNot) {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
                } else {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
                }
            } catch (ExceptionComboRuntime $e) {
                LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
            }
        }
        /**
         * Process all backlinks
         */
        echo "Processing Replication Request\n";
        Event::dispatchEvent(PHP_INT_MAX);

    }

    /**
     * Compute the analytics of the pages found under the start path and
     * optionally write one csv row per page into the output file.
     *
     * @param array|string $namespaces the start path (main() passes the value returned by getStartPath)
     * @param string|null $output the path of the csv file; nothing is exported when empty
     * @param int $depth recursion depth passed to the page listing
     */
    private function analytics($namespaces = array(), $output = null, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }
        $exportToCsv = is_resource($fileHandle);

        // The file handle is released even if the processing of a page throws
        try {

            /**
             * Run as admin to overcome the fact that
             * anonymous user cannot see all links and backlinks
             */
            global $USERINFO;
            $USERINFO['grps'] = array('admin');
            global $INPUT;
            $INPUT->server->set('REMOTE_USER', "cli");

            $pages = FsWikiUtility::getPages($namespaces, $depth);


            if ($exportToCsv) {
                // NOTE(review): the last header column is named 'score' while
                // the rows below write the quality 'low' value - confirm which one is intended.
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    'score'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pages);
            foreach ($pages as $pageArray) {
                $id = $pageArray['id'];
                $page = Page::createPageFromId($id);


                $pageCounter++;
                echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";

                /**
                 * Analytics
                 */
                $analytics = $page->getAnalyticsDocument();
                $data = $analytics->getOrProcessContent()->toArray();

                if ($exportToCsv) {
                    $statistics = $data[AnalyticsDocument::STATISTICS];
                    $row = array(
                        'id' => $id,
                        'backlinks' => $statistics[BacklinkCount::getPersistentName()],
                        'broken_links' => $statistics[AnalyticsDocument::INTERNAL_LINK_BROKEN_COUNT],
                        'changes' => $statistics[AnalyticsDocument::EDITS_COUNT],
                        'chars' => $statistics[AnalyticsDocument::CHAR_COUNT],
                        'external_links' => $statistics[AnalyticsDocument::EXTERNAL_LINK_COUNT],
                        'external_medias' => $statistics[AnalyticsDocument::EXTERNAL_MEDIA_COUNT],
                        PageH1::PROPERTY_NAME => $statistics[AnalyticsDocument::HEADING_COUNT][PageH1::PROPERTY_NAME],
                        'h2' => $statistics[AnalyticsDocument::HEADING_COUNT]['h2'],
                        'h3' => $statistics[AnalyticsDocument::HEADING_COUNT]['h3'],
                        'h4' => $statistics[AnalyticsDocument::HEADING_COUNT]['h4'],
                        'h5' => $statistics[AnalyticsDocument::HEADING_COUNT]['h5'],
                        'internal_links' => $statistics[AnalyticsDocument::INTERNAL_LINK_COUNT],
                        'internal_medias' => $statistics[AnalyticsDocument::INTERNAL_MEDIA_COUNT],
                        'words' => $statistics[AnalyticsDocument::WORD_COUNT],
                        'low' => $data[AnalyticsDocument::QUALITY]['low']
                    );
                    fwrite($fileHandle, implode(",", $row) . PHP_EOL);
                }

            }

        } finally {
            if ($exportToCsv) {
                fclose($fileHandle);
            }
        }

    }


    /**
     * Delete from the database every page whose id no longer exists
     * on the file system.
     *
     * The function returns early (after logging) when the page ids
     * cannot be read from the database.
     */
    private function deleteNonExistingPageFromDatabase()
    {
        LogUtility::msg("Starting: Deleting non-existing page from database");
        $sqlite = Sqlite::createOrGetSqlite();
        $request = $sqlite
            ->createRequest()
            ->setQuery("select id as \"id\" from pages");
        $rows = [];
        try {
            $rows = $request
                ->execute()
                ->getRows();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
            return;
        } finally {
            // Executed on success and on the early return above
            $request->close();
        }
        $counter = 0;
        foreach ($rows as $row) {
            $counter++;
            $id = $row['id'];
            if (!page_exists($id)) {
                echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
                Page::createPageFromId($id)->getDatabasePage()->delete();
            }
        }
        LogUtility::msg("Sync finished ($counter pages checked)");


    }

    /**
     * Synchronize the frontmatter of the pages found under the start path
     * and print a report (unchanged count, errors, changes, other statuses).
     *
     * @param array|string $namespaces the start path (main() passes the value returned by getStartPath)
     * @param int $depth recursion depth passed to the page listing
     */
    private function frontmatter($namespaces, $depth)
    {
        $pages = FsWikiUtility::getPages($namespaces, $depth);
        $pageCounter = 0;
        $totalNumberOfPages = sizeof($pages);
        $pagesWithChanges = [];
        $pagesWithError = [];
        $pagesWithOthers = [];
        $notChangedCounter = 0;
        foreach ($pages as $pageArray) {
            $id = $pageArray['id'];
            $page = Page::createPageFromId($id);
            $pageCounter++;
            LogUtility::msg("Processing page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
            try {
                $message = MetadataFrontmatterStore::createFromPage($page)
                    ->sync();
                switch ($message->getStatus()) {
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
                        $notChangedCounter++;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
                        $pagesWithChanges[] = $id;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
                        $pagesWithError[$id] = $message->getPlainTextContent();
                        break;
                    default:
                        $pagesWithOthers[$id] = $message->getPlainTextContent();
                        break;

                }
            } catch (ExceptionCombo $e) {
                $pagesWithError[$id] = $e->getMessage();
            }

        }

        echo "\n";
        echo "Result:\n";
        echo "$notChangedCounter pages without any frontmatter modifications\n";

        if (sizeof($pagesWithError) > 0) {
            echo "\n";
            echo "The following pages had errors\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithError);
            foreach ($pagesWithError as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages): " . $message, LogUtility::LVL_MSG_ERROR);
            }
        } else {
            echo "No error\n";
        }

        if (sizeof($pagesWithChanges) > 0) {
            echo "\n";
            echo "The following pages had changed:\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithChanges);
            foreach ($pagesWithChanges as $id) {
                $pageCounter++;
                // A successful change is an information, not an error
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
            }
        } else {
            echo "No changes\n";
        }

        if (sizeof($pagesWithOthers) > 0) {
            echo "\n";
            echo "The following pages had an other status\n";
            $pageCounter = 0;
            $totalNumberOfPages = sizeof($pagesWithOthers);
            foreach ($pagesWithOthers as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
            }
        }
    }

    /**
     * Return the start path from the command line arguments.
     *
     * Exactly one argument is expected; otherwise an error is written
     * on the standard error stream and the process exits with status 1.
     *
     * @param array $args the positional arguments of the command line
     * @return string the root marker (`:` or `/`) untouched, otherwise the cleaned id
     */
    private function getStartPath($args)
    {
        $sizeof = sizeof($args);
        switch ($sizeof) {
            case 0:
                fwrite(STDERR, "The start path is mandatory and was not given" . PHP_EOL);
                exit(1);
            case 1:
                $startPath = $args[0];
                if (!in_array($startPath, [":", "/"], true)) {
                    // cleanId would return blank for a root
                    $startPath = cleanID($startPath);
                }
                break;
            default:
                fwrite(STDERR, "Too many arguments given $sizeof" . PHP_EOL);
                exit(1);
        }
        return $startPath;
    }
}