<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\AnalyticsDocument;
use ComboStrap\BacklinkCount;
use ComboStrap\Event;
use ComboStrap\ExceptionCombo;
use ComboStrap\ExceptionComboRuntime;
use ComboStrap\FsWikiUtility;
use ComboStrap\LogUtility;
use ComboStrap\MetadataFrontmatterStore;
use ComboStrap\Page;
use ComboStrap\PageH1;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

/**
 * All dependencies are loaded in the plugin utility
 */
require_once(__DIR__ . '/ComboStrap/PluginUtility.php');

/**
 * The default memory limit of the server (128M) is not enough
 */
ini_set('memory_limit', '256M');


/**
 * Class cli_plugin_combo
 *
 * The command line interface of the combo plugin.
 * See https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ```
 * ```
 * set animal=animal-directory-name
 * php ./bin/plugin.php combo --help
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{

    const METADATA_TO_DATABASE = "metadata-to-database";
    const ANALYTICS = "analytics";
    const METADATA_TO_FRONTMATTER = "metadata-to-frontmatter";
    const SYNC = "sync";
    const PLUGINS_TO_UPDATE = "plugins-to-update";
    const FORCE_OPTION = 'force';
    const PORT_OPTION = 'port';
    const HOST_OPTION = 'host';


    /**
     * Register the help text, the commands, the argument and the options
     *
     * @param Options $options
     *
     * Note the animal is set in {@link DokuWikiFarmCore::detectAnimal()}
     * via the environment variable `animal` that is passed in the $_SERVER variable
     */
    protected function setup(Options $options)
    {
        $help = <<<EOF
ComboStrap Administrative Commands


Example:
 * Replicate all pages into the database
```bash
php ./bin/plugin.php combo metadata-to-database :
# or
php ./bin/plugin.php combo metadata-to-database /
```
 * Replicate only the page `:namespace:my-page`
```bash
php ./bin/plugin.php combo metadata-to-database :namespace:my-page
# or
php ./bin/plugin.php combo metadata-to-database /namespace/my-page
```

Animal: If you want to use it for an animal farm, you need to set first the animal directory name in a environment variable
```bash
set animal=animal-directory-name
```

EOF;

        $options->setHelp($help);
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::METADATA_TO_DATABASE, "Replicate the file system metadata into the database");
        $options->registerCommand(self::ANALYTICS, "Start the analytics and export optionally the data");
        $options->registerCommand(self::PLUGINS_TO_UPDATE, "List the plugins to update");
        $options->registerCommand(self::METADATA_TO_FRONTMATTER, "Replicate the file system metadata into the page frontmatter");
        $options->registerCommand(self::SYNC, "Delete the non-existing pages in the database");
        $options->registerArgument(
            'path',
            "The start path (a page or a directory). For all pages, type the root directory '/'",
            false
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            true
        );
        $options->registerOption(
            self::HOST_OPTION,
            "The http host name of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::PORT_OPTION,
            "The http host port of your server. This value is used by dokuwiki in the rendering cache key",
            null,
            true,
            self::METADATA_TO_DATABASE
        );
        $options->registerOption(
            self::FORCE_OPTION,
            "Replicate with force",
            'f',
            false,
            self::METADATA_TO_DATABASE
        );
        // NOTE(review): the `dry` option is registered but not read anywhere in this file
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        // `main` reads the `depth` option: it must be registered, otherwise it can never be given
        $options->registerOption(
            'depth',
            "Optional, the recursion depth when walking the start path. 0 (the default) means unlimited",
            null,
            true
        );

    }

    /**
     * The main entry: prints the detected animal and dispatches to the requested command
     *
     * The process exits with the status 1 when a mandatory value is missing
     * or when the command is unknown / not given.
     *
     * @param Options $options
     * @throws ExceptionCombo
     */
    protected function main(Options $options)
    {

        if (isset($_REQUEST['animal'])) {
            echo "Animal detected: " . $_REQUEST['animal'] . "\n";
        } else {
            echo "No Animal detected\n";
            echo "Conf: " . DOKU_CONF . "\n";
        }

        $args = $options->getArgs();

        // Option values given on the command line are strings
        $depth = (int)$options->getOpt('depth', 0);
        $cmd = $options->getCmd();
        switch ($cmd) {
            case self::METADATA_TO_DATABASE:
                $startPath = $this->getStartPath($args);
                $force = $options->getOpt(self::FORCE_OPTION, false);
                $hostOptionValue = $options->getOpt(self::HOST_OPTION, null);
                if ($hostOptionValue === null) {
                    fwrite(STDERR, "The host name is mandatory\n");
                    // a missing mandatory value is a failure (same exit status as in getStartPath)
                    exit(1);
                }
                $_SERVER['HTTP_HOST'] = $hostOptionValue;
                $portOptionValue = $options->getOpt(self::PORT_OPTION, null);
                if ($portOptionValue === null) {
                    fwrite(STDERR, "The host port is mandatory\n");
                    exit(1);
                }
                $_SERVER['SERVER_PORT'] = $portOptionValue;
                $this->index($startPath, $force, $depth);
                break;
            case self::METADATA_TO_FRONTMATTER:
                $startPath = $this->getStartPath($args);
                $this->frontmatter($startPath, $depth);
                break;
            case self::ANALYTICS:
                $startPath = $this->getStartPath($args);
                $output = $options->getOpt('output', '');
                $this->analytics($startPath, $output, $depth);
                break;
            case self::SYNC:
                $this->deleteNonExistingPageFromDatabase();
                break;
            case self::PLUGINS_TO_UPDATE:
                /**
                 * Endpoint:
                 * self::EXTENSION_REPOSITORY_API.'?fmt=php&ext[]='.urlencode($name)
                 * `http://www.dokuwiki.org/lib/plugins/pluginrepo/api.php?fmt=php&ext[]=`.urlencode($name)
                 */
                $pluginList = plugin_list('', true);
                /* @var helper_plugin_extension_extension $extension */
                $extension = $this->loadHelper('extension_extension');
                foreach ($pluginList as $name) {
                    $extension->setExtension($name);
                    if ($extension->updateAvailable()) {
                        // one extension by line
                        echo "The extension $name should be updated\n";
                    }
                }
                break;
            default:
                if ($cmd !== "") {
                    fwrite(STDERR, "Combo: Command unknown (" . $cmd . ")\n");
                } else {
                    echo $options->help();
                }
                exit(1);
        }

    }

    /**
     * Index every page found under the start path and dispatch the replication events
     *
     * @param array $namespaces the start path, passed as is to {@link FsWikiUtility::getPages()}
     * @param bool $rebuild NOTE(review): not used, the indexing is always called with the force flag - confirm that this is intended
     * @param int $depth recursion depth. 0 for unlimited
     * @throws ExceptionCombo
     */
    private function index($namespaces = array(), $rebuild = false, $depth = 0)
    {

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = FsWikiUtility::getPages($namespaces, $depth);

        $pageCounter = 0;
        $totalNumberOfPages = count($pages);
        foreach ($pages as $pageArray) {
            $id = $pageArray['id'];
            /**
             * Indexing the page start the database replication
             * See {@link action_plugin_combo_fulldatabasereplication}
             */
            $pageCounter++;
            try {
                /**
                 * Verbose and forced indexing,
                 * a falsy return value is reported as an error
                 */
                $indexedOrNot = idx_addPage($id, true, true);
                if ($indexedOrNot) {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) was indexed and replicated", LogUtility::LVL_MSG_INFO);
                } else {
                    LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error", LogUtility::LVL_MSG_ERROR);
                }
            } catch (ExceptionComboRuntime $e) {
                // an error on one page should not stop the processing of the others
                LogUtility::msg("The page {$id} ($pageCounter / $totalNumberOfPages) has an error: " . $e->getMessage(), LogUtility::LVL_MSG_ERROR);
            }
        }
        /**
         * Process all backlinks
         */
        echo "Processing Replication Request\n";
        Event::dispatchEvent(PHP_INT_MAX);

    }

    /**
     * Get or process the analytics document of every page found under the start path
     * and optionally write one csv row by page
     *
     * @param array $namespaces the start path, passed as is to {@link FsWikiUtility::getPages()}
     * @param string|null $output the path of the csv file to write (no file is written when empty)
     * @param int $depth recursion depth, passed as is to {@link FsWikiUtility::getPages()}
     */
    private function analytics($namespaces = array(), $output = null, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            // the warning is suppressed because the failure is reported just below
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = FsWikiUtility::getPages($namespaces, $depth);


        if (!empty($fileHandle)) {
            $header = array(
                'id',
                'backlinks',
                'broken_links',
                'changes',
                'chars',
                'external_links',
                'external_medias',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'internal_links',
                'internal_medias',
                'words',
                'score'
            );
            fwrite($fileHandle, implode(",", $header) . PHP_EOL);
        }
        $pageCounter = 0;
        $totalNumberOfPages = count($pages);
        foreach ($pages as $pageArray) {
            $id = $pageArray['id'];
            $page = Page::createPageFromId($id);


            $pageCounter++;
            echo "Analytics Processing for the page {$id} ($pageCounter / $totalNumberOfPages)\n";

            /**
             * Analytics
             */
            $analytics = $page->getAnalyticsDocument();
            $data = $analytics->getOrProcessContent()->toArray();

            if (!empty($fileHandle)) {
                $statistics = $data[AnalyticsDocument::STATISTICS];
                $headingCount = $statistics[AnalyticsDocument::HEADING_COUNT];
                // The values are written in this order, the keys are not written
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[BacklinkCount::getPersistentName()],
                    'broken_links' => $statistics[AnalyticsDocument::INTERNAL_LINK_BROKEN_COUNT],
                    'changes' => $statistics[AnalyticsDocument::EDITS_COUNT],
                    'chars' => $statistics[AnalyticsDocument::CHAR_COUNT],
                    'external_links' => $statistics[AnalyticsDocument::EXTERNAL_LINK_COUNT],
                    'external_medias' => $statistics[AnalyticsDocument::EXTERNAL_MEDIA_COUNT],
                    PageH1::PROPERTY_NAME => $headingCount[PageH1::PROPERTY_NAME],
                    'h2' => $headingCount['h2'],
                    'h3' => $headingCount['h3'],
                    'h4' => $headingCount['h4'],
                    'h5' => $headingCount['h5'],
                    'internal_links' => $statistics[AnalyticsDocument::INTERNAL_LINK_COUNT],
                    'internal_medias' => $statistics[AnalyticsDocument::INTERNAL_MEDIA_COUNT],
                    'words' => $statistics[AnalyticsDocument::WORD_COUNT],
                    // NOTE(review): the header names this last column `score` but the value is the quality `low` entry - confirm
                    'low' => $data[AnalyticsDocument::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        }
        if (!empty($fileHandle)) {
            fclose($fileHandle);
        }

    }


    /**
     * Delete from the database the pages that do not exist on the file system
     *
     * All the ids of the `pages` table are read and each one is checked with `page_exists`.
     * If the ids cannot be read, the error is logged and nothing is deleted.
     */
    private function deleteNonExistingPageFromDatabase()
    {
        LogUtility::msg("Starting: Deleting non-existing page from database");
        $sqlite = Sqlite::createOrGetSqlite();
        $request = $sqlite
            ->createRequest()
            ->setQuery("select id as \"id\" from pages");
        $rows = [];
        try {
            $rows = $request
                ->execute()
                ->getRows();
        } catch (ExceptionCombo $e) {
            LogUtility::msg("Error while getting the id pages. {$e->getMessage()}");
            return;
        } finally {
            // the request is closed on success and on error
            $request->close();
        }
        $counter = 0;
        foreach ($rows as $row) {
            $counter++;
            $id = $row['id'];
            if (!page_exists($id)) {
                echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
                Page::createPageFromId($id)->getDatabasePage()->delete();
            }
        }
        LogUtility::msg("Sync finished ($counter pages checked)");


    }

    /**
     * Synchronize the frontmatter of every page found under the start path
     * and print a report (pages not changed, in error, changed and with an other status)
     *
     * @param array|string $namespaces the start path, passed as is to {@link FsWikiUtility::getPages()}
     * @param int $depth recursion depth, passed as is to {@link FsWikiUtility::getPages()}
     */
    private function frontmatter($namespaces, $depth)
    {
        $pages = FsWikiUtility::getPages($namespaces, $depth);
        $pageCounter = 0;
        $totalNumberOfPages = count($pages);
        $pagesWithChanges = [];
        $pagesWithError = [];
        $pagesWithOthers = [];
        $notChangedCounter = 0;
        foreach ($pages as $pageArray) {
            $id = $pageArray['id'];
            $page = Page::createPageFromId($id);
            $pageCounter++;
            LogUtility::msg("Processing page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
            try {
                $message = MetadataFrontmatterStore::createFromPage($page)
                    ->sync();
                switch ($message->getStatus()) {
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_NOT_CHANGED:
                        $notChangedCounter++;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_DONE:
                        $pagesWithChanges[] = $id;
                        break;
                    case syntax_plugin_combo_frontmatter::UPDATE_EXIT_CODE_ERROR:
                        $pagesWithError[$id] = $message->getPlainTextContent();
                        break;
                    default:
                        $pagesWithOthers[$id] = $message->getPlainTextContent();
                        break;

                }
            } catch (ExceptionCombo $e) {
                // an exception is reported with the pages in error
                $pagesWithError[$id] = $e->getMessage();
            }

        }

        echo "\n";
        echo "Result:\n";
        echo "$notChangedCounter pages without any frontmatter modifications\n";

        if (count($pagesWithError) > 0) {
            echo "\n";
            echo "The following pages had errors\n";
            $pageCounter = 0;
            $totalNumberOfPages = count($pagesWithError);
            foreach ($pagesWithError as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages): " . $message, LogUtility::LVL_MSG_ERROR);
            }
        } else {
            echo "No error\n";
        }

        if (count($pagesWithChanges) > 0) {
            echo "\n";
            echo "The following pages had changed:\n";
            $pageCounter = 0;
            $totalNumberOfPages = count($pagesWithChanges);
            foreach ($pagesWithChanges as $id) {
                $pageCounter++;
                // a successful update is an information, not an error
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) ", LogUtility::LVL_MSG_INFO);
            }
        } else {
            echo "No changes\n";
        }

        if (count($pagesWithOthers) > 0) {
            echo "\n";
            echo "The following pages had an other status\n";
            $pageCounter = 0;
            $totalNumberOfPages = count($pagesWithOthers);
            foreach ($pagesWithOthers as $id => $message) {
                $pageCounter++;
                LogUtility::msg("Page {$id} ($pageCounter / $totalNumberOfPages) " . $message, LogUtility::LVL_MSG_ERROR);
            }
        }
    }

    /**
     * Return the start path from the command line arguments
     *
     * The root (`:` or `/`) is returned as is, any other value goes through `cleanID`.
     * The process exits with the status 1 if there is not exactly one argument.
     *
     * @param array $args the command line arguments
     * @return string the start path
     */
    private function getStartPath($args)
    {
        $sizeof = count($args);
        switch ($sizeof) {
            case 0:
                fwrite(STDERR, "The start path is mandatory and was not given\n");
                exit(1);
            case 1:
                $startPath = $args[0];
                if (!in_array($startPath, [":", "/"], true)) {
                    // cleanId would return blank for a root
                    $startPath = cleanID($startPath);
                }
                break;
            default:
                fwrite(STDERR, "Too much arguments given $sizeof\n");
                exit(1);
        }
        return $startPath;
    }
}