<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

/**
 * All dependencies are loaded in the plugin utility
 */
require_once(__DIR__ . '/class/PluginUtility.php');

/**
 * The default server memory (128M) is not enough
 */
ini_set('memory_limit', '256M');

/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * Register the options and the commands of this cli
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        // NOTE(review): main() reads the namespaces from the positional arguments,
        // not from this option - confirm whether `-n` should also be honored
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o', 'file');
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c', false);
        // NOTE(review): the dry option is registered but not read in this file
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        // main() reads this option; it has no short form because `d` is taken by `dry`
        $options->registerOption(
            'depth',
            "Optional, the recursion depth, 0 for unlimited (default)",
            null, 'depth');
        $options->registerCommand(self::SYNC, "Sync the database");

    }

    /**
     * The main entry
     *
     * Dispatches to the command given on the command line.
     * When no command is given, the analytics command is run.
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown
     */
    protected function main(Options $options)
    {

        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) $namespaces = array(''); //import from top

        $cache = $options->getOpt('cache', false);
        // An option value comes from the command line as a string
        $depth = (int)$options->getOpt('depth', 0);
        $cmd = $options->getCmd();
        if (empty($cmd)) {
            $cmd = self::ANALYTICS;
        }
        switch ($cmd) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                //if ($output == '-') $output = 'php://stdout';
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $this->syncPages();
                break;
            default:
                throw new \RuntimeException("Combo: Command unknown (" . $cmd . ")");
        }


    }

    /**
     * Process the analytics of every page found in the namespaces
     * and write them optionally as csv, one line by page, into the output file
     *
     * @param array $namespaces the namespaces to scan
     * @param string|null $output the path of the csv file, no file is written if empty
     * @param bool $cache passed as is to {@link Analytics::processAndGetDataAsArray()}
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        // The finally block closes the file even if the processing of a page throws
        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                // NOTE(review): the last column is named `score` but the row
                // stores the `low` value of the quality data - confirm which one is intended
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    'score'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            $pageCounter = 0;
            $totalNumberOfPages = count($pages);
            // A foreach avoids re-indexing the whole array at each iteration (array_shift)
            foreach ($pages as $page) {
                $id = $page['id'];

                $pageCounter++;
                echo "Processing the page {$id} ($pageCounter / $totalNumberOfPages)\n";

                $data = Analytics::processAndGetDataAsArray($id, $cache);
                if (!empty($fileHandle)) {
                    $statistics = $data[Analytics::STATISTICS];
                    $headersCount = $statistics[Analytics::HEADERS_COUNT];
                    // Only the order of the values matters, the keys are not written
                    $row = array(
                        'id' => $id,
                        'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                        'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                        'changes' => $statistics[Analytics::EDITS_COUNT],
                        'chars' => $statistics[Analytics::CHARS_COUNT],
                        'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                        'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS_COUNT],
                        Analytics::H1 => $headersCount[Analytics::H1],
                        'h2' => $headersCount['h2'],
                        'h3' => $headersCount['h3'],
                        'h4' => $headersCount['h4'],
                        'h5' => $headersCount['h5'],
                        'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                        'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                        'words' => $statistics[Analytics::WORDS_COUNT],
                        'low' => $data[Analytics::QUALITY]['low']
                    );
                    fwrite($fileHandle, implode(",", $row) . PHP_EOL);
                }
            }

        } finally {
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * For each namespace, the pages found by the DokuWiki `search` function are collected
     * and the page that has the same id than the namespace is added if it exists.
     *
     * @param array $namespaces
     * @param int $depth the value of the `depth` search option
     * @return array a list of page data arrays (each one has at least an `id` key)
     */
    private function findPages($namespaces = array(), $depth = 0)
    {

        global $conf;
        $datadir = $conf['datadir'];

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = array();
        foreach ($namespaces as $ns) {

            search(
                $pages,
                $datadir,
                'search_universal',
                array(
                    'depth' => $depth,
                    'listfiles' => true,
                    'listdirs' => false,
                    'pagesonly' => true,
                    'skipacl' => true,
                    'firsthead' => false,
                    'meta' => false,
                ),
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $pagePath = wikiFN($ns);
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize($pagePath),
                    'mtime' => filemtime($pagePath),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages that do not exist anymore on the file system
     *
     * @throws \RuntimeException when the selection of the pages fails
     */
    private function syncPages()
    {
        $sqlite = Sqlite::getSqlite();
        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $res2arr = $sqlite->res2arr($res);
        $sqlite->res_close($res);
        foreach ($res2arr as $row) {
            $id = $row['ID'];
            if (!page_exists($id)) {
                echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
                Page::createPageFromId($id)->deleteInDb();
            }
        }


    }
}