<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

require_once(__DIR__ . '/class/Analytics.php');

/**
 * The default memory limit of the server (128M) is not enough
 * to process the pages, so it is raised for this command line run.
 */
ini_set('memory_limit', '256M');

/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Commands:
 *   * `analytics`: compute the analytics data of the pages found under the
 *     namespaces given as arguments (optionally exported as csv)
 *   * `sync`: delete from the database the pages that no longer exist on disk
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * Register the options and the commands
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        // NOTE(review): this option is registered but main() reads the namespaces
        // from the positional arguments only - confirm which one is intended
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            'file'
        );
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c',
            false
        );
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd',
            false
        );
        // main() reads `depth`: it must be registered, otherwise it can never be set.
        // No short form because `d` is already used by `dry`.
        $options->registerOption(
            'depth',
            "Optional, recursion depth. 0 (the default) for unlimited",
            null,
            'depth'
        );
        $options->registerCommand(self::SYNC, "Sync the database");
    }

    /**
     * The main entry: dispatch to the command given on the command line
     *
     * @param Options $options
     * @throws \RuntimeException when no known command was given
     */
    protected function main(Options $options)
    {

        // The positional arguments are the namespaces to process
        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) $namespaces = array(''); // import from top

        $cache = $options->getOpt('cache', false);
        // Option values are strings: normalize to a non-negative integer
        $depth = max(0, (int)$options->getOpt('depth', 0));
        $dry = (bool)$options->getOpt('dry', false);

        switch ($options->getCmd()) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $this->syncPages($dry);
                break;
            default:
                // The `version` option is advertised in the help: honor it
                // when no command was given instead of failing
                if ($options->getOpt('version')) {
                    $info = $this->getInfo();
                    echo (isset($info['date']) ? $info['date'] : '') . "\n";
                    break;
                }
                throw new \RuntimeException("Command unknown (" . $options->getCmd() . ")");
        }

    }

    /**
     * Compute the analytics data of every page found under the namespaces
     * and, when an output is given, write one csv row per page into it
     *
     * @param array $namespaces the namespaces to scan ('' is the root)
     * @param string|null $output the path of the csv file (no export if empty)
     * @param bool $cache passed as is to {@link Analytics::getDataAsArray()}
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        // The handle is closed in the `finally` block even if a page fails
        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                // NOTE(review): the last column is named `score` but the row below
                // fills it with the `low` quality value - confirm the expected one
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    'score'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            global $USERINFO;
            foreach ($pages as $page) {
                $id = $page['id'];

                // Run as admin to overcome the fact that
                // anonymous user cannot see all links and backlinks
                $USERINFO['grps'] = array('admin');

                echo 'Processing the page ' . $id . "\n";

                $data = Analytics::getDataAsArray($id, $cache);
                if (empty($fileHandle)) {
                    continue;
                }

                $statistics = $data[Analytics::STATISTICS];
                $headers = $statistics[Analytics::HEADERS_COUNT];
                // Only the values are written: the order must match the header
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                    'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                    'changes' => $statistics[Analytics::EDITS_COUNT],
                    'chars' => $statistics[Analytics::CHARS_COUNT],
                    'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                    'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS],
                    'h1' => $headers['h1'],
                    'h2' => $headers['h2'],
                    'h3' => $headers['h3'],
                    'h4' => $headers['h4'],
                    'h5' => $headers['h5'],
                    'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                    'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                    'words' => $statistics[Analytics::WORDS_COUNT],
                    'low' => $data[Analytics::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        } finally {
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * For every namespace, the pages below it are collected with
     * `search_universal` and, when the namespace id is itself an
     * existing page, this page is added too.
     *
     * @param array $namespaces the namespaces to scan ('' is the root)
     * @param int $depth recursion depth given to the search
     * @return array the list of page descriptions (each with at least an `id` key)
     */
    private function findPages($namespaces = array(), $depth = 0)
    {
        global $conf;
        $datadir = $conf['datadir'];

        $pages = array();
        foreach ($namespaces as $ns) {

            search(
                $pages,
                $datadir,
                'search_universal',
                array(
                    'depth' => $depth,
                    'listfiles' => true,
                    'listdirs' => false,
                    'pagesonly' => true,
                    'skipacl' => true,
                    'firsthead' => false,
                    'meta' => false,
                ),
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $file = wikiFN($ns);
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize($file),
                    'mtime' => filemtime($file),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages that do not exist
     * on the file system anymore
     *
     * @param bool $dry when true, the pages are only reported, nothing is deleted
     * @throws \RuntimeException when the pages cannot be selected
     */
    private function syncPages($dry = false)
    {
        $sqlite = Sqlite::getSqlite();
        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $rows = $sqlite->res2arr($res);
        $sqlite->res_close($res);

        foreach ($rows as $row) {
            $id = $row['ID'];
            if (page_exists($id)) {
                continue;
            }
            if ($dry) {
                echo 'Page does not exist on the file system. Dry run, not deleted from the database (' . $id . ")\n";
                continue;
            }
            echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
            Page::createFromId($id)->deleteInDb();
        }

    }
}