<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\PluginUtility;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

require_once(__DIR__ . '/class/Analytics.php');

/**
 * The 128M memory limit of the server is not enough
 */
ini_set('memory_limit', '256M');

/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * Register the options and the commands of this cli
     *
     * NOTE(review): the `namespaces` and `version` options are registered here
     * but never read in this class ({@link main()} takes the namespaces from the
     * positional arguments) - confirm whether they are still needed.
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o', 'file');
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c', false);
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        // `depth` is read in main(): without this registration it could never be set.
        // No short flag because `d` is already taken by `dry`.
        $options->registerOption(
            'depth',
            "Optional, recursion depth. 0 (the default) for unlimited",
            null, 'depth');
        $options->registerCommand(self::SYNC, "Sync the database");

    }

    /**
     * The main entry: dispatch to the handler of the given command
     *
     * The positional arguments are cleaned with `cleanID` and used as namespaces.
     * Without any argument, a single empty namespace (the top) is used.
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown
     */
    protected function main(Options $options)
    {

        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) $namespaces = array(''); //import from top

        $command = $options->getCmd();
        switch ($command) {
            case self::ANALYTICS:
                $cache = $options->getOpt('cache', false);
                // the command line delivers a string, the search expects a number
                $depth = (int)$options->getOpt('depth', 0);
                $output = $options->getOpt('output', '');
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $dry = (bool)$options->getOpt('dry', false);
                $this->syncPages($dry);
                break;
            default:
                throw new \RuntimeException("Command unknown (" . $command . ")");
        }

    }

    /**
     * Process the analytics of every page found below the given namespaces
     * and, when an output is given, write one csv line per page into it
     *
     * @param array $namespaces
     * @param string|null $output the path of the csv file to write (no file is written if empty)
     * @param bool $cache passed as is to {@link Analytics::processAndGetDataAsArray()}
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                // NOTE(review): the last column is named `score` but the rows below
                // write the quality `low` value - confirm which one is intended.
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    'score'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            $pageCounter = 0;
            foreach ($pages as $page) {
                $id = $page['id'];

                $pageCounter++;
                echo "Processing the page {$id} ($pageCounter)\n";

                $data = Analytics::processAndGetDataAsArray($id, $cache);
                if (empty($fileHandle)) {
                    continue;
                }

                $statistics = $data[Analytics::STATISTICS];
                $headers = $statistics[Analytics::HEADERS_COUNT];
                // Only the values are written: their order must follow the header above
                $row = array(
                    'id' => $id,
                    'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                    'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                    'changes' => $statistics[Analytics::EDITS_COUNT],
                    'chars' => $statistics[Analytics::CHARS_COUNT],
                    'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                    'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS],
                    'h1' => $headers['h1'],
                    'h2' => $headers['h2'],
                    'h3' => $headers['h3'],
                    'h4' => $headers['h4'],
                    'h5' => $headers['h5'],
                    'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                    'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                    'words' => $statistics[Analytics::WORDS_COUNT],
                    'low' => $data[Analytics::QUALITY]['low']
                );
                fwrite($fileHandle, implode(",", $row) . PHP_EOL);
            }

        } finally {
            // Close the output even if the processing of a page has thrown
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * For every namespace, the data directory is searched with `search_universal`
     * and, if the namespace id is itself an existing page, this page is added too.
     *
     * @param array $namespaces
     * @param int $depth recursion depth given to the search
     * @return array the found pages, each one being an array with at least an `id` key
     */
    private function findPages($namespaces = array(), $depth = 0)
    {

        global $conf;
        $datadir = $conf['datadir'];

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = array();
        foreach ($namespaces as $ns) {

            search(
                $pages,
                $datadir,
                'search_universal',
                array(
                    'depth' => $depth,
                    'listfiles' => true,
                    'listdirs' => false,
                    'pagesonly' => true,
                    'skipacl' => true,
                    'firsthead' => false,
                    'meta' => false,
                ),
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize(wikiFN($ns)),
                    'mtime' => filemtime(wikiFN($ns)),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages that do not exist anymore on the file system
     *
     * @param bool $dry if true, the pages are only reported and nothing is deleted
     * @throws \RuntimeException when the pages cannot be selected
     */
    private function syncPages($dry = false)
    {
        $sqlite = Sqlite::getSqlite();
        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $rows = $sqlite->res2arr($res);
        $sqlite->res_close($res);
        foreach ($rows as $row) {
            $id = $row['ID'];
            if (page_exists($id)) {
                continue;
            }
            if ($dry) {
                echo 'Page does not exist on the file system. Dry-run, not deleted from the database (' . $id . ")\n";
                continue;
            }
            echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
            Page::createFromId($id)->deleteInDb();
        }

    }
}