<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

require_once(__DIR__ . '/class/Analytics.php');

/**
 * The server memory limit of 128M is not enough
 */
ini_set('memory_limit', '256M');

/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    /** Name of the command that (re)computes the analytics of the pages */
    const ANALYTICS = "analytics";
    /** Name of the command that removes from the database the pages that no longer exist */
    const SYNC = "sync";

    /**
     * Register the options and the commands of this cli
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        // NOTE(review): this option is registered but main() reads the namespaces
        // from the positional arguments, not from this option
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o',
            'file'
        );
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c',
            false
        );
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd',
            false
        );
        // main() reads a `depth` option: it must be registered, otherwise it can never be set.
        // No short flag: `d` is already taken by `dry`
        $options->registerOption(
            'depth',
            "Optional, recursion depth. 0 (the default) for unlimited",
            null,
            'depth'
        );
        $options->registerCommand(self::SYNC, "Sync the database");
    }

    /**
     * The main entry: dispatch to the handler of the requested command
     *
     * The positional arguments are the namespaces to process
     * (the root namespace when none is given).
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown (or missing)
     */
    protected function main(Options $options)
    {
        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) $namespaces = array(''); //import from top

        $cache = $options->getOpt('cache', false);
        // The value comes from the command line as a string: normalize it to a non-negative integer
        $depth = max(0, (int)$options->getOpt('depth', 0));
        $dry = (bool)$options->getOpt('dry', false);

        switch ($options->getCmd()) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                // sync deletes data: honor the dry-run flag
                $this->syncPages($dry);
                break;
            default:
                throw new \RuntimeException("Command unknown (" . $options->getCmd() . ")");
        }
    }

    /**
     * Process the analytics of every page found in the given namespaces and,
     * when an output is given, write one csv line of statistics by page.
     *
     * @param array $namespaces the namespaces to scan
     * @param string|null $output the path of the csv file to write (no file is written if empty)
     * @param bool $cache passed as-is to {@link Analytics::processAndGetDataAsArray()}
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {
        // Run as admin to overcome the fact that
        // anonymous user cannot set all links and backlinks
        global $USERINFO;

        $fileHandle = null;
        if (!empty($output)) {
            // The warning is suppressed because the failure is reported just below
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        $pages = $this->findPages($namespaces, $depth);

        if ($fileHandle) {
            // NOTE(review): the last column is named `score` but the value written
            // in each row is the `low` entry of the quality data - confirm which one is intended
            $header = array(
                'id',
                'backlinks',
                'broken_links',
                'changes',
                'chars',
                'external_links',
                'external_medias',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'internal_links',
                'internal_medias',
                'words',
                'score'
            );
            fwrite($fileHandle, implode(",", $header) . PHP_EOL);
        }

        foreach ($pages as $page) {
            $id = $page['id'];

            // Set for each page, as in the original code, in case the processing resets it
            $USERINFO['grps'] = array('admin');

            echo 'Processing the page ' . $id . "\n";

            $data = Analytics::processAndGetDataAsArray($id, $cache);
            if (!$fileHandle) {
                continue;
            }

            $statistics = $data[Analytics::STATISTICS];
            $headers = $statistics[Analytics::HEADERS_COUNT];
            // Only the values are written: the keys document the column they belong to
            $row = array(
                'id' => $id,
                'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                'changes' => $statistics[Analytics::EDITS_COUNT],
                'chars' => $statistics[Analytics::CHARS_COUNT],
                'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS],
                'h1' => $headers['h1'],
                'h2' => $headers['h2'],
                'h3' => $headers['h3'],
                'h4' => $headers['h4'],
                'h5' => $headers['h5'],
                'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                'words' => $statistics[Analytics::WORDS_COUNT],
                'low' => $data[Analytics::QUALITY]['low']
            );
            fwrite($fileHandle, implode(",", $row) . PHP_EOL);
        }

        if ($fileHandle) {
            fclose($fileHandle);
        }
    }

    /**
     * Find the pages in the tree
     *
     * For each namespace, the pages below it are collected with the DokuWiki
     * `search` function (acl checks skipped) and, when a page with the same id
     * than the namespace exists, it is added as well.
     *
     * @param array $namespaces the namespaces to scan ('' for the root)
     * @param int $depth recursion depth passed to the search. 0 for unlimited
     * @return array a list of page descriptions (each one has at least an `id` key)
     */
    private function findPages($namespaces = array(), $depth = 0)
    {
        global $conf;
        $datadir = $conf['datadir'];

        $searchOptions = array(
            'depth' => $depth,
            'listfiles' => true,
            'listdirs' => false,
            'pagesonly' => true,
            'skipacl' => true,
            'firsthead' => false,
            'meta' => false,
        );

        $pages = array();
        foreach ($namespaces as $ns) {

            // search() appends its results to $pages
            search(
                $pages,
                $datadir,
                'search_universal',
                $searchOptions,
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $file = wikiFN($ns);
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize($file),
                    'mtime' => filemtime($file),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages that do not exist anymore on the file system
     *
     * @param bool $dry when true, the pages are only reported and nothing is deleted
     * @throws \RuntimeException when the database is not available or when the selection query fails
     */
    private function syncPages($dry = false)
    {
        $sqlite = Sqlite::getSqlite();
        // Defensive guard - presumably no handle is returned when the database
        // is not available (to be confirmed against the Sqlite class)
        if (!$sqlite) {
            throw new \RuntimeException("The sqlite database is not available");
        }

        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $rows = $sqlite->res2arr($res);
        $sqlite->res_close($res);

        foreach ($rows as $row) {
            $id = $row['ID'];
            if (page_exists($id)) {
                continue;
            }
            if ($dry) {
                echo 'Page does not exist on the file system. Dry-run: it would be deleted from the database (' . $id . ")\n";
                continue;
            }
            echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
            Page::createFromId($id)->deleteInDb();
        }
    }
}