<?php
/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */
if (!defined('DOKU_INC')) die();

use ComboStrap\Analytics;
use ComboStrap\Page;
use ComboStrap\Sqlite;
use splitbrain\phpcli\Options;

/**
 * All dependencies are loaded in plugin utility
 */
require_once(__DIR__ . '/class/PluginUtility.php');

/**
 * The default 128M memory limit of the server is not enough
 */
ini_set('memory_limit', '256M');

/**
 * Class cli_plugin_combo
 *
 * This is a cli:
 * https://www.dokuwiki.org/devel:cli_plugins#example
 *
 * Usage:
 *
 * ```
 * docker exec -ti $(CONTAINER) /bin/bash
 * ./bin/plugin.php combo -c
 * ```
 * or via the IDE
 *
 *
 * Example:
 * https://www.dokuwiki.org/tips:grapher
 *
 */
class cli_plugin_combo extends DokuWiki_CLI_Plugin
{
    const ANALYTICS = "analytics";
    const SYNC = "sync";

    /**
     * Register the options and the commands
     *
     * @param Options $options
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            "Manage the analytics database\n\n" .
            "analytics\n" .
            "sync"
        );
        $options->registerOption('version', 'print version', 'v');
        $options->registerCommand(self::ANALYTICS, "Update the analytics data");
        $options->registerOption(
            'namespaces',
            "If no namespace is given, the root namespace is assumed.",
            'n',
            true
        );
        $options->registerOption(
            'output',
            "Optional, where to store the analytical data as csv eg. a filename.",
            'o', 'file');
        $options->registerOption(
            'cache',
            "Optional, returns from the cache if set",
            'c', false);
        $options->registerOption(
            'dry',
            "Optional, dry-run",
            'd', false);
        // The depth is read in main(): it must be registered,
        // otherwise it can never be passed on the command line.
        // No short form: `d` is already taken by `dry`.
        $options->registerOption(
            'depth',
            "Optional, recursion depth. 0 (the default) for unlimited",
            null, 'depth');
        $options->registerCommand(self::SYNC, "Sync the database");

    }

    /**
     * The main entry
     *
     * Dispatch to the requested command; when no command is given,
     * the analytics command is executed.
     *
     * @param Options $options
     * @throws \RuntimeException when the command is unknown
     */
    protected function main(Options $options)
    {

        $namespaces = array_map('cleanID', $options->getArgs());
        if (!count($namespaces)) $namespaces = array(''); //import from top

        $cache = $options->getOpt('cache', false);
        // A value given on the command line may not be an integer yet
        $depth = (int)$options->getOpt('depth', 0);
        $cmd = $options->getCmd();
        if ($cmd == "") {
            $cmd = self::ANALYTICS;
        }
        switch ($cmd) {
            case self::ANALYTICS:
                $output = $options->getOpt('output', '');
                //if ($output == '-') $output = 'php://stdout';
                $this->updateAnalyticsData($namespaces, $output, $cache, $depth);
                break;
            case self::SYNC:
                $this->syncPages();
                break;
            default:
                throw new \RuntimeException("Combo: Command unknown (" . $cmd . ")");
        }


    }

    /**
     * Process the analytics of every page found in the namespaces and,
     * when an output is given, write one csv row per page into it.
     *
     * @param array $namespaces the namespaces to scan
     * @param $output - the path of the csv file (no file is written if empty)
     * @param bool $cache passed to the analytics processing
     * @param int $depth recursion depth. 0 for unlimited
     */
    private function updateAnalyticsData($namespaces = array(), $output = null, $cache = false, $depth = 0)
    {

        $fileHandle = null;
        if (!empty($output)) {
            $fileHandle = @fopen($output, 'w');
            if (!$fileHandle) $this->fatal("Failed to open $output");
        }

        // try/finally: the file is closed even if the processing of a page throws
        try {

            $pages = $this->findPages($namespaces, $depth);

            if (!empty($fileHandle)) {
                // NOTE(review): the last column is named `score` but the row below
                // writes the `low` quality value - confirm which one is intended
                $header = array(
                    'id',
                    'backlinks',
                    'broken_links',
                    'changes',
                    'chars',
                    'external_links',
                    'external_medias',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'internal_links',
                    'internal_medias',
                    'words',
                    'score'
                );
                fwrite($fileHandle, implode(",", $header) . PHP_EOL);
            }

            $pageCounter = 0;
            $totalNumberOfPages = count($pages);
            foreach ($pages as $page) {
                $id = $page['id'];

                $pageCounter++;
                echo "Processing the page {$id} ($pageCounter / $totalNumberOfPages)\n";

                // Called even without output file: the processing itself is the goal
                $data = Analytics::processAndGetDataAsArray($id, $cache);
                if (!empty($fileHandle)) {
                    $statistics = $data[Analytics::STATISTICS];
                    // Only the values are written; the order must follow the header
                    $row = array(
                        'id' => $id,
                        'backlinks' => $statistics[Analytics::INTERNAL_BACKLINKS_COUNT],
                        'broken_links' => $statistics[Analytics::INTERNAL_LINKS_BROKEN_COUNT],
                        'changes' => $statistics[Analytics::EDITS_COUNT],
                        'chars' => $statistics[Analytics::CHARS_COUNT],
                        'external_links' => $statistics[Analytics::EXTERNAL_LINKS_COUNT],
                        'external_medias' => $statistics[Analytics::EXTERNAL_MEDIAS_COUNT],
                        Analytics::H1 => $statistics[Analytics::HEADERS_COUNT][Analytics::H1],
                        'h2' => $statistics[Analytics::HEADERS_COUNT]['h2'],
                        'h3' => $statistics[Analytics::HEADERS_COUNT]['h3'],
                        'h4' => $statistics[Analytics::HEADERS_COUNT]['h4'],
                        'h5' => $statistics[Analytics::HEADERS_COUNT]['h5'],
                        'internal_links' => $statistics[Analytics::INTERNAL_LINKS_COUNT],
                        'internal_medias' => $statistics[Analytics::INTERNAL_MEDIAS_COUNT],
                        'words' => $statistics[Analytics::WORDS_COUNT],
                        'low' => $data[Analytics::QUALITY]['low']
                    );
                    fwrite($fileHandle, implode(",", $row) . PHP_EOL);
                }
            }

        } finally {
            if (!empty($fileHandle)) {
                fclose($fileHandle);
            }
        }

    }

    /**
     * Find the pages in the tree
     *
     * @param array $namespaces the namespaces to scan
     * @param int $depth the depth passed to the search
     * @return array the page entries (each one has at least an `id` key)
     */
    private function findPages($namespaces = array(), $depth = 0)
    {

        global $conf;
        $datadir = $conf['datadir'];

        /**
         * Run as admin to overcome the fact that
         * anonymous user cannot see all links and backlinks
         */
        global $USERINFO;
        $USERINFO['grps'] = array('admin');
        global $INPUT;
        $INPUT->server->set('REMOTE_USER', "cli");

        $pages = array();
        foreach ($namespaces as $ns) {

            // The found pages are appended to $pages
            search(
                $pages,
                $datadir,
                'search_universal',
                array(
                    'depth' => $depth,
                    'listfiles' => true,
                    'listdirs' => false,
                    'pagesonly' => true,
                    'skipacl' => true,
                    'firsthead' => false,
                    'meta' => false,
                ),
                str_replace(':', '/', $ns)
            );

            // add the ns start page
            if ($ns && page_exists($ns)) {
                $pages[] = array(
                    'id' => $ns,
                    'ns' => getNS($ns),
                    'title' => p_get_first_heading($ns, false),
                    'size' => filesize(wikiFN($ns)),
                    'mtime' => filemtime(wikiFN($ns)),
                    'perm' => 16,
                    'type' => 'f',
                    'level' => 0,
                    'open' => 1,
                );
            }

        }
        return $pages;
    }

    /**
     * Delete from the database the pages
     * that do not exist anymore on the file system
     *
     * @throws \RuntimeException when the pages could not be selected
     */
    private function syncPages()
    {
        $sqlite = Sqlite::getSqlite();
        $res = $sqlite->query("select ID from pages");
        if (!$res) {
            throw new \RuntimeException("An exception has occurred with the pages selection query");
        }
        $res2arr = $sqlite->res2arr($res);
        $sqlite->res_close($res);
        foreach ($res2arr as $row) {
            $id = $row['ID'];
            if (!page_exists($id)) {
                echo 'Page does not exist on the file system. Deleted from the database (' . $id . ")\n";
                Page::createPageFromId($id)->deleteInDb();
            }
        }


    }
}