<?php


use ComboStrap\Analytics;
use ComboStrap\LinkUtility;
use ComboStrap\LogUtility;
use ComboStrap\LowQualityPage;
use ComboStrap\Sqlite;
use ComboStrap\Text;
use ComboStrap\UrlCanonical;
use dokuwiki\ChangeLog\PageChangeLog;

require_once(__DIR__ . '/../class/Text.php');
require_once(__DIR__ . '/../class/LowQualityPage.php');
require_once(__DIR__ . '/../class/Analytics.php');


/**
 * An analysis renderer that exports stats/quality/metadata in a json format
 * You can export the data with
 * doku.php?id=somepage&do=export_combo_analytics
 *
 * The instruction callbacks (header, internallink, cdata, ...) only collect
 * counters in {@link renderer_plugin_combo_analytics::$stats}; the quality rules,
 * the score and the JSON document are built in
 * {@link renderer_plugin_combo_analytics::document_end()}.
 */
class renderer_plugin_combo_analytics extends Doku_Renderer
{
    const DATE_CREATED = 'date_created';
    const PLAINTEXT = 'formatted';
    const RESULT = "result";
    const DESCRIPTION = "description";
    const PASSED = "Passed";
    const FAILED = "Failed";
    const FIXME = 'fixme';

    /**
     * Rules key
     */
    const RULE_WORDS_MINIMAL = 'words_min';
    const RULE_OUTLINE_STRUCTURE = "outline_structure";
    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
    const RULE_WORDS_MAXIMAL = "words_max";
    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
    const RULE_DESCRIPTION_PRESENT = 'description_present';
    const RULE_FIXME = "fixme_min";
    const RULE_TITLE_PRESENT = "title_present";
    const RULE_CANONICAL_PRESENT = "canonical_present";

    /**
     * The default mandatory rules
     * (used when the {@link renderer_plugin_combo_analytics::CONF_MANDATORY_QUALITY_RULES}
     * configuration is not set)
     */
    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
        self::RULE_WORDS_MINIMAL,
        self::RULE_INTERNAL_BACKLINKS_MIN,
        self::RULE_INTERNAL_LINKS_MIN
    ];
    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";

    /**
     * Quality Score factors
     * They are used to calculate the score
     */
    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';


    /**
     * The processing data
     * that should be {@link renderer_plugin_combo_analytics::reset()}
     */
    public $stats = array(); // the stats
    protected $metadata = array(); // the metadata
    protected $headerId = 0; // the id of the header on the page (first, second, ...)

    /**
     * Processing counters
     */
    protected $quotelevel = 0; // the current quote nesting depth
    protected $formattingBracket = 0; // the number of formatting (strong, emphasis, underline) currently open
    protected $tableopen = false; // true between table_open and table_close
    private $plainTextId = 0; // the id of the last formatted text seen by cdata


    /**
     * Here the score is calculated
     *
     * Steps:
     *   * word and chars count of the raw wiki text
     *   * evaluation of the quality rules (result, info message and score by rule)
     *   * low quality flag when a mandatory rule is broken
     *   * build of the JSON (metadata, statistics, quality), stored in Sqlite
     *     when available and appended to the rendered document
     */
    public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        /**
         * The metadata
         */
        global $ID;
        $meta = p_get_metadata($ID);

        /**
         * Word and chars count
         * The word count does not take into account
         * words with non-words characters such as < =
         * Therefore the node and attribute are not taken in the count
         */
        $text = rawWiki($ID);
        $this->stats[Analytics::CHARS_COUNT] = strlen($text);
        $this->stats[Analytics::WORDS_COUNT] = Text::getWordCount($text);

        /**
         * The exported object
         */
        $statExport = $this->stats;

        /**
         * The edits and authors must be known before the score calculation
         * (the edits count is a score factor).
         * They are collected after the copy above to keep the exported statistics unchanged.
         */
        $this->processChangeLog($ID, $meta);


        /**
         * Internal link distance summary calculation
         */
        if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
            $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
            unset($statExport[Analytics::INTERNAL_LINK_DISTANCE]);
            $linkLengthsCount = count($linkLengths);
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = null;
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = null;
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = null;
            if ($linkLengthsCount > 0) {
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = array_sum($linkLengths) / $linkLengthsCount;
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = max($linkLengths);
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = min($linkLengths);
            }
        }

        /**
         * Quality Report / Rules
         */
        // The array that hold the results of the quality rules
        $ruleResults = array();
        // The array that hold the explanation of the failed rules
        $ruleInfo = array();
        // The array that hold the quality score details
        $qualityScores = array();


        /**
         * No fixme
         */
        $fixmeCount = $this->getStat(self::FIXME, 0);
        $statExport[self::FIXME] = $fixmeCount;
        if ($fixmeCount != 0) {
            $ruleResults[self::RULE_FIXME] = self::FAILED;
            $qualityScores['no_' . self::FIXME] = 0;
        } else {
            $ruleResults[self::RULE_FIXME] = self::PASSED;
            $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);
        }

        /**
         * A title should be present
         */
        if (empty($this->metadata[Analytics::TITLE])) {
            $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_TITLE_PRESENT] = "A title is not present in the frontmatter";
            // fallback on the page metadata
            $this->metadata[Analytics::TITLE] = isset($meta[Analytics::TITLE]) ? $meta[Analytics::TITLE] : null;
            $qualityScores[self::RULE_TITLE_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_TITLE_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);
            $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
        }

        /**
         * A description should be present
         */
        if (empty($this->metadata[self::DESCRIPTION])) {
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_DESCRIPTION_PRESENT] = "A description is not present in the frontmatter";
            // fallback on the abstract of the page metadata
            $this->metadata[self::DESCRIPTION] = isset($meta[self::DESCRIPTION]["abstract"])
                ? $meta[self::DESCRIPTION]["abstract"]
                : null;
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
        }

        /**
         * A canonical should be present
         */
        if (empty($this->metadata[UrlCanonical::CANONICAL_PROPERTY])) {
            $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_CANONICAL_PRESENT] = "A canonical is not present in the frontmatter";
        } else {
            $qualityScores[self::RULE_CANONICAL_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);
            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
        }

        /**
         * Outline / Header structure
         */
        $treeError = 0;
        $headersCount = 0;
        if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
            // header() stores the levels as 'h1', 'h2', ... with an id that starts at 1:
            // they are re-indexed from 0 and reduced to their numeric level to be compared
            $headerLevels = array();
            foreach ($this->stats[Analytics::HEADER_POSITION] as $headerTag) {
                $headerLevels[] = (int)ltrim((string)$headerTag, 'hH');
            }
            $headersCount = count($headerLevels);
            unset($statExport[Analytics::HEADER_POSITION]);
            for ($i = 1; $i < $headersCount; $i++) {
                $currentHeaderLevel = $headerLevels[$i];
                $previousHeaderLevel = $headerLevels[$i - 1];
                // a header may go deeper by one level only (h2 -> h3, not h2 -> h4)
                if ($currentHeaderLevel - $previousHeaderLevel > 1) {
                    $treeError += 1;
                    $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . ($i + 1) . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
                }
            }
        }
        if ($treeError > 0 || $headersCount == 0) {
            $qualityScores['correct_outline'] = 0;
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
            if ($headersCount == 0) {
                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "There is no header";
            }
        } else {
            $qualityScores['correct_outline'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
        }


        /**
         * Document length
         */
        $minimalWordCount = 50;
        $maximalWordCount = 1500;
        $wordsCount = $this->stats[Analytics::WORDS_COUNT];
        $correctContentLength = true;
        if ($wordsCount < $minimalWordCount) {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
            $correctContentLength = false;
            $ruleInfo[self::RULE_WORDS_MINIMAL] = "The number of words is less than {$minimalWordCount}";
        } else {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
        }
        if ($wordsCount > $maximalWordCount) {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
            $ruleInfo[self::RULE_WORDS_MAXIMAL] = "The number of words is more than {$maximalWordCount}";
            $correctContentLength = false;
        } else {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
        }
        if ($correctContentLength) {
            $qualityScores['correct_content_length'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
        } else {
            $qualityScores['correct_content_length'] = 0;
        }


        /**
         * Average Number of words by header section to text ratio
         */
        $headers = $this->getStat(Analytics::HEADERS_COUNT);
        if ($headers != null) {
            $headerCount = array_sum($headers);
            $headerCount--; // h1 is supposed to have no words
            if ($headerCount > 0) {

                $avgWordsCountBySection = round($wordsCount / $headerCount);
                $statExport['word_section_count']['avg'] = $avgWordsCountBySection;

                /**
                 * Min words by header section
                 */
                $wordsByHeaderMin = 20;
                /**
                 * Max words by header section
                 */
                $wordsByHeaderMax = 300;
                $correctAverageWordsBySection = true;
                if ($avgWordsCountBySection < $wordsByHeaderMin) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = "The number of words by section is less than {$wordsByHeaderMin}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
                }
                if ($avgWordsCountBySection > $wordsByHeaderMax) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
                }
                if ($correctAverageWordsBySection) {
                    $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
                } else {
                    $qualityScores['correct_word_avg_by_section'] = 0;
                }

            }
        }

        /**
         * Internal Backlinks rule
         *
         * If a page is a low quality page, if the process run
         * anonymous, we will not see all {@link ft_backlinks()}
         * we use then the index directly to avoid confusion
         */
        $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
        $countBacklinks = count($backlinks);
        $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks;
        if ($countBacklinks == 0) {
            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is no backlinks";
        } else {
            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
        }

        /**
         * Internal links
         */
        $internalLinksCount = $this->getStat(Analytics::INTERNAL_LINKS_COUNT, 0);
        if ($internalLinksCount == 0) {
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_LINKS_MIN] = "There is no internal links";
        } else {
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $internalLinksCount * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);
        }

        /**
         * Broken Links
         */
        $brokenLinksCount = $this->getStat(Analytics::INTERNAL_LINKS_BROKEN_COUNT, 0);
        if ($brokenLinksCount > 2) {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = "There is {$brokenLinksCount} broken links";
        } else {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
        }

        /**
         * Changes, the more changes the better
         */
        $qualityScores[Analytics::EDITS_COUNT] = $this->getStat(Analytics::EDITS_COUNT, 0) * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);


        /**
         * Rules that comes from the qc plugin
         * but are not yet fully implemented
         *
         * TODO:
         *   * manyformat: 2 points for lot's of formatting
         *     (when stats['chars'] / stats[self::PLAINTEXT] < 3)
         *   * deepquote: 1/2 points for deeply nested quotations (stats['quote_nest'] > 2)
         *   * manyhr: 1/2 points for too many hr (stats['hr'] > 2)
         *   * manybr: 1 point for too many line breaks (stats['linebreak'] > 2)
         *   * singleauthor: 1 point for single author only
         *     (conf 'single_author_only' and count(stats['authors']) == 1)
         *   * plaintext: too much cdata (more than 500 chars), see cdata
         *   * multiformat: 1 point for formattings longer than 500 chars
         */

        /**
         * Quality Score
         */
        ksort($qualityScores);
        $qualityScoring = array();
        $qualityScoring["score"] = array_sum($qualityScores);
        $qualityScoring["scores"] = $qualityScores;


        /**
         * The rule that if broken will set the quality level to low
         */
        $brokenRules = array();
        foreach ($ruleResults as $ruleName => $ruleResult) {
            if ($ruleResult == self::FAILED) {
                $brokenRules[] = $ruleName;
            }
        }
        $ruleErrorCount = sizeof($brokenRules);
        if ($ruleErrorCount > 0) {
            $qualityResult = $ruleErrorCount . " quality rules errors";
        } else {
            $qualityResult = "All quality rules passed";
        }

        /**
         * Low level
         * The mandatory rules are a comma separated list of rule keys
         */
        $mandatoryRulesConf = $this->getConf(
            self::CONF_MANDATORY_QUALITY_RULES,
            implode(",", self::CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE)
        );
        $mandatoryRulesBroken = [];
        foreach (explode(",", (string)$mandatoryRulesConf) as $lowLevelRule) {
            $lowLevelRule = trim($lowLevelRule);
            if ($lowLevelRule !== '' && in_array($lowLevelRule, $brokenRules, true)) {
                $mandatoryRulesBroken[] = $lowLevelRule;
            }
        }
        $lowLevel = sizeof($mandatoryRulesBroken) > 0;
        LowQualityPage::setLowQualityPage($ID, $lowLevel);

        /**
         * Building the quality object in order
         */
        $quality = array();
        $quality["low"] = $lowLevel;
        if ($lowLevel) {
            // a list: the values are sorted (a sort on the keys would be a no-op)
            sort($mandatoryRulesBroken);
            $quality['failed_mandatory_rules'] = $mandatoryRulesBroken;
        }
        $quality["scoring"] = $qualityScoring;
        $quality["rules"][self::RESULT] = $qualityResult;
        if (!empty($ruleInfo)) {
            $quality["rules"]["info"] = $ruleInfo;
        }

        ksort($ruleResults);
        $quality["rules"]['details'] = $ruleResults;

        /**
         * Metadata
         * The hours are formatted on 24 hours (H) to not be ambiguous
         */
        $this->metadata[Analytics::TITLE] = isset($meta['title']) ? $meta['title'] : null;
        $timestampCreation = $meta['date']['created'];
        $this->metadata[self::DATE_CREATED] = date('Y-m-d H:i:s', $timestampCreation);
        $timestampModification = $meta['date']['modified'];
        $this->metadata[Analytics::DATE_MODIFIED] = date('Y-m-d H:i:s', $timestampModification);
        $this->metadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
        $this->metadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);

        /**
         * Building the Top JSON in order
         */
        $json = array();
        $json["id"] = $ID;
        $json['metadata'] = $this->metadata;
        ksort($statExport);
        $json[Analytics::STATISTICS] = $statExport;
        $json[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end


        /**
         * The result can be seen with
         * doku.php?id=somepage&do=export_combo_analytics
         */
        /**
         * Set the header for the export.php file
         */
        p_set_metadata($ID, array("format" =>
            array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))
        ));
        $json_encoded = json_encode($json, JSON_PRETTY_PRINT);
        if ($json_encoded === false) {
            LogUtility::msg("The analytics of the page ({$ID}) could not be encoded in JSON: " . json_last_error_msg());
        }

        $this->storeAnalytics($ID, $json_encoded);
        $this->doc .= $json_encoded;

    }

    /**
     * Collect the number of edits and the number of edits by author
     * from the page changelog into the stats
     *
     * @param string $pageId the page id
     * @param array $meta the page metadata (the date of the last change is added to the revisions)
     */
    private function processChangeLog($pageId, $meta)
    {
        $changelog = new PageChangeLog($pageId);
        $revs = $changelog->getRevisions(0, 10000);
        array_push($revs, $meta['last_change']['date']);
        $this->stats[Analytics::EDITS_COUNT] = count($revs);
        foreach ($revs as $rev) {
            $info = $changelog->getRevisionInfo($rev);
            // '*' collects the edits without a known user
            $author = (is_array($info) && !empty($info['user'])) ? $info['user'] : '*';
            if (!isset($this->stats['authors'][$author])) {
                $this->stats['authors'][$author] = 0;
            }
            $this->stats['authors'][$author] += 1;
        }
    }

    /**
     * Store the analytics in the PAGES table when Sqlite is available
     * (update of the row of the page if any, insert otherwise)
     *
     * @param string $pageId the page id
     * @param string|false $jsonEncoded the analytics as JSON
     */
    private function storeAnalytics($pageId, $jsonEncoded)
    {
        $sqlite = Sqlite::getSqlite();
        if ($sqlite == null) {
            // Sqlite Plugin not installed
            return;
        }

        $canonical = isset($this->metadata[UrlCanonical::CANONICAL_PROPERTY])
            ? $this->metadata[UrlCanonical::CANONICAL_PROPERTY]
            : null;
        if (empty($canonical)) {
            $canonical = $pageId; // not null constraint unfortunately
        }
        // The order of the values is the order of the placeholders of the update statement
        $entry = array(
            'CANONICAL' => $canonical,
            'ANALYTICS' => $jsonEncoded,
            'ID' => $pageId
        );
        $res = $sqlite->query("SELECT count(*) FROM PAGES where ID = ?", $pageId);
        $pageRowExists = $sqlite->res2single($res) == 1;
        $sqlite->res_close($res);
        if ($pageRowExists) {
            // Upsert not supported on all version
            //$upsert = 'insert into PAGES (ID,CANONICAL,ANALYTICS) values (?,?,?) on conflict (ID,CANONICAL) do update set ANALYTICS = EXCLUDED.ANALYTICS';
            $update = 'update PAGES SET CANONICAL = ?, ANALYTICS = ? where ID=?';
            $res = $sqlite->query($update, $entry);
        } else {
            $res = $sqlite->storeEntry('PAGES', $entry);
        }
        if (!$res) {
            // errorInfo may be an array (PDO): it can not be interpolated directly
            $errorInfo = $sqlite->getAdapter()->getDb()->errorInfo();
            $errorMessage = is_array($errorInfo) ? implode(", ", $errorInfo) : $errorInfo;
            LogUtility::msg("There was a problem during the upsert: {$errorMessage}");
        }
        $sqlite->res_close($res);
    }

    /**
     * Read a stat without notice when it was never collected
     *
     * @param string $key the stat key
     * @param mixed $default the value returned when the stat is not set
     * @return mixed
     */
    private function getStat($key, $default = null)
    {
        return isset($this->stats[$key]) ? $this->stats[$key] : $default;
    }

    /**
     * Add one to a counter of the stats (created at 0 if needed)
     *
     * @param string $key the stat key
     */
    private function incrementStat($key)
    {
        if (!isset($this->stats[$key])) {
            $this->stats[$key] = 0;
        }
        $this->stats[$key]++;
    }

    /**
     * @return string the format of this renderer
     */
    public function getFormat()
    {
        return Analytics::RENDERER_FORMAT;
    }

    /**
     * The internal link stats are delegated to {@link LinkUtility::processInternalLinkStats()}
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {

        LinkUtility::processInternalLinkStats($id, $this->stats);

    }

    /**
     * Count the external links
     */
    public function externallink($url, $name = null)
    {
        $this->incrementStat(Analytics::EXTERNAL_LINKS_COUNT);
    }

    /**
     * Count the headers by level and record the level of each header
     * in the order of the document ('h' followed by the level, first id is 1)
     */
    public function header($text, $level, $pos)
    {
        $headerTag = 'h' . $level;
        if (!isset($this->stats[Analytics::HEADERS_COUNT][$headerTag])) {
            $this->stats[Analytics::HEADERS_COUNT][$headerTag] = 0;
        }
        $this->stats[Analytics::HEADERS_COUNT][$headerTag]++;
        $this->headerId++;
        $this->stats[Analytics::HEADER_POSITION][$this->headerId] = $headerTag;

    }

    /**
     * Count the FIXME smileys
     */
    public function smiley($smiley)
    {
        if ($smiley == 'FIXME') {
            $this->incrementStat(self::FIXME);
        }
    }

    /**
     * Count the line breaks outside of a table
     */
    public function linebreak()
    {
        if (!$this->tableopen) {
            $this->incrementStat('linebreak');
        }
    }

    public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = true;
    }

    public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = false;
    }

    /**
     * Count the horizontal rules
     */
    public function hr()
    {
        $this->incrementStat('hr');
    }

    /**
     * Count the quotes and keep the deepest nesting level
     */
    public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->incrementStat('quote_count');
        $this->quotelevel++;
        $this->stats['quote_nest'] = max($this->quotelevel, $this->getStat('quote_nest', 0));
    }

    public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->quotelevel--;
    }

    public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    /**
     * Collect the length of the text found inside a formatting
     * (strong, emphasis, underline)
     *
     * The text received while no formatting is open is ignored.
     */
    public function cdata($text)
    {

        /**
         * Only the cdata received
         * between emphasis_open / underline_open / strong_open and their close
         * is taken into account
         */
        if (!$this->formattingBracket) return;

        $this->plainTextId++;

        /**
         * Length
         */
        $len = strlen($text);
        $this->stats[self::PLAINTEXT][$this->plainTextId]['len'] = $len;


        /**
         * Multi-formatting
         * (one by formatting open above the first one)
         */
        if ($this->formattingBracket > 1) {
            $numberOfFormats = 1 * ($this->formattingBracket - 1);
            if (!isset($this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'])) {
                $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] = 0;
            }
            $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] += $numberOfFormats;
        }

        /**
         * Total (stored at the index 0)
         */
        if (!isset($this->stats[self::PLAINTEXT][0])) {
            $this->stats[self::PLAINTEXT][0] = 0;
        }
        $this->stats[self::PLAINTEXT][0] += $len;
    }

    /**
     * Count the internal medias
     */
    public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
    {
        $this->incrementStat(Analytics::INTERNAL_MEDIAS_COUNT);
    }

    /**
     * Count the external medias
     */
    public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
    {
        $this->incrementStat(Analytics::EXTERNAL_MEDIAS);
    }

    /**
     * Reset all the processing data
     * (stats, metadata and the counters used while walking the instructions)
     */
    public function reset()
    {
        $this->stats = array();
        $this->metadata = array();
        $this->headerId = 0;
        $this->quotelevel = 0;
        $this->formattingBracket = 0;
        $this->tableopen = false;
        $this->plainTextId = 0;
    }

    /**
     * Set a metadata that will be exported (title, description, canonical, ...)
     *
     * @param string $key
     * @param mixed $value
     */
    public function setMeta($key, $value)
    {
        $this->metadata[$key] = $value;
    }


}