<?php


use ComboStrap\Analytics;
use ComboStrap\LinkUtility;
use ComboStrap\LogUtility;
use ComboStrap\LowQualityPage;
use ComboStrap\Sqlite;
use ComboStrap\Text;
use ComboStrap\Page;
use dokuwiki\ChangeLog\PageChangeLog;

require_once(__DIR__ . '/../class/Text.php');
require_once(__DIR__ . '/../class/LowQualityPage.php');
require_once(__DIR__ . '/../class/Analytics.php');


/**
 * An analysis renderer that exports stats/quality/metadata in a json format
 * You can export the data with
 * doku.php?id=somepage&do=export_combo_analytics
 *
 * The renderer callbacks (header, internallink, cdata, ...) only collect
 * counters in {@link renderer_plugin_combo_analytics::$stats}; the rules and
 * the score are evaluated in {@link renderer_plugin_combo_analytics::document_end()}.
 */
class renderer_plugin_combo_analytics extends Doku_Renderer
{
    const DATE_CREATED = 'date_created';
    const PLAINTEXT = 'formatted';
    const RESULT = "result";
    const DESCRIPTION = "description";
    const PASSED = "Passed";
    const FAILED = "Failed";
    const FIXME = 'fixme';

    /**
     * Rules key
     */
    const RULE_WORDS_MINIMAL = 'words_min';
    const RULE_OUTLINE_STRUCTURE = "outline_structure";
    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
    const RULE_WORDS_MAXIMAL = "words_max";
    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
    const RULE_DESCRIPTION_PRESENT = 'description_present';
    const RULE_FIXME = "fixme_min";
    const RULE_TITLE_PRESENT = "title_present";
    const RULE_CANONICAL_PRESENT = "canonical_present";

    /**
     * The default mandatory rules
     * (a page that breaks one of the configured mandatory rules is flagged as low quality)
     */
    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
        self::RULE_WORDS_MINIMAL,
        self::RULE_INTERNAL_BACKLINKS_MIN,
        self::RULE_INTERNAL_LINKS_MIN
    ];
    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";

    /**
     * Quality Score factors
     * They are used to calculate the score
     */
    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';


    /**
     * The processing data
     * that should be {@link renderer_plugin_combo_analytics::reset()}
     */
    public $stats = array(); // the stats
    protected $metadata = array(); // the metadata
    protected $headerId = 0; // the id of the header on the page (first, second, ...)

    /**
     * Processing state used by the callbacks:
     *   * quotelevel: the current quote nesting depth
     *   * formattingBracket: the number of currently open strong/emphasis/underline
     *   * tableopen: true between table_open and table_close
     *   * plainTextId: the sequence id of the cdata recorded inside a formatting
     */
    protected $quotelevel = 0;
    protected $formattingBracket = 0;
    protected $tableopen = false;
    private $plainTextId = 0;

    /**
     * @var Page
     */
    private $page;

    /**
     * The statistics of the previous analytics run (as stored in the database).
     * It was created as a dynamic (therefore public) property by document_start,
     * it's now declared to avoid the dynamic property deprecation of PHP 8.2.
     * It's not read in this class.
     *
     * @var mixed|null
     */
    public $internalLinkBefore = null;

    /**
     * Create the page object and load the statistics of the previous run if any
     */
    public function document_start() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        global $ID;
        $this->page = new Page($ID);
        $analytics = $this->page->getAnalyticsFromDb();
        // isset guards against a stored analytics without the statistics entry
        if (!empty($analytics) && isset($analytics[Analytics::STATISTICS])) {
            $this->internalLinkBefore = $analytics[Analytics::STATISTICS];
        }
    }


    /**
     * Here the score is calculated
     *
     * The rules are evaluated against the collected stats, the low quality
     * flag is set on the page and the final JSON is stored
     * (via {@link Page::saveAnalytics()}) and appended to the output document.
     */
    public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        /**
         * The metadata
         */
        global $ID;
        $meta = p_get_metadata($ID);

        /**
         * Word and chars count
         * The word count does not take into account
         * words with non-words characters such as < =
         * Therefore the node and attribute are not taken in the count
         */
        $text = rawWiki($ID);
        $this->stats[Analytics::CHARS_COUNT] = strlen($text);
        $this->stats[Analytics::WORDS_COUNT] = Text::getWordCount($text);
        $wordsCount = $this->stats[Analytics::WORDS_COUNT];

        /**
         * The exported object
         * (a copy: what is added to the stats after this point is not exported)
         */
        $statExport = $this->stats;


        /**
         * Internal link distance summary calculation
         */
        if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
            $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
            $linkLengthsCount = count($linkLengths);
            $distanceSummary = ['avg' => null, 'max' => null, 'min' => null];
            if ($linkLengthsCount > 0) {
                $distanceSummary['avg'] = array_sum($linkLengths) / $linkLengthsCount;
                $distanceSummary['max'] = max($linkLengths);
                $distanceSummary['min'] = min($linkLengths);
            }
            $statExport[Analytics::INTERNAL_LINK_DISTANCE] = $distanceSummary;
        }

        /**
         * Quality Report / Rules
         */
        // The array that hold the results of the quality rules
        $ruleResults = [];
        // The array that hold the explanation of the failed rules
        $ruleInfo = [];
        // The array that hold the quality score details
        $qualityScores = [];


        /**
         * No fixme
         */
        $fixmeCount = $this->getStat(self::FIXME);
        $statExport[self::FIXME] = $fixmeCount;
        if ($fixmeCount != 0) {
            $ruleResults[self::RULE_FIXME] = self::FAILED;
            $qualityScores['no_' . self::FIXME] = 0;
        } else {
            $ruleResults[self::RULE_FIXME] = self::PASSED;
            $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);
        }

        /**
         * A title should be present
         */
        if (empty($this->metadata[Analytics::TITLE])) {
            $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_TITLE_PRESENT] = "A title is not present in the frontmatter";
            $this->metadata[Analytics::TITLE] = isset($meta[Analytics::TITLE]) ? $meta[Analytics::TITLE] : null;
            $qualityScores[self::RULE_TITLE_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_TITLE_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);
            $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
        }

        /**
         * A description should be present
         */
        if (empty($this->metadata[self::DESCRIPTION])) {
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_DESCRIPTION_PRESENT] = "A description is not present in the frontmatter";
            // Fallback on the abstract of the page metadata
            $this->metadata[self::DESCRIPTION] = isset($meta[self::DESCRIPTION]["abstract"])
                ? $meta[self::DESCRIPTION]["abstract"]
                : null;
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
        }

        /**
         * A canonical should be present
         */
        if (empty($this->metadata[Page::CANONICAL_PROPERTY])) {
            $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_CANONICAL_PRESENT] = "A canonical is not present in the frontmatter";
        } else {
            $qualityScores[self::RULE_CANONICAL_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);
            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
        }

        /**
         * Outline / Header structure
         *
         * The header function stores the headers as 'h' . $level with a key
         * that starts at 1. They are converted here to a 0-based list of
         * integer levels in order to compare each header with its precedent.
         */
        $headerLevels = [];
        if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
            foreach ($this->stats[Analytics::HEADER_POSITION] as $headerTag) {
                $headerLevels[] = (int)substr($headerTag, 1);
            }
            unset($statExport[Analytics::HEADER_POSITION]);
        }
        $headersCount = count($headerLevels);
        $treeError = 0;
        for ($i = 1; $i < $headersCount; $i++) {
            $currentHeaderLevel = $headerLevels[$i];
            $previousHeaderLevel = $headerLevels[$i - 1];
            // A header may go down by only one level at a time (h2 -> h4 is an error)
            if ($currentHeaderLevel - $previousHeaderLevel > 1) {
                $treeError += 1;
                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . ($i + 1) . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
            }
        }
        if ($treeError > 0 || $headersCount == 0) {
            $qualityScores['correct_outline'] = 0;
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
            if ($headersCount == 0) {
                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "There is no header";
            }
        } else {
            $qualityScores['correct_outline'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
        }


        /**
         * Document length
         */
        $minimalWordCount = 50;
        $maximalWordCount = 1500;
        $correctContentLength = true;
        if ($wordsCount < $minimalWordCount) {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
            $correctContentLength = false;
            $ruleInfo[self::RULE_WORDS_MINIMAL] = "The number of words is less than {$minimalWordCount}";
        } else {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
        }
        if ($wordsCount > $maximalWordCount) {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
            $ruleInfo[self::RULE_WORDS_MAXIMAL] = "The number of words is more than {$maximalWordCount}";
            $correctContentLength = false;
        } else {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
        }
        if ($correctContentLength) {
            $qualityScores['correct_content_length'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
        } else {
            $qualityScores['correct_content_length'] = 0;
        }


        /**
         * Average Number of words by header section to text ratio
         */
        $headers = $this->getStat(Analytics::HEADERS_COUNT, null);
        if (!empty($headers)) {
            $headerCount = array_sum($headers);
            $headerCount--; // h1 is supposed to have no words
            if ($headerCount > 0) {

                $avgWordsCountBySection = round($wordsCount / $headerCount);
                $statExport['word_section_count']['avg'] = $avgWordsCountBySection;

                /**
                 * Min words by header section
                 */
                $wordsByHeaderMin = 20;
                /**
                 * Max words by header section
                 */
                $wordsByHeaderMax = 300;
                $correctAverageWordsBySection = true;
                if ($avgWordsCountBySection < $wordsByHeaderMin) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    // The info is stored under the min rule (it was stored under the max rule)
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = "The number of words by section is less than {$wordsByHeaderMin}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
                }
                if ($avgWordsCountBySection > $wordsByHeaderMax) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
                }
                if ($correctAverageWordsBySection) {
                    $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
                } else {
                    $qualityScores['correct_word_avg_by_section'] = 0;
                }

            }
        }

        /**
         * Internal Backlinks rule
         *
         * If a page is a low quality page, if the process run
         * anonymous, we will not see all {@link ft_backlinks()}
         * we use then the index directly to avoid confusion
         */
        $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
        $countBacklinks = count($backlinks);
        $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks;
        if ($countBacklinks == 0) {
            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is no backlinks";
        } else {
            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
        }

        /**
         * Internal links
         */
        $internalLinksCount = $this->getStat(Analytics::INTERNAL_LINKS_COUNT);
        if ($internalLinksCount == 0) {
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_LINKS_MIN] = "There is no internal links";
        } else {
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
            // The score is based on the internal links (it was based on the backlinks count)
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $internalLinksCount * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);
        }

        /**
         * Broken Links
         */
        $brokenLinksCount = $this->getStat(Analytics::INTERNAL_LINKS_BROKEN_COUNT);
        if ($brokenLinksCount > 2) {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = "There is {$brokenLinksCount} broken links";
        } else {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
        }

        /**
         * Changes, the more changes the better
         *
         * The edits are counted before the score calculation
         * (they were counted after and the score was therefore always 0)
         */
        $this->collectChangelogStats($ID, $meta);
        $qualityScores[Analytics::EDITS_COUNT] = $this->stats[Analytics::EDITS_COUNT] * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);


        /**
         * Rules that comes from the qc plugin
         * but are not yet implemented:
         *   * many formatting (chars / formatted chars ratio)
         *   * deeply nested quotations (stats 'quote_nest')
         *   * too many hr (stats 'hr')
         *   * too many line breaks (stats 'linebreak')
         *   * single author only (stats 'authors')
         *   * too much plain text / formatting longer than 500 chars
         */

        /**
         * Quality Score
         */
        ksort($qualityScores);
        $qualityScoring = [];
        $qualityScoring["score"] = array_sum($qualityScores);
        $qualityScoring["scores"] = $qualityScores;


        /**
         * The rule that if broken will set the quality level to low
         */
        $brokenRules = [];
        foreach ($ruleResults as $ruleName => $ruleResult) {
            if ($ruleResult === self::FAILED) {
                $brokenRules[] = $ruleName;
            }
        }
        $ruleErrorCount = count($brokenRules);
        if ($ruleErrorCount > 0) {
            $qualityResult = $ruleErrorCount . " quality rules errors";
        } else {
            $qualityResult = "All quality rules passed";
        }

        /**
         * Low level
         * (the mandatory rules configuration is a comma separated list of rule keys)
         */
        $mandatoryRules = array_map('trim', explode(",", (string)$this->getConf(self::CONF_MANDATORY_QUALITY_RULES)));
        $mandatoryRulesBroken = [];
        foreach ($mandatoryRules as $lowLevelRule) {
            if (in_array($lowLevelRule, $brokenRules, true)) {
                $mandatoryRulesBroken[] = $lowLevelRule;
            }
        }
        $lowLevel = count($mandatoryRulesBroken) > 0;
        LowQualityPage::setLowQualityPage($ID, $lowLevel);

        /**
         * Building the quality object in order
         */
        $quality = [];
        $quality["low"] = $lowLevel;
        if (count($mandatoryRulesBroken) > 0) {
            ksort($mandatoryRulesBroken);
            $quality['failed_mandatory_rules'] = $mandatoryRulesBroken;
        }
        $quality["scoring"] = $qualityScoring;
        $quality["rules"][self::RESULT] = $qualityResult;
        if (!empty($ruleInfo)) {
            $quality["rules"]["info"] = $ruleInfo;
        }

        ksort($ruleResults);
        $quality["rules"]['details'] = $ruleResults;

        /**
         * Metadata
         */
        $title = isset($meta['title']) ? $meta['title'] : null;
        $h1 = isset($meta['h1']) ? $meta['h1'] : null;
        $this->metadata[Analytics::TITLE] = $title;
        if ($title != $h1) {
            $this->metadata[Analytics::H1] = $h1;
        }
        // 'H' (24-hour format): 'h' is a 12-hour format that is ambiguous without the meridian
        if (isset($meta['date']['created'])) {
            $timestampCreation = $meta['date']['created'];
            $this->metadata[self::DATE_CREATED] = date('Y-m-d H:i:s', $timestampCreation);
            $this->metadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
        }
        if (isset($meta['date']['modified'])) {
            $timestampModification = $meta['date']['modified'];
            $this->metadata[Analytics::DATE_MODIFIED] = date('Y-m-d H:i:s', $timestampModification);
            $this->metadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);
        }

        /**
         * Building the Top JSON in order
         */
        $finalStats = [];
        $finalStats["id"] = $ID;
        $finalStats["date"] = date('Y-m-d H:i:s', time());
        $finalStats['metadata'] = $this->metadata;
        ksort($statExport);
        $finalStats[Analytics::STATISTICS] = $statExport;
        $finalStats[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end


        /**
         * The result can be seen with
         * doku.php?id=somepage&do=export_combo_analytics
         */
        /**
         * Set the header for the export.php file
         */
        p_set_metadata($ID, array("format" =>
            array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))
        ));
        $json_encoded = json_encode($finalStats, JSON_PRETTY_PRINT);

        $page = new Page($ID);
        $page->saveAnalytics($finalStats);
        $this->doc .= $json_encoded;

    }

    /**
     * @return string the format of this renderer ({@link Analytics::RENDERER_FORMAT})
     */
    public function getFormat()
    {
        return Analytics::RENDERER_FORMAT;
    }

    /**
     * The internal link statistics are delegated
     * to {@link LinkUtility::processInternalLinkStats()} that receives the stats array
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {

        LinkUtility::processInternalLinkStats($id, $this->stats);

    }

    /**
     * Count the external links
     */
    public function externallink($url, $name = null)
    {
        $this->incrementStat(Analytics::EXTERNAL_LINKS_COUNT);
    }

    /**
     * Count the headers by level and record the level of each header in page order
     *
     * @param string $text - the header text (not used)
     * @param int $level - the header level
     * @param int $pos - the position in the document (not used)
     */
    public function header($text, $level, $pos)
    {
        $headerTag = 'h' . $level;
        if (!isset($this->stats[Analytics::HEADERS_COUNT][$headerTag])) {
            $this->stats[Analytics::HEADERS_COUNT][$headerTag] = 0;
        }
        $this->stats[Analytics::HEADERS_COUNT][$headerTag]++;
        $this->headerId++;
        $this->stats[Analytics::HEADER_POSITION][$this->headerId] = $headerTag;

    }

    /**
     * Count the FIXME smileys
     */
    public function smiley($smiley)
    {
        if ($smiley == 'FIXME') {
            $this->incrementStat(self::FIXME);
        }
    }

    /**
     * Count the line breaks that are not in a table
     */
    public function linebreak()
    {
        if (!$this->tableopen) {
            $this->incrementStat('linebreak');
        }
    }

    public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = true;
    }

    public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = false;
    }

    /**
     * Count the horizontal rules
     */
    public function hr()
    {
        $this->incrementStat('hr');
    }

    /**
     * Count the quotes and record the deepest quote nesting
     */
    public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->incrementStat('quote_count');
        $this->quotelevel++;
        $this->stats['quote_nest'] = max($this->quotelevel, $this->getStat('quote_nest'));
    }

    public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->quotelevel--;
    }

    public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    /**
     * Record the length of the text found inside a formatting
     * (strong, emphasis, underline)
     *
     * The text outside of a formatting is ignored.
     * The entry 0 of the {@link renderer_plugin_combo_analytics::PLAINTEXT} stats
     * holds the total length.
     *
     * @param string $text
     */
    public function cdata($text)
    {

        /**
         * Only the cdata received between a
         * emphasis_open / underline_open / strong_open and its close
         * is taken into account
         */
        if (!$this->formattingBracket) {
            return;
        }

        $this->plainTextId++;

        /**
         * Length
         */
        $len = strlen($text);
        $this->stats[self::PLAINTEXT][$this->plainTextId]['len'] = $len;


        /**
         * Multi-formatting
         * (the entry is new for each cdata, an assignment is then equivalent to an addition)
         */
        if ($this->formattingBracket > 1) {
            $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] = $this->formattingBracket - 1;
        }

        /**
         * Total
         */
        if (!isset($this->stats[self::PLAINTEXT][0])) {
            $this->stats[self::PLAINTEXT][0] = 0;
        }
        $this->stats[self::PLAINTEXT][0] += $len;
    }

    /**
     * Count the internal medias
     */
    public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
    {
        $this->incrementStat(Analytics::INTERNAL_MEDIAS_COUNT);
    }

    /**
     * Count the external medias
     */
    public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
    {
        $this->incrementStat(Analytics::EXTERNAL_MEDIAS);
    }

    /**
     * Reset all the processing data
     * in order to be able to process a new document with the same instance
     */
    public function reset()
    {
        parent::reset();
        $this->stats = array();
        $this->metadata = array();
        $this->headerId = 0;
        $this->quotelevel = 0;
        $this->formattingBracket = 0;
        $this->tableopen = false;
        $this->plainTextId = 0;
    }

    /**
     * Set a metadata that will be exported
     * (and used by the title, description and canonical rules)
     *
     * @param string $key
     * @param mixed $value
     */
    public function setMeta($key, $value)
    {
        $this->metadata[$key] = $value;
    }

    /**
     * Return a collected stat without notice when it was never collected
     *
     * @param string $key - the stat key
     * @param mixed $default - the value returned when the stat is not set
     * @return mixed
     */
    private function getStat($key, $default = 0)
    {
        return isset($this->stats[$key]) ? $this->stats[$key] : $default;
    }

    /**
     * Add 1 to a counter stat (the counter is created if needed)
     *
     * @param string $key - the stat key
     */
    private function incrementStat($key)
    {
        $this->stats[$key] = $this->getStat($key) + 1;
    }

    /**
     * Count the edits and the edits by author from the page changelog
     * and store them in the stats
     *
     * @param string $id - the page id
     * @param array $meta - the page metadata (the last change date is added to the revisions)
     */
    private function collectChangelogStats($id, $meta)
    {
        $changelog = new PageChangeLog($id);
        $revs = $changelog->getRevisions(0, 10000);
        if (isset($meta['last_change']['date'])) {
            $revs[] = $meta['last_change']['date'];
        }
        $this->stats[Analytics::EDITS_COUNT] = count($revs);
        foreach ($revs as $rev) {
            $info = $changelog->getRevisionInfo($rev);
            // '*' collects the edits without a known user
            $author = (is_array($info) && !empty($info['user'])) ? $info['user'] : '*';
            if (!isset($this->stats['authors'][$author])) {
                $this->stats['authors'][$author] = 0;
            }
            $this->stats['authors'][$author] += 1;
        }
    }


}