<?php


use ComboStrap\Analytics;
use ComboStrap\LinkUtility;
use ComboStrap\StringUtility;

use ComboStrap\Page;
use dokuwiki\ChangeLog\PageChangeLog;

require_once(__DIR__ . '/../class/LowQualityPage.php');
require_once(__DIR__ . '/../class/Analytics.php');


/**
 * An analytics renderer that exports the statistics, the quality report
 * and the metadata of a page in a JSON format.
 *
 * You can export the data with
 * doku.php?id=somepage&do=export_combo_analytics
 */
class renderer_plugin_combo_analytics extends Doku_Renderer
{

    const DATE_CREATED = 'date_created';
    const PLAINTEXT = 'formatted';
    const RESULT = "result";
    const DESCRIPTION = "description";
    const PASSED = "Passed";
    const FAILED = "Failed";
    const FIXME = 'fixme';

    /**
     * Rules key
     */
    const RULE_WORDS_MINIMAL = 'words_min';
    const RULE_OUTLINE_STRUCTURE = "outline_structure";
    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
    const RULE_WORDS_MAXIMAL = "words_max";
    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
    const RULE_DESCRIPTION_PRESENT = 'description_present';
    const RULE_FIXME = "fixme_min";
    const RULE_TITLE_PRESENT = "title_present";
    const RULE_CANONICAL_PRESENT = "canonical_present";
    const QUALITY_RULES = [
        self::RULE_CANONICAL_PRESENT,
        self::RULE_DESCRIPTION_PRESENT,
        self::RULE_FIXME,
        self::RULE_INTERNAL_BACKLINKS_MIN,
        self::RULE_INTERNAL_BROKEN_LINKS_MAX,
        self::RULE_INTERNAL_LINKS_MIN,
        self::RULE_OUTLINE_STRUCTURE,
        self::RULE_TITLE_PRESENT,
        self::RULE_WORDS_MINIMAL,
        self::RULE_WORDS_MAXIMAL,
        self::RULE_AVERAGE_WORDS_BY_SECTION_MIN,
        self::RULE_AVERAGE_WORDS_BY_SECTION_MAX
    ];

    /**
     * The default mandatory quality rules
     * (a page that breaks one of them is flagged as a low quality page)
     */
    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
        self::RULE_WORDS_MINIMAL,
        self::RULE_INTERNAL_BACKLINKS_MIN,
        self::RULE_INTERNAL_LINKS_MIN
    ];
    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";

    /**
     * Quality Score factors
     * They are used to calculate the score
     */
    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';
    const SCORING = "scoring";
    const SCORE = "score";
    const HEADER_STRUCT = 'header_struct';


    /**
     * The processing data
     * that should be {@link renderer_plugin_combo_analytics::reset()}
     */
    public $stats = array(); // the stats collected while the instructions are rendered
    protected $analyticsMetadata = array(); // the metadata
    protected $headerId = 0; // the id of the header on the page (first, second, ...)

    /**
     * Rendering state
     * (quote nesting depth, number of open formatting tags, inside a table or not)
     */
    protected $quotelevel = 0;
    protected $formattingBracket = 0;
    protected $tableopen = false;
    private $plainTextId = 0;
    /**
     * @var Page
     */
    private $page;

    /**
     * Start of the rendering: reset the state and bind the current page
     */
    public function document_start()
    {
        $this->reset();
        global $ID;
        $this->page = new Page($ID);

    }


    /**
     * Here the score is calculated
     *
     * The collected statistics are completed (edits, authors, words, links),
     * the quality rules are evaluated, the quality score is computed,
     * the low quality indicator is stored on the page and the whole
     * result is appended as JSON to the rendered document.
     */
    public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        global $ID;
        global $conf;

        /**
         * The exported object
         */
        $statExport = $this->stats;

        /**
         * The metadata
         */
        $dokuWikiMetadata = p_get_metadata($ID);

        /**
         * Edit author stats
         */
        $changelog = new PageChangeLog($ID);
        $revs = $changelog->getRevisions(0, 10000);
        array_push($revs, $dokuWikiMetadata['last_change']['date']);
        $statExport[Analytics::EDITS_COUNT] = count($revs);
        if (!array_key_exists('authors', $statExport)) {
            $statExport['authors'] = [];
        }
        foreach ($revs as $rev) {
            /**
             * Analytics by users
             */
            $info = $changelog->getRevisionInfo($rev);
            if (!is_array($info)) {
                continue;
            }
            $user = "*";
            if (array_key_exists('user', $info)) {
                $user = $info['user'];
            }
            // The counter is initialized once by user.
            // (The key that must be tested is the user, not the literal 'authors',
            // otherwise the counter is reset for every revision and is always 1)
            if (!array_key_exists($user, $statExport['authors'])) {
                $statExport['authors'][$user] = 0;
            }
            $statExport['authors'][$user] += 1;
        }

        /**
         * Word and chars count
         * The word count does not take into account
         * words with non-words characters such as < =
         * Therefore the node and attribute are not taken in the count
         */
        $text = rawWiki($ID);
        $statExport[Analytics::CHARS_COUNT] = strlen($text); // byte length of the raw wiki text
        $statExport[Analytics::WORDS_COUNT] = StringUtility::getWordCount($text);


        /**
         * Internal link distance summary calculation
         */
        if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
            $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
            unset($statExport[Analytics::INTERNAL_LINK_DISTANCE]);
            $countBacklinks = count($linkLengths);
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = null;
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = null;
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = null;
            if ($countBacklinks > 0) {
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = array_sum($linkLengths) / $countBacklinks;
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = max($linkLengths);
                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = min($linkLengths);
            }
        }

        /**
         * Quality Report / Rules
         */
        // The array that hold the results of the quality rules
        $ruleResults = array();
        // The array that hold the quality score details
        $qualityScores = array();
        // The array that hold the advice given for the failed rules
        $ruleInfo = array();


        /**
         * No fixme
         */
        if (array_key_exists(self::FIXME, $this->stats)) {
            $fixmeCount = $this->stats[self::FIXME];
            $statExport[self::FIXME] = $fixmeCount == null ? 0 : $fixmeCount;
            if ($fixmeCount != 0) {
                $ruleResults[self::RULE_FIXME] = self::FAILED;
                $qualityScores['no_' . self::FIXME] = 0;
            } else {
                $ruleResults[self::RULE_FIXME] = self::PASSED;
                $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);
            }
        }

        /**
         * A title should be present
         */
        $titleScore = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);
        if (empty($this->analyticsMetadata[Analytics::TITLE])) {
            $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_TITLE_PRESENT] = "Add a title in the frontmatter for {$titleScore} points";
            $this->analyticsMetadata[Analytics::TITLE] = $dokuWikiMetadata[Analytics::TITLE];
            $qualityScores[self::RULE_TITLE_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_TITLE_PRESENT] = $titleScore;
            $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
        }

        /**
         * A description should be present
         */
        $descScore = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);
        if (empty($this->analyticsMetadata[self::DESCRIPTION])) {
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
            $ruleInfo[self::RULE_DESCRIPTION_PRESENT] = "Add a description in the frontmatter for {$descScore} points";
            $this->analyticsMetadata[self::DESCRIPTION] = $dokuWikiMetadata[self::DESCRIPTION]["abstract"];
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
        } else {
            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $descScore;
            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
        }

        /**
         * A canonical should be present
         * (The start page is not reported when the canonical is missing)
         */
        $canonicalScore = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);
        if (empty($this->analyticsMetadata[Page::CANONICAL_PROPERTY])) {
            $root = $conf['start'];
            if ($ID != $root) {
                $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
                $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
                $ruleInfo[self::RULE_CANONICAL_PRESENT] = "Add a canonical in the frontmatter for {$canonicalScore} points";
            }
        } else {
            $qualityScores[self::RULE_CANONICAL_PRESENT] = $canonicalScore;
            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
        }

        /**
         * Outline / Header structure
         * A header should not be more than one level deeper than the previous one
         */
        $treeError = 0;
        $headersCount = 0;
        if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
            $headersCount = count($this->stats[Analytics::HEADER_POSITION]);
            unset($statExport[Analytics::HEADER_POSITION]);
            for ($i = 1; $i < $headersCount; $i++) {
                $currentHeaderLevel = $this->stats[self::HEADER_STRUCT][$i];
                $previousHeaderLevel = $this->stats[self::HEADER_STRUCT][$i - 1];
                if ($currentHeaderLevel - $previousHeaderLevel > 1) {
                    $treeError += 1;
                    $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . $i . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
                }
            }
            unset($statExport[self::HEADER_STRUCT]);
        }
        $outlinePoints = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
        if ($treeError > 0 || $headersCount == 0) {
            $qualityScores['correct_outline'] = 0;
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
            if ($headersCount == 0) {
                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "Add headings to create a document outline for {$outlinePoints} points";
            }
        } else {
            $qualityScores['correct_outline'] = $outlinePoints;
            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
        }


        /**
         * Document length
         */
        $minimalWordCount = 50;
        $maximalWordCount = 1500;
        $correctContentLength = true;
        $correctLengthScore = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
        $missingWords = $minimalWordCount - $statExport[Analytics::WORDS_COUNT];
        if ($missingWords > 0) {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
            $correctContentLength = false;
            $ruleInfo[self::RULE_WORDS_MINIMAL] = "Add {$missingWords} words to get {$correctLengthScore} points";
        } else {
            $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
        }
        $tooMuchWords = $statExport[Analytics::WORDS_COUNT] - $maximalWordCount;
        if ($tooMuchWords > 0) {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
            $ruleInfo[self::RULE_WORDS_MAXIMAL] = "Delete {$tooMuchWords} words to get {$correctLengthScore} points";
            $correctContentLength = false;
        } else {
            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
        }
        if ($correctContentLength) {
            $qualityScores['correct_content_length'] = $correctLengthScore;
        } else {
            $qualityScores['correct_content_length'] = 0;
        }


        /**
         * Average Number of words by header section to text ratio
         */
        // A page without any header has no header count entry
        $headers = isset($this->stats[Analytics::HEADERS_COUNT]) ? $this->stats[Analytics::HEADERS_COUNT] : null;
        if ($headers != null) {
            $headerCount = array_sum($headers);
            $headerCount--; // h1 is supposed to have no words
            if ($headerCount > 0) {

                // The word count is computed above and stored in the export array
                // (nothing visible in this class stores it in $this->stats)
                $avgWordsCountBySection = round($statExport[Analytics::WORDS_COUNT] / $headerCount);
                $statExport['word_section_count']['avg'] = $avgWordsCountBySection;

                /**
                 * Min words by header section
                 */
                $wordsByHeaderMin = 20;
                /**
                 * Max words by header section
                 */
                $wordsByHeaderMax = 300;
                $correctAverageWordsBySection = true;
                if ($avgWordsCountBySection < $wordsByHeaderMin) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = "The number of words by section is less than {$wordsByHeaderMin}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
                }
                if ($avgWordsCountBySection > $wordsByHeaderMax) {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
                    $correctAverageWordsBySection = false;
                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
                } else {
                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
                }
                if ($correctAverageWordsBySection) {
                    $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
                } else {
                    $qualityScores['correct_word_avg_by_section'] = 0;
                }

            }
        }

        /**
         * Internal Backlinks rule
         *
         * If a page is a low quality page, if the process run
         * anonymous, we will not see all {@link ft_backlinks()}
         * we use then the index directly to avoid confusion
         */
        $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
        $countBacklinks = count($backlinks);
        $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks;
        $backlinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
        if ($countBacklinks == 0) {
            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "Add backlinks for {$backlinkScore} point each";
        } else {

            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks * $backlinkScore;
            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
        }

        /**
         * Internal links
         */
        // A page without any internal link has no internal link count entry
        $internalLinksCount = isset($this->stats[Analytics::INTERNAL_LINKS_COUNT]) ? $this->stats[Analytics::INTERNAL_LINKS_COUNT] : 0;
        $internalLinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);
        if ($internalLinksCount == 0) {
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_LINKS_MIN] = "Add internal links for {$internalLinkScore} point each";
        } else {
            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
            // The score is by internal link (and not by backlink)
            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $internalLinksCount * $internalLinkScore;
        }

        /**
         * Broken Links
         */
        $brokenLinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);
        $brokenLinksCount = 0;
        if (array_key_exists(Analytics::INTERNAL_LINKS_BROKEN_COUNT, $this->stats)) {
            $brokenLinksCount = $this->stats[Analytics::INTERNAL_LINKS_BROKEN_COUNT];
        }
        if ($brokenLinksCount > 2) {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
            $ruleInfo[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = "Delete the {$brokenLinksCount} broken links and add {$brokenLinkScore} points";
        } else {
            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $brokenLinkScore;
            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
        }

        /**
         * Changes, the more changes the better
         */
        $qualityScores[Analytics::EDITS_COUNT] = $statExport[Analytics::EDITS_COUNT] * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);


        /**
         * Quality Score
         */
        ksort($qualityScores);
        $qualityScoring = array();
        $qualityScoring[self::SCORE] = array_sum($qualityScores);
        $qualityScoring["scores"] = $qualityScores;


        /**
         * The rule that if broken will set the quality level to low
         */
        $brokenRules = array();
        foreach ($ruleResults as $ruleName => $ruleResult) {
            if ($ruleResult == self::FAILED) {
                $brokenRules[] = $ruleName;
            }
        }
        $ruleErrorCount = count($brokenRules);
        if ($ruleErrorCount > 0) {
            $qualityResult = $ruleErrorCount . " quality rules errors";
        } else {
            $qualityResult = "All quality rules passed";
        }

        /**
         * Low level Computation
         */
        // The configuration is a comma separated list, the spaces around the names are not significant
        $mandatoryRules = array_map('trim', preg_split("/,/", $this->getConf(self::CONF_MANDATORY_QUALITY_RULES)));
        $mandatoryRulesBroken = [];
        foreach ($mandatoryRules as $lowLevelRule) {
            if (in_array($lowLevelRule, $brokenRules, true)) {
                $mandatoryRulesBroken[] = $lowLevelRule;
            }
        }
        /**
         * If the low level is not set manually
         */
        if (empty($this->analyticsMetadata[Page::LOW_QUALITY_PAGE_INDICATOR])) {
            $lowLevel = count($mandatoryRulesBroken) > 0;
        } else {
            $lowLevel = filter_var($this->analyticsMetadata[Page::LOW_QUALITY_PAGE_INDICATOR], FILTER_VALIDATE_BOOLEAN);
        }
        if (!$this->page->isBar()) {
            $this->page->setLowQualityIndicator($lowLevel);
        } else {
            $this->page->setLowQualityIndicator(false);
        }

        /**
         * Building the quality object in order
         */
        $quality = array();
        $quality[Analytics::LOW] = $lowLevel;
        if (count($mandatoryRulesBroken) > 0) {
            // This is a list of rule names: sorted by value (a sort by key would be a no-op)
            sort($mandatoryRulesBroken);
            $quality[Analytics::FAILED_MANDATORY_RULES] = $mandatoryRulesBroken;
        }
        $quality[self::SCORING] = $qualityScoring;
        $quality[Analytics::RULES][self::RESULT] = $qualityResult;
        if (!empty($ruleInfo)) {
            $quality[Analytics::RULES]["info"] = $ruleInfo;
        }

        ksort($ruleResults);
        $quality[Analytics::RULES][Analytics::DETAILS] = $ruleResults;

        /**
         * Metadata
         */
        $title = $dokuWikiMetadata['title'];
        $this->analyticsMetadata[Analytics::TITLE] = $title;
        $h1 = isset($dokuWikiMetadata['h1']) ? $dokuWikiMetadata['h1'] : null;
        if ($title != $h1) {
            $this->analyticsMetadata[Analytics::H1] = $h1;
        }
        // 24-hour format (H), consistent with the export date below
        // (the 12-hour format without am/pm is ambiguous)
        $timestampCreation = $dokuWikiMetadata['date']['created'];
        $this->analyticsMetadata[self::DATE_CREATED] = date('Y-m-d H:i:s', $timestampCreation);
        $timestampModification = $dokuWikiMetadata['date']['modified'];
        $this->analyticsMetadata[Analytics::DATE_MODIFIED] = date('Y-m-d H:i:s', $timestampModification);
        // Ages in days
        $this->analyticsMetadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
        $this->analyticsMetadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);


        /**
         * Building the Top JSON in order
         */
        $finalStats = array();
        $finalStats["id"] = $ID;
        $finalStats["date"] = date('Y-m-d H:i:s', time());
        $finalStats['metadata'] = $this->analyticsMetadata;
        ksort($statExport);
        $finalStats[Analytics::STATISTICS] = $statExport;
        $finalStats[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end


        /**
         * The result can be seen with
         * doku.php?id=somepage&do=export_combo_analytics
         *
         * Set the header temporarily for the export.php file
         */
        p_set_metadata(
            $ID,
            array("format" => array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))),
            false,
            false // Persistence is not needed, this is just in case this is an export
        );
        $json_encoded = json_encode($finalStats, JSON_PRETTY_PRINT);

        $this->page->saveAnalytics($finalStats);
        $this->doc .= $json_encoded;

    }

    /**
     * @return string the format (mode) of this renderer
     */
    public function getFormat()
    {
        return Analytics::RENDERER_FORMAT;
    }

    /**
     * Collect the stats of an internal link
     * (the link itself is not rendered)
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {

        $link = new LinkUtility($id);
        $link->setType(LinkUtility::TYPE_INTERNAL);
        $link->processLinkStats($this->stats);

    }

    /**
     * Collect the stats of an external link
     * (the link itself is not rendered)
     */
    public function externallink($url, $name = null)
    {
        $link = new LinkUtility($url);
        $link->setType(LinkUtility::TYPE_EXTERNAL);
        if ($name != null) {
            $link->setName($name);
        }
        $link->processLinkStats($this->stats);
    }

    /**
     * Count the headers by level and record their position and their level
     *
     * @param string $text the header text
     * @param int $level the header level
     * @param int $pos the position in the wiki text
     */
    public function header($text, $level, $pos)
    {
        if (!array_key_exists(Analytics::HEADERS_COUNT, $this->stats)) {
            $this->stats[Analytics::HEADERS_COUNT] = [];
        }
        $heading = 'h' . $level;
        if (!array_key_exists(
            $heading,
            $this->stats[Analytics::HEADERS_COUNT])) {
            $this->stats[Analytics::HEADERS_COUNT][$heading] = 0;
        }
        $this->stats[Analytics::HEADERS_COUNT][$heading]++;

        $this->headerId++;
        $this->stats[Analytics::HEADER_POSITION][$this->headerId] = $heading;

        /**
         * Store the level of each heading
         * They should only go from low to highest value
         * for a good outline
         */
        if (!array_key_exists(self::HEADER_STRUCT, $this->stats)) {
            $this->stats[self::HEADER_STRUCT] = [];
        }
        $this->stats[self::HEADER_STRUCT][] = $level;

    }

    /**
     * Count the FIXME smileys
     */
    public function smiley($smiley)
    {
        if ($smiley == 'FIXME') {
            // The counter may not exist yet
            if (!isset($this->stats[self::FIXME])) {
                $this->stats[self::FIXME] = 0;
            }
            $this->stats[self::FIXME]++;
        }
    }

    /**
     * Count the line breaks that are not in a table
     */
    public function linebreak()
    {
        if (!$this->tableopen) {
            if (!isset($this->stats['linebreak'])) {
                $this->stats['linebreak'] = 0;
            }
            $this->stats['linebreak']++;
        }
    }

    public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = true;
    }

    public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->tableopen = false;
    }

    /**
     * Count the horizontal rules
     */
    public function hr()
    {
        if (!isset($this->stats['hr'])) {
            $this->stats['hr'] = 0;
        }
        $this->stats['hr']++;
    }

    /**
     * Count the quotes and keep the deepest nesting level
     */
    public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        if (!isset($this->stats['quote_count'])) {
            $this->stats['quote_count'] = 0;
        }
        $this->stats['quote_count']++;
        $this->quotelevel++;
        $deepestNest = isset($this->stats['quote_nest']) ? $this->stats['quote_nest'] : 0;
        $this->stats['quote_nest'] = max($this->quotelevel, $deepestNest);
    }

    public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->quotelevel--;
    }

    public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket++;
    }

    public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    {
        $this->formattingBracket--;
    }

    public function cdata($text)
    {

        /**
         * It seems that you receive cdata
         * when emphasis_open / underline_open / strong_open
         * Stats are not for them
         */
        if (!$this->formattingBracket) return;

        $this->plainTextId++;

        /**
         * Length
         */
        $len = strlen($text);
        $this->stats[self::PLAINTEXT][$this->plainTextId]['len'] = $len;


        /**
         * Multi-formatting
         */
        if ($this->formattingBracket > 1) {
            $numberOfFormats = 1 * ($this->formattingBracket - 1);
            $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] += $numberOfFormats;
        }

        /**
696007225e5Sgerardnico * Total 697007225e5Sgerardnico */ 698007225e5Sgerardnico $this->stats[self::PLAINTEXT][0] += $len; 699007225e5Sgerardnico } 700007225e5Sgerardnico 701007225e5Sgerardnico public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null) 702007225e5Sgerardnico { 703007225e5Sgerardnico $this->stats[Analytics::INTERNAL_MEDIAS_COUNT]++; 704007225e5Sgerardnico } 705007225e5Sgerardnico 706007225e5Sgerardnico public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null) 707007225e5Sgerardnico { 708007225e5Sgerardnico $this->stats[Analytics::EXTERNAL_MEDIAS]++; 709007225e5Sgerardnico } 710007225e5Sgerardnico 711007225e5Sgerardnico public function reset() 712007225e5Sgerardnico { 713007225e5Sgerardnico $this->stats = array(); 714fa5961eaSgerardnico $this->analyticsMetadata = array(); 715007225e5Sgerardnico $this->headerId = 0; 716007225e5Sgerardnico } 717007225e5Sgerardnico 718007225e5Sgerardnico public function setMeta($key, $value) 719007225e5Sgerardnico { 720fa5961eaSgerardnico $this->analyticsMetadata[$key] = $value; 721007225e5Sgerardnico } 722007225e5Sgerardnico 723007225e5Sgerardnico 724007225e5Sgerardnico} 725007225e5Sgerardnico 726