xref: /plugin/combo/renderer/analytics.php (revision ef295d81c8f9cce3d7e7673ca8909fdd45b2e219)
1007225e5Sgerardnico<?php
2007225e5Sgerardnico
3007225e5Sgerardnico
4007225e5Sgerardnicouse ComboStrap\Analytics;
5007225e5Sgerardnicouse ComboStrap\LinkUtility;
67c33ecc6Sgerardnicouse ComboStrap\StringUtility;
77c33ecc6Sgerardnico
871f916b9Sgerardnicouse ComboStrap\Page;
9007225e5Sgerardnicouse dokuwiki\ChangeLog\PageChangeLog;
10007225e5Sgerardnico
11007225e5Sgerardnicorequire_once(__DIR__ . '/../class/LowQualityPage.php');
12007225e5Sgerardnicorequire_once(__DIR__ . '/../class/Analytics.php');
13007225e5Sgerardnico
14007225e5Sgerardnico
15007225e5Sgerardnico/**
 * An analysis renderer that exports stats/quality/metadata in a json format
17007225e5Sgerardnico * You can export the data with
18007225e5Sgerardnico * doku.php?id=somepage&do=export_combo_analytics
19007225e5Sgerardnico */
20007225e5Sgerardnicoclass renderer_plugin_combo_analytics extends Doku_Renderer
21007225e5Sgerardnico{
227c33ecc6Sgerardnico
23007225e5Sgerardnico    const DATE_CREATED = 'date_created';
24007225e5Sgerardnico    const PLAINTEXT = 'formatted';
25007225e5Sgerardnico    const RESULT = "result";
26007225e5Sgerardnico    const DESCRIPTION = "description";
27007225e5Sgerardnico    const PASSED = "Passed";
28007225e5Sgerardnico    const FAILED = "Failed";
29007225e5Sgerardnico    const FIXME = 'fixme';
30007225e5Sgerardnico
31007225e5Sgerardnico    /**
32007225e5Sgerardnico     * Rules key
33007225e5Sgerardnico     */
34007225e5Sgerardnico    const RULE_WORDS_MINIMAL = 'words_min';
35007225e5Sgerardnico    const RULE_OUTLINE_STRUCTURE = "outline_structure";
36007225e5Sgerardnico    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
37007225e5Sgerardnico    const RULE_WORDS_MAXIMAL = "words_max";
38007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
39007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
40007225e5Sgerardnico    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
41007225e5Sgerardnico    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
42007225e5Sgerardnico    const RULE_DESCRIPTION_PRESENT = 'description_present';
43007225e5Sgerardnico    const RULE_FIXME = "fixme_min";
44007225e5Sgerardnico    const RULE_TITLE_PRESENT = "title_present";
45007225e5Sgerardnico    const RULE_CANONICAL_PRESENT = "canonical_present";
46aa3cb38fSgerardnico    const QUALITY_RULES = [
47aa3cb38fSgerardnico        self::RULE_CANONICAL_PRESENT,
48aa3cb38fSgerardnico        self::RULE_DESCRIPTION_PRESENT,
49aa3cb38fSgerardnico        self::RULE_FIXME,
50aa3cb38fSgerardnico        self::RULE_INTERNAL_BACKLINKS_MIN,
51aa3cb38fSgerardnico        self::RULE_INTERNAL_BROKEN_LINKS_MAX,
52aa3cb38fSgerardnico        self::RULE_INTERNAL_LINKS_MIN,
53aa3cb38fSgerardnico        self::RULE_OUTLINE_STRUCTURE,
54aa3cb38fSgerardnico        self::RULE_TITLE_PRESENT,
55aa3cb38fSgerardnico        self::RULE_WORDS_MINIMAL,
56aa3cb38fSgerardnico        self::RULE_WORDS_MAXIMAL,
57aa3cb38fSgerardnico        self::RULE_AVERAGE_WORDS_BY_SECTION_MIN,
58aa3cb38fSgerardnico        self::RULE_AVERAGE_WORDS_BY_SECTION_MAX
59aa3cb38fSgerardnico    ];
60007225e5Sgerardnico
61007225e5Sgerardnico    /**
     * The default mandatory quality rules
63007225e5Sgerardnico     */
64007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
65007225e5Sgerardnico        self::RULE_WORDS_MINIMAL,
66007225e5Sgerardnico        self::RULE_INTERNAL_BACKLINKS_MIN,
67007225e5Sgerardnico        self::RULE_INTERNAL_LINKS_MIN
68007225e5Sgerardnico    ];
69007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";
70007225e5Sgerardnico
71007225e5Sgerardnico    /**
72007225e5Sgerardnico     * Quality Score factors
73007225e5Sgerardnico     * They are used to calculate the score
74007225e5Sgerardnico     */
75007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
76007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
77007225e5Sgerardnico    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
78007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
79007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
80007225e5Sgerardnico    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
81007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
82007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
83007225e5Sgerardnico    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
84007225e5Sgerardnico    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
85007225e5Sgerardnico    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';
8608ca4f85Sgerardnico    const SCORING = "scoring";
8708ca4f85Sgerardnico    const SCORE = "score";
88007225e5Sgerardnico
89007225e5Sgerardnico
90aa3cb38fSgerardnico
91007225e5Sgerardnico    /**
     * The processing data
     * that should be reset by {@link renderer_plugin_combo_analytics::reset()}
94007225e5Sgerardnico     */
95007225e5Sgerardnico    public $stats = array(); // the stats
96007225e5Sgerardnico    protected $metadata = array(); // the metadata
97007225e5Sgerardnico    protected $headerId = 0; // the id of the header on the page (first, second, ...)
98007225e5Sgerardnico
99007225e5Sgerardnico    /**
     * The purpose of this variable is not known
101007225e5Sgerardnico     */
102007225e5Sgerardnico    protected $quotelevel = 0;
103007225e5Sgerardnico    protected $formattingBracket = 0;
104007225e5Sgerardnico    protected $tableopen = false;
105007225e5Sgerardnico    private $plainTextId = 0;
1062c067407Sgerardnico    /**
1072c067407Sgerardnico     * @var Page
1082c067407Sgerardnico     */
1092c067407Sgerardnico    private $page;
1102c067407Sgerardnico
1112c067407Sgerardnico    public function document_start()
1122c067407Sgerardnico    {
1137c33ecc6Sgerardnico        $this->reset();
1142c067407Sgerardnico        global $ID;
1152c067407Sgerardnico        $this->page = new Page($ID);
1162c067407Sgerardnico
1172c067407Sgerardnico    }
118007225e5Sgerardnico
119007225e5Sgerardnico
120007225e5Sgerardnico    /**
121007225e5Sgerardnico     * Here the score is calculated
122007225e5Sgerardnico     */
123007225e5Sgerardnico    public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
124007225e5Sgerardnico    {
125007225e5Sgerardnico        /**
126f3748b38Sgerardnico         * The exported object
127f3748b38Sgerardnico         */
128f3748b38Sgerardnico        $statExport = $this->stats;
129f3748b38Sgerardnico
130f3748b38Sgerardnico        /**
131007225e5Sgerardnico         * The metadata
132007225e5Sgerardnico         */
133007225e5Sgerardnico        global $ID;
134007225e5Sgerardnico        $meta = p_get_metadata($ID);
135007225e5Sgerardnico
136007225e5Sgerardnico        /**
137f3748b38Sgerardnico         * Edit author stats
138f3748b38Sgerardnico         */
139f3748b38Sgerardnico        $changelog = new PageChangeLog($ID);
140f3748b38Sgerardnico        $revs = $changelog->getRevisions(0, 10000);
141f3748b38Sgerardnico        array_push($revs, $meta['last_change']['date']);
142f3748b38Sgerardnico        $statExport[Analytics::EDITS_COUNT] = count($revs);
143f3748b38Sgerardnico        foreach ($revs as $rev) {
144f3748b38Sgerardnico            $info = $changelog->getRevisionInfo($rev);
145f3748b38Sgerardnico            if ($info['user']) {
146f3748b38Sgerardnico                $statExport['authors'][$info['user']] += 1;
147f3748b38Sgerardnico            } else {
148f3748b38Sgerardnico                $statExport['authors']['*'] += 1;
149f3748b38Sgerardnico            }
150f3748b38Sgerardnico        }
151f3748b38Sgerardnico
152f3748b38Sgerardnico        /**
153007225e5Sgerardnico         * Word and chars count
154007225e5Sgerardnico         * The word count does not take into account
155007225e5Sgerardnico         * words with non-words characters such as < =
156007225e5Sgerardnico         * Therefore the node and attribute are not taken in the count
157007225e5Sgerardnico         */
158007225e5Sgerardnico        $text = rawWiki($ID);
159f3748b38Sgerardnico        $statExport[Analytics::CHARS_COUNT] = strlen($text);
1607c33ecc6Sgerardnico        $statExport[Analytics::WORDS_COUNT] = StringUtility::getWordCount($text);
161007225e5Sgerardnico
162007225e5Sgerardnico
163007225e5Sgerardnico        /**
164007225e5Sgerardnico         * Internal link distance summary calculation
165007225e5Sgerardnico         */
166007225e5Sgerardnico        if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
167007225e5Sgerardnico            $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
168007225e5Sgerardnico            unset($statExport[Analytics::INTERNAL_LINK_DISTANCE]);
169007225e5Sgerardnico            $countBacklinks = count($linkLengths);
170007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = null;
171007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = null;
172007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = null;
173007225e5Sgerardnico            if ($countBacklinks > 0) {
174007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = array_sum($linkLengths) / $countBacklinks;
175007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = max($linkLengths);
176007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = min($linkLengths);
177007225e5Sgerardnico            }
178007225e5Sgerardnico        }
179007225e5Sgerardnico
180007225e5Sgerardnico        /**
181007225e5Sgerardnico         * Quality Report / Rules
182007225e5Sgerardnico         */
183007225e5Sgerardnico        // The array that hold the results of the quality rules
184007225e5Sgerardnico        $ruleResults = array();
185007225e5Sgerardnico        // The array that hold the quality score details
186007225e5Sgerardnico        $qualityScores = array();
187007225e5Sgerardnico
188007225e5Sgerardnico
189007225e5Sgerardnico        /**
190007225e5Sgerardnico         * No fixme
191007225e5Sgerardnico         */
192007225e5Sgerardnico        $fixmeCount = $this->stats[self::FIXME];
193007225e5Sgerardnico        $statExport[self::FIXME] = $fixmeCount == null ? 0 : $fixmeCount;
194007225e5Sgerardnico        if ($fixmeCount != 0) {
195007225e5Sgerardnico            $ruleResults[self::RULE_FIXME] = self::FAILED;
196007225e5Sgerardnico            $qualityScores['no_' . self::FIXME] = 0;
197007225e5Sgerardnico        } else {
198007225e5Sgerardnico            $ruleResults[self::RULE_FIXME] = self::PASSED;
1997c33ecc6Sgerardnico            $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);
200007225e5Sgerardnico        }
201007225e5Sgerardnico
202007225e5Sgerardnico        /**
203007225e5Sgerardnico         * A title should be present
204007225e5Sgerardnico         */
20508ca4f85Sgerardnico        $titleScore = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);
206007225e5Sgerardnico        if (empty($this->metadata[Analytics::TITLE])) {
207007225e5Sgerardnico            $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
20808ca4f85Sgerardnico            $ruleInfo[self::RULE_TITLE_PRESENT] = "Add a title in the frontmatter for {$titleScore} points";
209007225e5Sgerardnico            $this->metadata[Analytics::TITLE] = $meta[Analytics::TITLE];
210007225e5Sgerardnico            $qualityScores[self::RULE_TITLE_PRESENT] = 0;
211007225e5Sgerardnico        } else {
2127c33ecc6Sgerardnico            $qualityScores[self::RULE_TITLE_PRESENT] = $titleScore;
213007225e5Sgerardnico            $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
214007225e5Sgerardnico        }
215007225e5Sgerardnico
216007225e5Sgerardnico        /**
217007225e5Sgerardnico         * A description should be present
218007225e5Sgerardnico         */
21908ca4f85Sgerardnico        $descScore = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);
220007225e5Sgerardnico        if (empty($this->metadata[self::DESCRIPTION])) {
221007225e5Sgerardnico            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
22208ca4f85Sgerardnico            $ruleInfo[self::RULE_DESCRIPTION_PRESENT] = "Add a description in the frontmatter for {$descScore} points";
223007225e5Sgerardnico            $this->metadata[self::DESCRIPTION] = $meta[self::DESCRIPTION]["abstract"];
224007225e5Sgerardnico            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
225007225e5Sgerardnico        } else {
2267c33ecc6Sgerardnico            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $descScore;
227007225e5Sgerardnico            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
228007225e5Sgerardnico        }
229007225e5Sgerardnico
230007225e5Sgerardnico        /**
231007225e5Sgerardnico         * A canonical should be present
232007225e5Sgerardnico         */
23308ca4f85Sgerardnico        $canonicalScore = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);
23471f916b9Sgerardnico        if (empty($this->metadata[Page::CANONICAL_PROPERTY])) {
235f3748b38Sgerardnico            global $conf;
236f3748b38Sgerardnico            $root = $conf['start'];
237f3748b38Sgerardnico            if ($ID != $root) {
238007225e5Sgerardnico                $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
239007225e5Sgerardnico                $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
24008ca4f85Sgerardnico                $ruleInfo[self::RULE_CANONICAL_PRESENT] = "Add a canonical in the frontmatter for {$canonicalScore} points";
241f3748b38Sgerardnico            }
242007225e5Sgerardnico        } else {
2437c33ecc6Sgerardnico            $qualityScores[self::RULE_CANONICAL_PRESENT] = $canonicalScore;
244007225e5Sgerardnico            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
245007225e5Sgerardnico        }
246007225e5Sgerardnico
247007225e5Sgerardnico        /**
248007225e5Sgerardnico         * Outline / Header structure
249007225e5Sgerardnico         */
250007225e5Sgerardnico        $treeError = 0;
251007225e5Sgerardnico        $headersCount = 0;
252007225e5Sgerardnico        if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
253007225e5Sgerardnico            $headersCount = count($this->stats[Analytics::HEADER_POSITION]);
254007225e5Sgerardnico            unset($statExport[Analytics::HEADER_POSITION]);
255007225e5Sgerardnico            for ($i = 1; $i < $headersCount; $i++) {
256007225e5Sgerardnico                $currentHeaderLevel = $this->stats['header_struct'][$i];
257007225e5Sgerardnico                $previousHeaderLevel = $this->stats['header_struct'][$i - 1];
258007225e5Sgerardnico                if ($currentHeaderLevel - $previousHeaderLevel > 1) {
259007225e5Sgerardnico                    $treeError += 1;
260007225e5Sgerardnico                    $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . $i . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
261007225e5Sgerardnico                }
262007225e5Sgerardnico            }
263007225e5Sgerardnico        }
264007225e5Sgerardnico        if ($treeError > 0 || $headersCount == 0) {
265007225e5Sgerardnico            $qualityScores['correct_outline'] = 0;
266007225e5Sgerardnico            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
267007225e5Sgerardnico            if ($headersCount == 0) {
268007225e5Sgerardnico                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "There is no header";
269007225e5Sgerardnico            }
270007225e5Sgerardnico        } else {
271007225e5Sgerardnico            $qualityScores['correct_outline'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
272007225e5Sgerardnico            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
273007225e5Sgerardnico        }
274007225e5Sgerardnico
275007225e5Sgerardnico
276007225e5Sgerardnico        /**
277007225e5Sgerardnico         * Document length
278007225e5Sgerardnico         */
279007225e5Sgerardnico        $minimalWordCount = 50;
280007225e5Sgerardnico        $maximalWordCount = 1500;
281007225e5Sgerardnico        $correctContentLength = true;
28208ca4f85Sgerardnico        $correctLengthScore = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
28308ca4f85Sgerardnico        $missingWords = $minimalWordCount - $statExport[Analytics::WORDS_COUNT];
28408ca4f85Sgerardnico        if ($missingWords > 0) {
285007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
286007225e5Sgerardnico            $correctContentLength = false;
28708ca4f85Sgerardnico            $ruleInfo[self::RULE_WORDS_MINIMAL] = "Add {$missingWords} words to get {$correctLengthScore} points";
288007225e5Sgerardnico        } else {
289007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
290007225e5Sgerardnico        }
29108ca4f85Sgerardnico        $tooMuchWords = $statExport[Analytics::WORDS_COUNT] - $maximalWordCount;
29208ca4f85Sgerardnico        if ($tooMuchWords > 0) {
293007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
29408ca4f85Sgerardnico            $ruleInfo[self::RULE_WORDS_MAXIMAL] = "Delete {$tooMuchWords} words to get {$correctLengthScore} points";
295007225e5Sgerardnico            $correctContentLength = false;
296007225e5Sgerardnico        } else {
297007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
298007225e5Sgerardnico        }
299007225e5Sgerardnico        if ($correctContentLength) {
30008ca4f85Sgerardnico            $qualityScores['correct_content_length'] = $correctLengthScore;
301007225e5Sgerardnico        } else {
302007225e5Sgerardnico            $qualityScores['correct_content_length'] = 0;
303007225e5Sgerardnico        }
304007225e5Sgerardnico
305007225e5Sgerardnico
306007225e5Sgerardnico        /**
307007225e5Sgerardnico         * Average Number of words by header section to text ratio
308007225e5Sgerardnico         */
309007225e5Sgerardnico        $headers = $this->stats[Analytics::HEADERS_COUNT];
310007225e5Sgerardnico        if ($headers != null) {
311007225e5Sgerardnico            $headerCount = array_sum($headers);
312007225e5Sgerardnico            $headerCount--; // h1 is supposed to have no words
313007225e5Sgerardnico            if ($headerCount > 0) {
314007225e5Sgerardnico
315007225e5Sgerardnico                $avgWordsCountBySection = round($this->stats[Analytics::WORDS_COUNT] / $headerCount);
316007225e5Sgerardnico                $statExport['word_section_count']['avg'] = $avgWordsCountBySection;
317007225e5Sgerardnico
318007225e5Sgerardnico                /**
319007225e5Sgerardnico                 * Min words by header section
320007225e5Sgerardnico                 */
321007225e5Sgerardnico                $wordsByHeaderMin = 20;
322007225e5Sgerardnico                /**
323007225e5Sgerardnico                 * Max words by header section
324007225e5Sgerardnico                 */
325007225e5Sgerardnico                $wordsByHeaderMax = 300;
326007225e5Sgerardnico                $correctAverageWordsBySection = true;
327007225e5Sgerardnico                if ($avgWordsCountBySection < $wordsByHeaderMin) {
328007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
329007225e5Sgerardnico                    $correctAverageWordsBySection = false;
33008ca4f85Sgerardnico                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = "The number of words by section is less than {$wordsByHeaderMin}";
331007225e5Sgerardnico                } else {
332007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
333007225e5Sgerardnico                }
334007225e5Sgerardnico                if ($avgWordsCountBySection > $wordsByHeaderMax) {
335007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
336007225e5Sgerardnico                    $correctAverageWordsBySection = false;
337007225e5Sgerardnico                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
338007225e5Sgerardnico                } else {
339007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
340007225e5Sgerardnico                }
341007225e5Sgerardnico                if ($correctAverageWordsBySection) {
342007225e5Sgerardnico                    $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
343007225e5Sgerardnico                } else {
344007225e5Sgerardnico                    $qualityScores['correct_word_avg_by_section'] = 0;
345007225e5Sgerardnico                }
346007225e5Sgerardnico
347007225e5Sgerardnico            }
348007225e5Sgerardnico        }
349007225e5Sgerardnico
350007225e5Sgerardnico        /**
351007225e5Sgerardnico         * Internal Backlinks rule
352007225e5Sgerardnico         *
353007225e5Sgerardnico         * If a page is a low quality page, if the process run
354007225e5Sgerardnico         * anonymous, we will not see all {@link ft_backlinks()}
355007225e5Sgerardnico         * we use then the index directly to avoid confusion
356007225e5Sgerardnico         */
357007225e5Sgerardnico        $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
358007225e5Sgerardnico        $countBacklinks = count($backlinks);
359007225e5Sgerardnico        $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks;
360d262537cSgerardnico        $backlinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
361007225e5Sgerardnico        if ($countBacklinks == 0) {
362007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
363007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
364d262537cSgerardnico            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "Add backlinks for {$backlinkScore} point each";
365007225e5Sgerardnico        } else {
366d262537cSgerardnico
367d262537cSgerardnico            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks * $backlinkScore;
368007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
369007225e5Sgerardnico        }
370007225e5Sgerardnico
371007225e5Sgerardnico        /**
372007225e5Sgerardnico         * Internal links
373007225e5Sgerardnico         */
374007225e5Sgerardnico        $internalLinksCount = $this->stats[Analytics::INTERNAL_LINKS_COUNT];
375d262537cSgerardnico        $internalLinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);
376007225e5Sgerardnico        if ($internalLinksCount == 0) {
377007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
378007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
379d262537cSgerardnico            $ruleInfo[self::RULE_INTERNAL_LINKS_MIN] = "Add internal links for {$internalLinkScore} point each";
380007225e5Sgerardnico        } else {
381007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
382d262537cSgerardnico            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $countBacklinks * $internalLinkScore;
383007225e5Sgerardnico        }
384007225e5Sgerardnico
385007225e5Sgerardnico        /**
386007225e5Sgerardnico         * Broken Links
387007225e5Sgerardnico         */
388d262537cSgerardnico        $brokenLinkScore = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);
389007225e5Sgerardnico        $brokenLinksCount = $this->stats[Analytics::INTERNAL_LINKS_BROKEN_COUNT];
390007225e5Sgerardnico        if ($brokenLinksCount > 2) {
391007225e5Sgerardnico            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
392007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
393d262537cSgerardnico            $ruleInfo[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = "Delete the {$brokenLinksCount} broken links and add {$brokenLinkScore} points";
394007225e5Sgerardnico        } else {
395d262537cSgerardnico            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $brokenLinkScore;
396007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
397007225e5Sgerardnico        }
398007225e5Sgerardnico
399007225e5Sgerardnico        /**
400007225e5Sgerardnico         * Changes, the more changes the better
401007225e5Sgerardnico         */
4027c33ecc6Sgerardnico        $qualityScores[Analytics::EDITS_COUNT] = $this->stats[Analytics::EDITS_COUNT] * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);
403007225e5Sgerardnico
404007225e5Sgerardnico
405007225e5Sgerardnico        /**
406007225e5Sgerardnico         * Quality Score
407007225e5Sgerardnico         */
408007225e5Sgerardnico        ksort($qualityScores);
409007225e5Sgerardnico        $qualityScoring = array();
41008ca4f85Sgerardnico        $qualityScoring[self::SCORE] = array_sum($qualityScores);
411007225e5Sgerardnico        $qualityScoring["scores"] = $qualityScores;
412007225e5Sgerardnico
413007225e5Sgerardnico
414007225e5Sgerardnico        /**
415007225e5Sgerardnico         * The rule that if broken will set the quality level to low
416007225e5Sgerardnico         */
417007225e5Sgerardnico        $brokenRules = array();
418007225e5Sgerardnico        foreach ($ruleResults as $ruleName => $ruleResult) {
419007225e5Sgerardnico            if ($ruleResult == self::FAILED) {
420007225e5Sgerardnico                $brokenRules[] = $ruleName;
421007225e5Sgerardnico            }
422007225e5Sgerardnico        }
423007225e5Sgerardnico        $ruleErrorCount = sizeof($brokenRules);
424007225e5Sgerardnico        if ($ruleErrorCount > 0) {
425007225e5Sgerardnico            $qualityResult = $ruleErrorCount . " quality rules errors";
426007225e5Sgerardnico        } else {
427007225e5Sgerardnico            $qualityResult = "All quality rules passed";
428007225e5Sgerardnico        }
429007225e5Sgerardnico
430007225e5Sgerardnico        /**
431007225e5Sgerardnico         * Low level
432007225e5Sgerardnico         */
433007225e5Sgerardnico        $mandatoryRules = preg_split("/,/", $this->getConf(self::CONF_MANDATORY_QUALITY_RULES));
434007225e5Sgerardnico        $mandatoryRulesBroken = [];
435007225e5Sgerardnico        foreach ($mandatoryRules as $lowLevelRule) {
436007225e5Sgerardnico            if (in_array($lowLevelRule, $brokenRules)) {
437007225e5Sgerardnico                $mandatoryRulesBroken[] = $lowLevelRule;
438007225e5Sgerardnico            }
439007225e5Sgerardnico        }
440007225e5Sgerardnico        $lowLevel = false;
441007225e5Sgerardnico        if (sizeof($mandatoryRulesBroken) > 0) {
442007225e5Sgerardnico            $lowLevel = true;
443007225e5Sgerardnico        }
4447c33ecc6Sgerardnico        $this->page->setLowQualityIndicator($lowLevel);
445007225e5Sgerardnico
446007225e5Sgerardnico        /**
447007225e5Sgerardnico         * Building the quality object in order
448007225e5Sgerardnico         */
449f3748b38Sgerardnico        $quality[Analytics::LOW] = $lowLevel;
450007225e5Sgerardnico        if (sizeof($mandatoryRulesBroken) > 0) {
451007225e5Sgerardnico            ksort($mandatoryRulesBroken);
452722648eaSgerardnico            $quality[Analytics::FAILED_MANDATORY_RULES] = $mandatoryRulesBroken;
453007225e5Sgerardnico        }
45408ca4f85Sgerardnico        $quality[self::SCORING] = $qualityScoring;
455f3748b38Sgerardnico        $quality[Analytics::RULES][self::RESULT] = $qualityResult;
456007225e5Sgerardnico        if (!empty($ruleInfo)) {
457f3748b38Sgerardnico            $quality[Analytics::RULES]["info"] = $ruleInfo;
458007225e5Sgerardnico        }
459007225e5Sgerardnico
460007225e5Sgerardnico        ksort($ruleResults);
461f3748b38Sgerardnico        $quality[Analytics::RULES][Analytics::DETAILS] = $ruleResults;
462007225e5Sgerardnico
463007225e5Sgerardnico        /**
464007225e5Sgerardnico         * Metadata
465007225e5Sgerardnico         */
466c42a1196Sgerardnico        $title = $meta['title'];
467c42a1196Sgerardnico        $this->metadata[Analytics::TITLE] = $title;
468c42a1196Sgerardnico        if ($title != $meta['h1']) {
469c42a1196Sgerardnico            $this->metadata[Analytics::H1] = $meta['h1'];
470c42a1196Sgerardnico        }
471007225e5Sgerardnico        $timestampCreation = $meta['date']['created'];
472007225e5Sgerardnico        $this->metadata[self::DATE_CREATED] = date('Y-m-d h:i:s', $timestampCreation);
473007225e5Sgerardnico        $timestampModification = $meta['date']['modified'];
474007225e5Sgerardnico        $this->metadata[Analytics::DATE_MODIFIED] = date('Y-m-d h:i:s', $timestampModification);
475007225e5Sgerardnico        $this->metadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
476007225e5Sgerardnico        $this->metadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);
477007225e5Sgerardnico
478007225e5Sgerardnico
479007225e5Sgerardnico        /**
480007225e5Sgerardnico         * Building the Top JSON in order
481007225e5Sgerardnico         */
482007225e5Sgerardnico        global $ID;
4832c067407Sgerardnico        $finalStats = array();
4842c067407Sgerardnico        $finalStats["id"] = $ID;
485c42a1196Sgerardnico        $finalStats["date"] = date('Y-m-d H:i:s', time());
4862c067407Sgerardnico        $finalStats['metadata'] = $this->metadata;
487007225e5Sgerardnico        ksort($statExport);
4882c067407Sgerardnico        $finalStats[Analytics::STATISTICS] = $statExport;
4892c067407Sgerardnico        $finalStats[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end
490007225e5Sgerardnico
491007225e5Sgerardnico
492007225e5Sgerardnico        /**
493007225e5Sgerardnico         * The result can be seen with
494007225e5Sgerardnico         * doku.php?id=somepage&do=export_combo_analysis
4957c33ecc6Sgerardnico         *
4967c33ecc6Sgerardnico         * Set the header temporarily for the export.php file
497007225e5Sgerardnico         */
4987c33ecc6Sgerardnico        p_set_metadata(
4997c33ecc6Sgerardnico            $ID,
5007c33ecc6Sgerardnico            array("format" => array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))),
5017c33ecc6Sgerardnico            false,
5027c33ecc6Sgerardnico            false // Persistence is not needed, this is just in case this is an export
5037c33ecc6Sgerardnico        );
5042c067407Sgerardnico        $json_encoded = json_encode($finalStats, JSON_PRETTY_PRINT);
505007225e5Sgerardnico
5067c33ecc6Sgerardnico        $this->page->saveAnalytics($finalStats);
507007225e5Sgerardnico        $this->doc .= $json_encoded;
508007225e5Sgerardnico
509007225e5Sgerardnico    }
510007225e5Sgerardnico
511007225e5Sgerardnico    /**
512007225e5Sgerardnico     */
513007225e5Sgerardnico    public function getFormat()
514007225e5Sgerardnico    {
515007225e5Sgerardnico        return Analytics::RENDERER_FORMAT;
516007225e5Sgerardnico    }
517007225e5Sgerardnico
518007225e5Sgerardnico    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
519007225e5Sgerardnico    {
520007225e5Sgerardnico
521f3748b38Sgerardnico        $attribute = array(
5229f4383e9Sgerardnico            LinkUtility::ATTRIBUTE_REF => $id,
523f3748b38Sgerardnico            LinkUtility::ATTRIBUTE_TYPE => LinkUtility::TYPE_INTERNAL
524f3748b38Sgerardnico        );
525f3748b38Sgerardnico        LinkUtility::processLinkStats($attribute, $this->stats);
526007225e5Sgerardnico
527007225e5Sgerardnico    }
528007225e5Sgerardnico
529007225e5Sgerardnico    public function externallink($url, $name = null)
530007225e5Sgerardnico    {
531*ef295d81Sgerardnico        $link = new LinkUtility($url);
532*ef295d81Sgerardnico        $link->setType(LinkUtility::TYPE_EXTERNAL);
533*ef295d81Sgerardnico        if($name !=null) {
534*ef295d81Sgerardnico            $link->setName($name);
535*ef295d81Sgerardnico        }
536*ef295d81Sgerardnico        $link->processLinkStats( $this->stats);
537007225e5Sgerardnico    }
538007225e5Sgerardnico
539007225e5Sgerardnico    public function header($text, $level, $pos)
540007225e5Sgerardnico    {
541007225e5Sgerardnico        $this->stats[Analytics::HEADERS_COUNT]['h' . $level]++;
542007225e5Sgerardnico        $this->headerId++;
543007225e5Sgerardnico        $this->stats[Analytics::HEADER_POSITION][$this->headerId] = 'h' . $level;
544007225e5Sgerardnico
545007225e5Sgerardnico    }
546007225e5Sgerardnico
547007225e5Sgerardnico    public function smiley($smiley)
548007225e5Sgerardnico    {
549007225e5Sgerardnico        if ($smiley == 'FIXME') $this->stats[self::FIXME]++;
550007225e5Sgerardnico    }
551007225e5Sgerardnico
552007225e5Sgerardnico    public function linebreak()
553007225e5Sgerardnico    {
554007225e5Sgerardnico        if (!$this->tableopen) {
555007225e5Sgerardnico            $this->stats['linebreak']++;
556007225e5Sgerardnico        }
557007225e5Sgerardnico    }
558007225e5Sgerardnico
559007225e5Sgerardnico    public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
560007225e5Sgerardnico    {
561007225e5Sgerardnico        $this->tableopen = true;
562007225e5Sgerardnico    }
563007225e5Sgerardnico
564007225e5Sgerardnico    public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
565007225e5Sgerardnico    {
566007225e5Sgerardnico        $this->tableopen = false;
567007225e5Sgerardnico    }
568007225e5Sgerardnico
569007225e5Sgerardnico    public function hr()
570007225e5Sgerardnico    {
571007225e5Sgerardnico        $this->stats['hr']++;
572007225e5Sgerardnico    }
573007225e5Sgerardnico
574007225e5Sgerardnico    public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
575007225e5Sgerardnico    {
576007225e5Sgerardnico        $this->stats['quote_count']++;
577007225e5Sgerardnico        $this->quotelevel++;
578007225e5Sgerardnico        $this->stats['quote_nest'] = max($this->quotelevel, $this->stats['quote_nest']);
579007225e5Sgerardnico    }
580007225e5Sgerardnico
581007225e5Sgerardnico    public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
582007225e5Sgerardnico    {
583007225e5Sgerardnico        $this->quotelevel--;
584007225e5Sgerardnico    }
585007225e5Sgerardnico
586007225e5Sgerardnico    public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
587007225e5Sgerardnico    {
588007225e5Sgerardnico        $this->formattingBracket++;
589007225e5Sgerardnico    }
590007225e5Sgerardnico
591007225e5Sgerardnico    public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
592007225e5Sgerardnico    {
593007225e5Sgerardnico        $this->formattingBracket--;
594007225e5Sgerardnico    }
595007225e5Sgerardnico
596007225e5Sgerardnico    public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
597007225e5Sgerardnico    {
598007225e5Sgerardnico        $this->formattingBracket++;
599007225e5Sgerardnico    }
600007225e5Sgerardnico
601007225e5Sgerardnico    public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
602007225e5Sgerardnico    {
603007225e5Sgerardnico        $this->formattingBracket--;
604007225e5Sgerardnico    }
605007225e5Sgerardnico
606007225e5Sgerardnico    public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
607007225e5Sgerardnico    {
608007225e5Sgerardnico        $this->formattingBracket++;
609007225e5Sgerardnico    }
610007225e5Sgerardnico
611007225e5Sgerardnico    public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
612007225e5Sgerardnico    {
613007225e5Sgerardnico        $this->formattingBracket--;
614007225e5Sgerardnico    }
615007225e5Sgerardnico
616007225e5Sgerardnico    public function cdata($text)
617007225e5Sgerardnico    {
618007225e5Sgerardnico
619007225e5Sgerardnico        /**
620007225e5Sgerardnico         * It seems that you receive cdata
621007225e5Sgerardnico         * when emphasis_open / underline_open / strong_open
622007225e5Sgerardnico         * Stats are not for them
623007225e5Sgerardnico         */
624007225e5Sgerardnico        if (!$this->formattingBracket) return;
625007225e5Sgerardnico
626007225e5Sgerardnico        $this->plainTextId++;
627007225e5Sgerardnico
628007225e5Sgerardnico        /**
629007225e5Sgerardnico         * Length
630007225e5Sgerardnico         */
631007225e5Sgerardnico        $len = strlen($text);
632007225e5Sgerardnico        $this->stats[self::PLAINTEXT][$this->plainTextId]['len'] = $len;
633007225e5Sgerardnico
634007225e5Sgerardnico
635007225e5Sgerardnico        /**
636007225e5Sgerardnico         * Multi-formatting
637007225e5Sgerardnico         */
638007225e5Sgerardnico        if ($this->formattingBracket > 1) {
639007225e5Sgerardnico            $numberOfFormats = 1 * ($this->formattingBracket - 1);
640007225e5Sgerardnico            $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] += $numberOfFormats;
641007225e5Sgerardnico        }
642007225e5Sgerardnico
643007225e5Sgerardnico        /**
644007225e5Sgerardnico         * Total
645007225e5Sgerardnico         */
646007225e5Sgerardnico        $this->stats[self::PLAINTEXT][0] += $len;
647007225e5Sgerardnico    }
648007225e5Sgerardnico
649007225e5Sgerardnico    public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
650007225e5Sgerardnico    {
651007225e5Sgerardnico        $this->stats[Analytics::INTERNAL_MEDIAS_COUNT]++;
652007225e5Sgerardnico    }
653007225e5Sgerardnico
654007225e5Sgerardnico    public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
655007225e5Sgerardnico    {
656007225e5Sgerardnico        $this->stats[Analytics::EXTERNAL_MEDIAS]++;
657007225e5Sgerardnico    }
658007225e5Sgerardnico
659007225e5Sgerardnico    public function reset()
660007225e5Sgerardnico    {
661007225e5Sgerardnico        $this->stats = array();
662007225e5Sgerardnico        $this->metadata = array();
663007225e5Sgerardnico        $this->headerId = 0;
664007225e5Sgerardnico    }
665007225e5Sgerardnico
666007225e5Sgerardnico    public function setMeta($key, $value)
667007225e5Sgerardnico    {
668007225e5Sgerardnico        $this->metadata[$key] = $value;
669007225e5Sgerardnico    }
670007225e5Sgerardnico
671007225e5Sgerardnico
672007225e5Sgerardnico}
673007225e5Sgerardnico
674