xref: /plugin/combo/renderer/analytics.php (revision 1c5862d391f305e3b32cb5b3b21a994b9301db59)
1007225e5Sgerardnico<?php
2007225e5Sgerardnico
3007225e5Sgerardnico
4007225e5Sgerardnicouse ComboStrap\Analytics;
5007225e5Sgerardnicouse ComboStrap\LinkUtility;
6007225e5Sgerardnicouse ComboStrap\LogUtility;
7007225e5Sgerardnicouse ComboStrap\LowQualityPage;
8007225e5Sgerardnicouse ComboStrap\Sqlite;
9007225e5Sgerardnicouse ComboStrap\Text;
1071f916b9Sgerardnicouse ComboStrap\Page;
11007225e5Sgerardnicouse dokuwiki\ChangeLog\PageChangeLog;
12007225e5Sgerardnico
13007225e5Sgerardnicorequire_once(__DIR__ . '/../class/Text.php');
14007225e5Sgerardnicorequire_once(__DIR__ . '/../class/LowQualityPage.php');
15007225e5Sgerardnicorequire_once(__DIR__ . '/../class/Analytics.php');
16007225e5Sgerardnico
17007225e5Sgerardnico
/**
 * An analysis renderer that exports the statistics, quality and metadata of a page in JSON format.
 * You can export the data with
 * doku.php?id=somepage&do=export_combo_analytics
 */
23007225e5Sgerardnicoclass renderer_plugin_combo_analytics extends Doku_Renderer
24007225e5Sgerardnico{
25007225e5Sgerardnico    const DATE_CREATED = 'date_created';
26007225e5Sgerardnico    const PLAINTEXT = 'formatted';
27007225e5Sgerardnico    const RESULT = "result";
28007225e5Sgerardnico    const DESCRIPTION = "description";
29007225e5Sgerardnico    const PASSED = "Passed";
30007225e5Sgerardnico    const FAILED = "Failed";
31007225e5Sgerardnico    const FIXME = 'fixme';
32007225e5Sgerardnico
33007225e5Sgerardnico    /**
34007225e5Sgerardnico     * Rules key
35007225e5Sgerardnico     */
36007225e5Sgerardnico    const RULE_WORDS_MINIMAL = 'words_min';
37007225e5Sgerardnico    const RULE_OUTLINE_STRUCTURE = "outline_structure";
38007225e5Sgerardnico    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
39007225e5Sgerardnico    const RULE_WORDS_MAXIMAL = "words_max";
40007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
41007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
42007225e5Sgerardnico    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
43007225e5Sgerardnico    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
44007225e5Sgerardnico    const RULE_DESCRIPTION_PRESENT = 'description_present';
45007225e5Sgerardnico    const RULE_FIXME = "fixme_min";
46007225e5Sgerardnico    const RULE_TITLE_PRESENT = "title_present";
47007225e5Sgerardnico    const RULE_CANONICAL_PRESENT = "canonical_present";
48007225e5Sgerardnico
/**
 * The default mandatory quality rules
 */
52007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
53007225e5Sgerardnico        self::RULE_WORDS_MINIMAL,
54007225e5Sgerardnico        self::RULE_INTERNAL_BACKLINKS_MIN,
55007225e5Sgerardnico        self::RULE_INTERNAL_LINKS_MIN
56007225e5Sgerardnico    ];
57007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";
58007225e5Sgerardnico
59007225e5Sgerardnico    /**
60007225e5Sgerardnico     * Quality Score factors
61007225e5Sgerardnico     * They are used to calculate the score
62007225e5Sgerardnico     */
63007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
64007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
65007225e5Sgerardnico    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
66007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
67007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
68007225e5Sgerardnico    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
69007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
70007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
71007225e5Sgerardnico    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
72007225e5Sgerardnico    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
73007225e5Sgerardnico    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';
74007225e5Sgerardnico
75007225e5Sgerardnico
76007225e5Sgerardnico    /**
77007225e5Sgerardnico     * The processing data
78007225e5Sgerardnico     * that should be {@link  renderer_plugin_combo_analysis::reset()}
79007225e5Sgerardnico     */
80007225e5Sgerardnico    public $stats = array(); // the stats
81007225e5Sgerardnico    protected $metadata = array(); // the metadata
82007225e5Sgerardnico    protected $headerId = 0; // the id of the header on the page (first, second, ...)
83007225e5Sgerardnico
/**
 * Rendering state; the purpose of these variables is not documented (TODO: document or remove)
 */
87007225e5Sgerardnico    protected $quotelevel = 0;
88007225e5Sgerardnico    protected $formattingBracket = 0;
89007225e5Sgerardnico    protected $tableopen = false;
90007225e5Sgerardnico    private $plainTextId = 0;
912c067407Sgerardnico    /**
922c067407Sgerardnico     * @var Page
932c067407Sgerardnico     */
942c067407Sgerardnico    private $page;
952c067407Sgerardnico
/**
 * Called when the rendering of the document starts.
 *
 * Builds the {@link Page} for the current global $ID and, when analytics
 * were already stored for it, keeps the stored statistics on the renderer.
 *
 * NOTE(review): `internalLinkBefore` is not declared among the properties
 * visible at the top of the class - confirm that it is declared elsewhere,
 * otherwise it is created dynamically here.
 */
public function document_start()
{
    global $ID;

    $this->page = new Page($ID);

    $storedAnalytics = $this->page->getAnalyticsFromDb();
    if (empty($storedAnalytics)) {
        // Nothing was stored for this page yet
        return;
    }

    $this->internalLinkBefore = $storedAnalytics[Analytics::STATISTICS];
}
106007225e5Sgerardnico
107007225e5Sgerardnico
/**
 * Called when the rendering of the document ends: the statistics are finalized,
 * the quality rules are evaluated and the quality score is calculated.
 *
 * Steps:
 *   - count the edits and the authors, the characters and the words,
 *   - summarize the internal link distances,
 *   - evaluate each quality rule (Passed / Failed) and its score contribution,
 *   - flag the page as low quality when a mandatory rule is broken,
 *   - save the analytics for the page and append them as JSON to the document.
 *
 * The result can be seen with doku.php?id=somepage&do=export_combo_analytics
 */
public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    /**
     * The metadata
     */
    global $ID;
    $meta = p_get_metadata($ID);

    /**
     * Edits and authors
     * They are calculated first because the quality score uses the edits count
     * and because they must be part of the exported statistics
     */
    $changelog = new PageChangeLog($ID);
    $revs = $changelog->getRevisions(0, 10000);
    array_push($revs, $meta['last_change']['date']);
    $this->stats[Analytics::EDITS_COUNT] = count($revs);
    foreach ($revs as $rev) {
        $info = $changelog->getRevisionInfo($rev);
        // Revisions without a known user are grouped under '*'
        $author = (is_array($info) && !empty($info['user'])) ? $info['user'] : '*';
        if (!isset($this->stats['authors'][$author])) {
            $this->stats['authors'][$author] = 0;
        }
        $this->stats['authors'][$author] += 1;
    }

    /**
     * Word and chars count
     * The word count does not take into account
     * words with non-words characters such as < =
     * Therefore the node and attribute are not taken in the count
     */
    $text = rawWiki($ID);
    // strlen counts bytes: a multi-byte character counts for more than one
    $this->stats[Analytics::CHARS_COUNT] = strlen($text);
    $this->stats[Analytics::WORDS_COUNT] = Text::getWordCount($text);
    $wordsCount = $this->stats[Analytics::WORDS_COUNT];

    /**
     * The exported object
     */
    $statExport = $this->stats;

    /**
     * Internal link distance summary calculation
     * The list of distances is replaced by its avg / max / min
     */
    if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
        $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
        unset($statExport[Analytics::INTERNAL_LINK_DISTANCE]);
        $linkLengthsCount = count($linkLengths);
        $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = null;
        $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = null;
        $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = null;
        if ($linkLengthsCount > 0) {
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = array_sum($linkLengths) / $linkLengthsCount;
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = max($linkLengths);
            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = min($linkLengths);
        }
    }

    /**
     * Quality Report / Rules
     */
    // The array that hold the results of the quality rules
    $ruleResults = array();
    // The array that hold the explanation of the failed rules
    $ruleInfo = array();
    // The array that hold the quality score details
    $qualityScores = array();

    /**
     * No fixme
     */
    $fixmeCount = isset($this->stats[self::FIXME]) ? $this->stats[self::FIXME] : 0;
    $statExport[self::FIXME] = $fixmeCount;
    if ($fixmeCount != 0) {
        $ruleResults[self::RULE_FIXME] = self::FAILED;
        $qualityScores['no_' . self::FIXME] = 0;
    } else {
        $ruleResults[self::RULE_FIXME] = self::PASSED;
        $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);
    }

    /**
     * A title should be present
     */
    if (empty($this->metadata[Analytics::TITLE])) {
        $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
        $ruleInfo[self::RULE_TITLE_PRESENT] = "A title is not present in the frontmatter";
        $this->metadata[Analytics::TITLE] = isset($meta[Analytics::TITLE]) ? $meta[Analytics::TITLE] : null;
        $qualityScores[self::RULE_TITLE_PRESENT] = 0;
    } else {
        $qualityScores[self::RULE_TITLE_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);
        $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
    }

    /**
     * A description should be present
     */
    if (empty($this->metadata[self::DESCRIPTION])) {
        $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
        $ruleInfo[self::RULE_DESCRIPTION_PRESENT] = "A description is not present in the frontmatter";
        $this->metadata[self::DESCRIPTION] = isset($meta[self::DESCRIPTION]["abstract"])
            ? $meta[self::DESCRIPTION]["abstract"]
            : null;
        $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
    } else {
        $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);
        $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
    }

    /**
     * A canonical should be present
     */
    if (empty($this->metadata[Page::CANONICAL_PROPERTY])) {
        $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
        $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
        $ruleInfo[self::RULE_CANONICAL_PRESENT] = "A canonical is not present in the frontmatter";
    } else {
        $qualityScores[self::RULE_CANONICAL_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);
        $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
    }

    /**
     * Outline / Header structure
     * A header should not be more than one level deeper than the previous one
     */
    $treeError = 0;
    $headersCount = 0;
    if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
        // header() stores the levels as 'h<level>' with a key that starts at 1:
        // they are normalized to a 0-based list of integer levels
        $headerLevels = array();
        foreach ($this->stats[Analytics::HEADER_POSITION] as $headerTag) {
            $headerLevels[] = (int)ltrim((string)$headerTag, 'hH');
        }
        $headersCount = count($headerLevels);
        unset($statExport[Analytics::HEADER_POSITION]);
        for ($i = 1; $i < $headersCount; $i++) {
            $currentHeaderLevel = $headerLevels[$i];
            $previousHeaderLevel = $headerLevels[$i - 1];
            if ($currentHeaderLevel - $previousHeaderLevel > 1) {
                $treeError += 1;
                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . ($i + 1) . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
            }
        }
    }
    if ($treeError > 0 || $headersCount == 0) {
        $qualityScores['correct_outline'] = 0;
        $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
        if ($headersCount == 0) {
            $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "There is no header";
        }
    } else {
        $qualityScores['correct_outline'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
        $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
    }

    /**
     * Document length
     */
    $minimalWordCount = 50;
    $maximalWordCount = 1500;
    $correctContentLength = true;
    if ($wordsCount < $minimalWordCount) {
        $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
        $correctContentLength = false;
        $ruleInfo[self::RULE_WORDS_MINIMAL] = "The number of words is less than {$minimalWordCount}";
    } else {
        $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
    }
    if ($wordsCount > $maximalWordCount) {
        $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
        $ruleInfo[self::RULE_WORDS_MAXIMAL] = "The number of words is more than {$maximalWordCount}";
        $correctContentLength = false;
    } else {
        $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
    }
    if ($correctContentLength) {
        $qualityScores['correct_content_length'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
    } else {
        $qualityScores['correct_content_length'] = 0;
    }

    /**
     * Average Number of words by header section to text ratio
     */
    $headers = isset($this->stats[Analytics::HEADERS_COUNT]) ? $this->stats[Analytics::HEADERS_COUNT] : null;
    if ($headers != null) {
        $sectionCount = array_sum($headers);
        $sectionCount--; // h1 is supposed to have no words
        if ($sectionCount > 0) {

            $avgWordsCountBySection = round($wordsCount / $sectionCount);
            $statExport['word_section_count']['avg'] = $avgWordsCountBySection;

            /**
             * Min words by header section
             */
            $wordsByHeaderMin = 20;
            /**
             * Max words by header section
             */
            $wordsByHeaderMax = 300;
            $correctAverageWordsBySection = true;
            if ($avgWordsCountBySection < $wordsByHeaderMin) {
                $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
                $correctAverageWordsBySection = false;
                // The info is stored under the key of the rule that failed (min)
                $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = "The number of words by section is less than {$wordsByHeaderMin}";
            } else {
                $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
            }
            if ($avgWordsCountBySection > $wordsByHeaderMax) {
                $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
                $correctAverageWordsBySection = false;
                $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
            } else {
                $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
            }
            if ($correctAverageWordsBySection) {
                $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
            } else {
                $qualityScores['correct_word_avg_by_section'] = 0;
            }

        }
    }

    /**
     * Internal Backlinks rule
     *
     * If a page is a low quality page, if the process run
     * anonymous, we will not see all {@link ft_backlinks()}
     * we use then the index directly to avoid confusion
     */
    $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
    $backlinksCount = count($backlinks);
    $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $backlinksCount;
    if ($backlinksCount == 0) {
        $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
        $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
        $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is no backlinks";
    } else {
        $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $backlinksCount * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
        $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
    }

    /**
     * Internal links
     */
    $internalLinksCount = isset($this->stats[Analytics::INTERNAL_LINKS_COUNT])
        ? $this->stats[Analytics::INTERNAL_LINKS_COUNT]
        : 0;
    if ($internalLinksCount == 0) {
        $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
        $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
        $ruleInfo[self::RULE_INTERNAL_LINKS_MIN] = "There is no internal links";
    } else {
        $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
        // The score depends on the number of internal links, not on the number of backlinks
        $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $internalLinksCount * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);
    }

    /**
     * Broken Links
     */
    $brokenLinksCount = isset($this->stats[Analytics::INTERNAL_LINKS_BROKEN_COUNT])
        ? $this->stats[Analytics::INTERNAL_LINKS_BROKEN_COUNT]
        : 0;
    if ($brokenLinksCount > 2) {
        $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
        $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
        $ruleInfo[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = "There is {$brokenLinksCount} broken links";
    } else {
        $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);
        $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
    }

    /**
     * Changes, the more changes the better
     */
    $qualityScores[Analytics::EDITS_COUNT] = $this->stats[Analytics::EDITS_COUNT] * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);

    /**
     * Rules that comes from the qc plugin
     * but are not yet implemented:
     *   - lot's of formatting (chars / plaintext ratio)
     *   - deeply nested quotations
     *   - too many hr
     *   - too many line breaks
     *   - single author only
     *   - too much cdata (plaintext) / formattings longer than 500 chars
     */

    /**
     * Quality Score
     */
    ksort($qualityScores);
    $qualityScoring = array();
    $qualityScoring["score"] = array_sum($qualityScores);
    $qualityScoring["scores"] = $qualityScores;

    /**
     * The broken rules
     */
    $brokenRules = array();
    foreach ($ruleResults as $ruleName => $ruleResult) {
        if ($ruleResult == self::FAILED) {
            $brokenRules[] = $ruleName;
        }
    }
    $ruleErrorCount = count($brokenRules);
    if ($ruleErrorCount > 0) {
        $qualityResult = $ruleErrorCount . " quality rules errors";
    } else {
        $qualityResult = "All quality rules passed";
    }

    /**
     * Low level
     * The mandatory rules are the rules that, if broken, set the quality level to low
     */
    $mandatoryRules = array_map('trim', explode(",", (string)$this->getConf(self::CONF_MANDATORY_QUALITY_RULES)));
    $mandatoryRulesBroken = [];
    foreach ($mandatoryRules as $lowLevelRule) {
        if (in_array($lowLevelRule, $brokenRules, true)) {
            $mandatoryRulesBroken[] = $lowLevelRule;
        }
    }
    $lowLevel = count($mandatoryRulesBroken) > 0;
    LowQualityPage::setLowQualityPage($ID, $lowLevel);

    /**
     * Building the quality object in order
     */
    $quality = array();
    $quality["low"] = $lowLevel;
    if ($lowLevel) {
        // A list is sorted by value (a sort by key would leave it untouched)
        sort($mandatoryRulesBroken);
        $quality['failed_mandatory_rules'] = $mandatoryRulesBroken;
    }
    $quality["scoring"] = $qualityScoring;
    $quality["rules"][self::RESULT] = $qualityResult;
    if (!empty($ruleInfo)) {
        $quality["rules"]["info"] = $ruleInfo;
    }

    ksort($ruleResults);
    $quality["rules"]['details'] = $ruleResults;

    /**
     * Metadata
     */
    $title = isset($meta['title']) ? $meta['title'] : null;
    $h1 = isset($meta['h1']) ? $meta['h1'] : null;
    $this->metadata[Analytics::TITLE] = $title;
    if ($title != $h1) {
        $this->metadata[Analytics::H1] = $h1;
    }
    // 24-hour format (H): a 12-hour format without am/pm is ambiguous
    $timestampCreation = $meta['date']['created'];
    $this->metadata[self::DATE_CREATED] = date('Y-m-d H:i:s', $timestampCreation);
    $timestampModification = $meta['date']['modified'];
    $this->metadata[Analytics::DATE_MODIFIED] = date('Y-m-d H:i:s', $timestampModification);
    // Ages in days
    $this->metadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
    $this->metadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);

    /**
     * Building the Top JSON in order
     */
    $finalStats = array();
    $finalStats["id"] = $ID;
    $finalStats["date"] = date('Y-m-d H:i:s', time());
    $finalStats['metadata'] = $this->metadata;
    ksort($statExport);
    $finalStats[Analytics::STATISTICS] = $statExport;
    $finalStats[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end

    /**
     * Set the header for the export.php file
     */
    p_set_metadata($ID, array("format" =>
        array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))
    ));
    $json_encoded = json_encode($finalStats, JSON_PRETTY_PRINT);

    $page = new Page($ID);
    $page->saveAnalytics($finalStats);
    $this->doc .= $json_encoded;

}
518007225e5Sgerardnico
519007225e5Sgerardnico    /**
520007225e5Sgerardnico     */
521007225e5Sgerardnico    public function getFormat()
522007225e5Sgerardnico    {
523007225e5Sgerardnico        return Analytics::RENDERER_FORMAT;
524007225e5Sgerardnico    }
525007225e5Sgerardnico
526007225e5Sgerardnico    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
527007225e5Sgerardnico    {
528007225e5Sgerardnico
529007225e5Sgerardnico        LinkUtility::processInternalLinkStats($id, $this->stats);
530007225e5Sgerardnico
531007225e5Sgerardnico    }
532007225e5Sgerardnico
533007225e5Sgerardnico    public function externallink($url, $name = null)
534007225e5Sgerardnico    {
535007225e5Sgerardnico        $this->stats[Analytics::EXTERNAL_LINKS_COUNT]++;
536007225e5Sgerardnico    }
537007225e5Sgerardnico
538007225e5Sgerardnico    public function header($text, $level, $pos)
539007225e5Sgerardnico    {
540007225e5Sgerardnico        $this->stats[Analytics::HEADERS_COUNT]['h' . $level]++;
541007225e5Sgerardnico        $this->headerId++;
542007225e5Sgerardnico        $this->stats[Analytics::HEADER_POSITION][$this->headerId] = 'h' . $level;
543007225e5Sgerardnico
544007225e5Sgerardnico    }
545007225e5Sgerardnico
546007225e5Sgerardnico    public function smiley($smiley)
547007225e5Sgerardnico    {
548007225e5Sgerardnico        if ($smiley == 'FIXME') $this->stats[self::FIXME]++;
549007225e5Sgerardnico    }
550007225e5Sgerardnico
551007225e5Sgerardnico    public function linebreak()
552007225e5Sgerardnico    {
553007225e5Sgerardnico        if (!$this->tableopen) {
554007225e5Sgerardnico            $this->stats['linebreak']++;
555007225e5Sgerardnico        }
556007225e5Sgerardnico    }
557007225e5Sgerardnico
558007225e5Sgerardnico    public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
559007225e5Sgerardnico    {
560007225e5Sgerardnico        $this->tableopen = true;
561007225e5Sgerardnico    }
562007225e5Sgerardnico
563007225e5Sgerardnico    public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
564007225e5Sgerardnico    {
565007225e5Sgerardnico        $this->tableopen = false;
566007225e5Sgerardnico    }
567007225e5Sgerardnico
568007225e5Sgerardnico    public function hr()
569007225e5Sgerardnico    {
570007225e5Sgerardnico        $this->stats['hr']++;
571007225e5Sgerardnico    }
572007225e5Sgerardnico
573007225e5Sgerardnico    public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
574007225e5Sgerardnico    {
575007225e5Sgerardnico        $this->stats['quote_count']++;
576007225e5Sgerardnico        $this->quotelevel++;
577007225e5Sgerardnico        $this->stats['quote_nest'] = max($this->quotelevel, $this->stats['quote_nest']);
578007225e5Sgerardnico    }
579007225e5Sgerardnico
580007225e5Sgerardnico    public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
581007225e5Sgerardnico    {
582007225e5Sgerardnico        $this->quotelevel--;
583007225e5Sgerardnico    }
584007225e5Sgerardnico
585007225e5Sgerardnico    public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
586007225e5Sgerardnico    {
587007225e5Sgerardnico        $this->formattingBracket++;
588007225e5Sgerardnico    }
589007225e5Sgerardnico
590007225e5Sgerardnico    public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
591007225e5Sgerardnico    {
592007225e5Sgerardnico        $this->formattingBracket--;
593007225e5Sgerardnico    }
594007225e5Sgerardnico
595007225e5Sgerardnico    public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
596007225e5Sgerardnico    {
597007225e5Sgerardnico        $this->formattingBracket++;
598007225e5Sgerardnico    }
599007225e5Sgerardnico
600007225e5Sgerardnico    public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
601007225e5Sgerardnico    {
602007225e5Sgerardnico        $this->formattingBracket--;
603007225e5Sgerardnico    }
604007225e5Sgerardnico
605007225e5Sgerardnico    public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
606007225e5Sgerardnico    {
607007225e5Sgerardnico        $this->formattingBracket++;
608007225e5Sgerardnico    }
609007225e5Sgerardnico
610007225e5Sgerardnico    public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
611007225e5Sgerardnico    {
612007225e5Sgerardnico        $this->formattingBracket--;
613007225e5Sgerardnico    }
614007225e5Sgerardnico
615007225e5Sgerardnico    public function cdata($text)
616007225e5Sgerardnico    {
617007225e5Sgerardnico
618007225e5Sgerardnico        /**
619007225e5Sgerardnico         * It seems that you receive cdata
620007225e5Sgerardnico         * when emphasis_open / underline_open / strong_open
621007225e5Sgerardnico         * Stats are not for them
622007225e5Sgerardnico         */
623007225e5Sgerardnico        if (!$this->formattingBracket) return;
624007225e5Sgerardnico
625007225e5Sgerardnico        $this->plainTextId++;
626007225e5Sgerardnico
627007225e5Sgerardnico        /**
628007225e5Sgerardnico         * Length
629007225e5Sgerardnico         */
630007225e5Sgerardnico        $len = strlen($text);
631007225e5Sgerardnico        $this->stats[self::PLAINTEXT][$this->plainTextId]['len'] = $len;
632007225e5Sgerardnico
633007225e5Sgerardnico
634007225e5Sgerardnico        /**
635007225e5Sgerardnico         * Multi-formatting
636007225e5Sgerardnico         */
637007225e5Sgerardnico        if ($this->formattingBracket > 1) {
638007225e5Sgerardnico            $numberOfFormats = 1 * ($this->formattingBracket - 1);
639007225e5Sgerardnico            $this->stats[self::PLAINTEXT][$this->plainTextId]['multiformat'] += $numberOfFormats;
640007225e5Sgerardnico        }
641007225e5Sgerardnico
642007225e5Sgerardnico        /**
643007225e5Sgerardnico         * Total
644007225e5Sgerardnico         */
645007225e5Sgerardnico        $this->stats[self::PLAINTEXT][0] += $len;
646007225e5Sgerardnico    }
647007225e5Sgerardnico
648007225e5Sgerardnico    public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
649007225e5Sgerardnico    {
650007225e5Sgerardnico        $this->stats[Analytics::INTERNAL_MEDIAS_COUNT]++;
651007225e5Sgerardnico    }
652007225e5Sgerardnico
653007225e5Sgerardnico    public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
654007225e5Sgerardnico    {
655007225e5Sgerardnico        $this->stats[Analytics::EXTERNAL_MEDIAS]++;
656007225e5Sgerardnico    }
657007225e5Sgerardnico
658007225e5Sgerardnico    public function reset()
659007225e5Sgerardnico    {
660007225e5Sgerardnico        $this->stats = array();
661007225e5Sgerardnico        $this->metadata = array();
662007225e5Sgerardnico        $this->headerId = 0;
663007225e5Sgerardnico    }
664007225e5Sgerardnico
665007225e5Sgerardnico    public function setMeta($key, $value)
666007225e5Sgerardnico    {
667007225e5Sgerardnico        $this->metadata[$key] = $value;
668007225e5Sgerardnico    }
669007225e5Sgerardnico
670007225e5Sgerardnico
671007225e5Sgerardnico}
672007225e5Sgerardnico
673