xref: /plugin/combo/renderer/analytics.php (revision 007225e5fb2d3f64edaccd3bd447ca26effb9d68)
1*007225e5Sgerardnico<?php
2*007225e5Sgerardnico
3*007225e5Sgerardnico
4*007225e5Sgerardnicouse ComboStrap\Analytics;
5*007225e5Sgerardnicouse ComboStrap\LinkUtility;
6*007225e5Sgerardnicouse ComboStrap\LogUtility;
7*007225e5Sgerardnicouse ComboStrap\LowQualityPage;
8*007225e5Sgerardnicouse ComboStrap\Sqlite;
9*007225e5Sgerardnicouse ComboStrap\Text;
10*007225e5Sgerardnicouse ComboStrap\UrlCanonical;
11*007225e5Sgerardnicouse dokuwiki\ChangeLog\PageChangeLog;
12*007225e5Sgerardnico
13*007225e5Sgerardnicorequire_once(__DIR__ . '/../class/Text.php');
14*007225e5Sgerardnicorequire_once(__DIR__ . '/../class/LowQualityPage.php');
15*007225e5Sgerardnicorequire_once(__DIR__ . '/../class/Analytics.php');
16*007225e5Sgerardnico
17*007225e5Sgerardnico
18*007225e5Sgerardnico/**
19*007225e5Sgerardnico * An analysis renderer that exports the statistics, quality and metadata of a page in JSON format
20*007225e5Sgerardnico * You can export the data with
21*007225e5Sgerardnico * doku.php?id=somepage&do=export_combo_analytics
22*007225e5Sgerardnico */
23*007225e5Sgerardnicoclass renderer_plugin_combo_analytics extends Doku_Renderer
24*007225e5Sgerardnico{
25*007225e5Sgerardnico    const DATE_CREATED = 'date_created';
26*007225e5Sgerardnico    const PLAINTEXT = 'formatted';
27*007225e5Sgerardnico    const RESULT = "result";
28*007225e5Sgerardnico    const DESCRIPTION = "description";
29*007225e5Sgerardnico    const PASSED = "Passed";
30*007225e5Sgerardnico    const FAILED = "Failed";
31*007225e5Sgerardnico    const FIXME = 'fixme';
32*007225e5Sgerardnico
33*007225e5Sgerardnico    /**
34*007225e5Sgerardnico     * Rules key
35*007225e5Sgerardnico     */
36*007225e5Sgerardnico    const RULE_WORDS_MINIMAL = 'words_min';
37*007225e5Sgerardnico    const RULE_OUTLINE_STRUCTURE = "outline_structure";
38*007225e5Sgerardnico    const RULE_INTERNAL_BACKLINKS_MIN = 'internal_backlinks_min';
39*007225e5Sgerardnico    const RULE_WORDS_MAXIMAL = "words_max";
40*007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MIN = 'words_by_section_avg_min';
41*007225e5Sgerardnico    const RULE_AVERAGE_WORDS_BY_SECTION_MAX = 'words_by_section_avg_max';
42*007225e5Sgerardnico    const RULE_INTERNAL_LINKS_MIN = 'internal_links_min';
43*007225e5Sgerardnico    const RULE_INTERNAL_BROKEN_LINKS_MAX = 'internal_links_broken_max';
44*007225e5Sgerardnico    const RULE_DESCRIPTION_PRESENT = 'description_present';
45*007225e5Sgerardnico    const RULE_FIXME = "fixme_min";
46*007225e5Sgerardnico    const RULE_TITLE_PRESENT = "title_present";
47*007225e5Sgerardnico    const RULE_CANONICAL_PRESENT = "canonical_present";
48*007225e5Sgerardnico
49*007225e5Sgerardnico    /**
50*007225e5Sgerardnico     * The default mandatory quality rules
51*007225e5Sgerardnico     */
52*007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES_DEFAULT_VALUE = [
53*007225e5Sgerardnico        self::RULE_WORDS_MINIMAL,
54*007225e5Sgerardnico        self::RULE_INTERNAL_BACKLINKS_MIN,
55*007225e5Sgerardnico        self::RULE_INTERNAL_LINKS_MIN
56*007225e5Sgerardnico    ];
57*007225e5Sgerardnico    const CONF_MANDATORY_QUALITY_RULES = "mandatoryQualityRules";
58*007225e5Sgerardnico
59*007225e5Sgerardnico    /**
60*007225e5Sgerardnico     * Quality Score factors
61*007225e5Sgerardnico     * They are used to calculate the score
62*007225e5Sgerardnico     */
63*007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR = 'qualityScoreInternalBacklinksFactor';
64*007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR = 'qualityScoreInternalLinksFactor';
65*007225e5Sgerardnico    const CONF_QUALITY_SCORE_TITLE_PRESENT = 'qualityScoreTitlePresent';
66*007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE = 'qualityScoreCorrectOutline';
67*007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_CONTENT = 'qualityScoreCorrectContentLength';
68*007225e5Sgerardnico    const CONF_QUALITY_SCORE_NO_FIXME = 'qualityScoreNoFixMe';
69*007225e5Sgerardnico    const CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE = 'qualityScoreCorrectWordSectionAvg';
70*007225e5Sgerardnico    const CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR = 'qualityScoreNoBrokenLinks';
71*007225e5Sgerardnico    const CONF_QUALITY_SCORE_CHANGES_FACTOR = 'qualityScoreChangesFactor';
72*007225e5Sgerardnico    const CONF_QUALITY_SCORE_DESCRIPTION_PRESENT = 'qualityScoreDescriptionPresent';
73*007225e5Sgerardnico    const CONF_QUALITY_SCORE_CANONICAL_PRESENT = 'qualityScoreCanonicalPresent';
74*007225e5Sgerardnico
75*007225e5Sgerardnico
76*007225e5Sgerardnico    /**
77*007225e5Sgerardnico     * The processing data
78*007225e5Sgerardnico     * that should be reset by {@link renderer_plugin_combo_analytics::reset()}
79*007225e5Sgerardnico     */
80*007225e5Sgerardnico    public $stats = array(); // the stats
81*007225e5Sgerardnico    protected $metadata = array(); // the metadata
82*007225e5Sgerardnico    protected $headerId = 0; // the id of the header on the page (first, second, ...)
83*007225e5Sgerardnico
84*007225e5Sgerardnico    /**
85*007225e5Sgerardnico     * The purpose of this variable is not known
86*007225e5Sgerardnico     */
87*007225e5Sgerardnico    protected $quotelevel = 0;
88*007225e5Sgerardnico    protected $formattingBracket = 0;
89*007225e5Sgerardnico    protected $tableopen = false;
90*007225e5Sgerardnico    private $plainTextId = 0;
91*007225e5Sgerardnico
92*007225e5Sgerardnico
93*007225e5Sgerardnico    /**
94*007225e5Sgerardnico     * Here the score is calculated
95*007225e5Sgerardnico     */
96*007225e5Sgerardnico    public function document_end() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
97*007225e5Sgerardnico    {
98*007225e5Sgerardnico        /**
99*007225e5Sgerardnico         * The metadata
100*007225e5Sgerardnico         */
101*007225e5Sgerardnico        global $ID;
102*007225e5Sgerardnico        $meta = p_get_metadata($ID);
103*007225e5Sgerardnico
104*007225e5Sgerardnico        /**
105*007225e5Sgerardnico         * Word and chars count
106*007225e5Sgerardnico         * The word count does not take into account
107*007225e5Sgerardnico         * words with non-words characters such as < =
108*007225e5Sgerardnico         * Therefore the node and attribute are not taken in the count
109*007225e5Sgerardnico         */
110*007225e5Sgerardnico        $text = rawWiki($ID);
111*007225e5Sgerardnico        $this->stats[Analytics::CHARS_COUNT] = strlen($text);
112*007225e5Sgerardnico        $this->stats[Analytics::WORDS_COUNT] = Text::getWordCount($text);
113*007225e5Sgerardnico
114*007225e5Sgerardnico        /**
115*007225e5Sgerardnico         * The exported object
116*007225e5Sgerardnico         */
117*007225e5Sgerardnico        $statExport = $this->stats;
118*007225e5Sgerardnico
119*007225e5Sgerardnico
120*007225e5Sgerardnico        /**
121*007225e5Sgerardnico         * Internal link distance summary calculation
122*007225e5Sgerardnico         */
123*007225e5Sgerardnico        if (array_key_exists(Analytics::INTERNAL_LINK_DISTANCE, $statExport)) {
124*007225e5Sgerardnico            $linkLengths = $statExport[Analytics::INTERNAL_LINK_DISTANCE];
125*007225e5Sgerardnico            unset($statExport[Analytics::INTERNAL_LINK_DISTANCE]);
126*007225e5Sgerardnico            $countBacklinks = count($linkLengths);
127*007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = null;
128*007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = null;
129*007225e5Sgerardnico            $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = null;
130*007225e5Sgerardnico            if ($countBacklinks > 0) {
131*007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['avg'] = array_sum($linkLengths) / $countBacklinks;
132*007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['max'] = max($linkLengths);
133*007225e5Sgerardnico                $statExport[Analytics::INTERNAL_LINK_DISTANCE]['min'] = min($linkLengths);
134*007225e5Sgerardnico            }
135*007225e5Sgerardnico        }
136*007225e5Sgerardnico
137*007225e5Sgerardnico        /**
138*007225e5Sgerardnico         * Quality Report / Rules
139*007225e5Sgerardnico         */
140*007225e5Sgerardnico        // The array that hold the results of the quality rules
141*007225e5Sgerardnico        $ruleResults = array();
142*007225e5Sgerardnico        // The array that hold the quality score details
143*007225e5Sgerardnico        $qualityScores = array();
144*007225e5Sgerardnico
145*007225e5Sgerardnico
146*007225e5Sgerardnico        /**
147*007225e5Sgerardnico         * No fixme
148*007225e5Sgerardnico         */
149*007225e5Sgerardnico        $fixmeCount = $this->stats[self::FIXME];
150*007225e5Sgerardnico        $statExport[self::FIXME] = $fixmeCount == null ? 0 : $fixmeCount;
151*007225e5Sgerardnico        if ($fixmeCount != 0) {
152*007225e5Sgerardnico            $ruleResults[self::RULE_FIXME] = self::FAILED;
153*007225e5Sgerardnico            $qualityScores['no_' . self::FIXME] = 0;
154*007225e5Sgerardnico        } else {
155*007225e5Sgerardnico            $ruleResults[self::RULE_FIXME] = self::PASSED;
156*007225e5Sgerardnico            $qualityScores['no_' . self::FIXME] = $this->getConf(self::CONF_QUALITY_SCORE_NO_FIXME, 1);;
157*007225e5Sgerardnico        }
158*007225e5Sgerardnico
159*007225e5Sgerardnico        /**
160*007225e5Sgerardnico         * A title should be present
161*007225e5Sgerardnico         */
162*007225e5Sgerardnico        if (empty($this->metadata[Analytics::TITLE])) {
163*007225e5Sgerardnico            $ruleResults[self::RULE_TITLE_PRESENT] = self::FAILED;
164*007225e5Sgerardnico            $ruleInfo[self::RULE_TITLE_PRESENT] = "A title is not present in the frontmatter";
165*007225e5Sgerardnico            $this->metadata[Analytics::TITLE] = $meta[Analytics::TITLE];
166*007225e5Sgerardnico            $qualityScores[self::RULE_TITLE_PRESENT] = 0;
167*007225e5Sgerardnico        } else {
168*007225e5Sgerardnico            $qualityScores[self::RULE_TITLE_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_TITLE_PRESENT, 10);;
169*007225e5Sgerardnico            $ruleResults[self::RULE_TITLE_PRESENT] = self::PASSED;
170*007225e5Sgerardnico        }
171*007225e5Sgerardnico
172*007225e5Sgerardnico        /**
173*007225e5Sgerardnico         * A description should be present
174*007225e5Sgerardnico         */
175*007225e5Sgerardnico        if (empty($this->metadata[self::DESCRIPTION])) {
176*007225e5Sgerardnico            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::FAILED;
177*007225e5Sgerardnico            $ruleInfo[self::RULE_CANONICAL_PRESENT] = "A description is not present in the frontmatter";
178*007225e5Sgerardnico            $this->metadata[self::DESCRIPTION] = $meta[self::DESCRIPTION]["abstract"];
179*007225e5Sgerardnico            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = 0;
180*007225e5Sgerardnico        } else {
181*007225e5Sgerardnico            $qualityScores[self::RULE_DESCRIPTION_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_DESCRIPTION_PRESENT, 8);;
182*007225e5Sgerardnico            $ruleResults[self::RULE_DESCRIPTION_PRESENT] = self::PASSED;
183*007225e5Sgerardnico        }
184*007225e5Sgerardnico
185*007225e5Sgerardnico        /**
186*007225e5Sgerardnico         * A canonical should be present
187*007225e5Sgerardnico         */
188*007225e5Sgerardnico        if (empty($this->metadata[UrlCanonical::CANONICAL_PROPERTY])) {
189*007225e5Sgerardnico            $qualityScores[self::RULE_CANONICAL_PRESENT] = 0;
190*007225e5Sgerardnico            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::FAILED;
191*007225e5Sgerardnico            $ruleInfo[self::RULE_CANONICAL_PRESENT] = "A canonical is not present in the frontmatter";
192*007225e5Sgerardnico        } else {
193*007225e5Sgerardnico            $qualityScores[self::RULE_CANONICAL_PRESENT] = $this->getConf(self::CONF_QUALITY_SCORE_CANONICAL_PRESENT, 5);;
194*007225e5Sgerardnico            $ruleResults[self::RULE_CANONICAL_PRESENT] = self::PASSED;
195*007225e5Sgerardnico        }
196*007225e5Sgerardnico
197*007225e5Sgerardnico        /**
198*007225e5Sgerardnico         * Outline / Header structure
199*007225e5Sgerardnico         */
200*007225e5Sgerardnico        $treeError = 0;
201*007225e5Sgerardnico        $headersCount = 0;
202*007225e5Sgerardnico        if (array_key_exists(Analytics::HEADER_POSITION, $this->stats)) {
203*007225e5Sgerardnico            $headersCount = count($this->stats[Analytics::HEADER_POSITION]);
204*007225e5Sgerardnico            unset($statExport[Analytics::HEADER_POSITION]);
205*007225e5Sgerardnico            for ($i = 1; $i < $headersCount; $i++) {
206*007225e5Sgerardnico                $currentHeaderLevel = $this->stats['header_struct'][$i];
207*007225e5Sgerardnico                $previousHeaderLevel = $this->stats['header_struct'][$i - 1];
208*007225e5Sgerardnico                if ($currentHeaderLevel - $previousHeaderLevel > 1) {
209*007225e5Sgerardnico                    $treeError += 1;
210*007225e5Sgerardnico                    $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "The " . $i . " header (h" . $currentHeaderLevel . ") has a level bigger than its precedent (" . $previousHeaderLevel . ")";
211*007225e5Sgerardnico                }
212*007225e5Sgerardnico            }
213*007225e5Sgerardnico        }
214*007225e5Sgerardnico        if ($treeError > 0 || $headersCount == 0) {
215*007225e5Sgerardnico            $qualityScores['correct_outline'] = 0;
216*007225e5Sgerardnico            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::FAILED;
217*007225e5Sgerardnico            if ($headersCount==0){
218*007225e5Sgerardnico                $ruleInfo[self::RULE_OUTLINE_STRUCTURE] = "There is no header";
219*007225e5Sgerardnico            }
220*007225e5Sgerardnico        } else {
221*007225e5Sgerardnico            $qualityScores['correct_outline'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_HEADER_STRUCTURE, 3);
222*007225e5Sgerardnico            $ruleResults[self::RULE_OUTLINE_STRUCTURE] = self::PASSED;
223*007225e5Sgerardnico        }
224*007225e5Sgerardnico
225*007225e5Sgerardnico
226*007225e5Sgerardnico        /**
227*007225e5Sgerardnico         * Document length
228*007225e5Sgerardnico         */
229*007225e5Sgerardnico        $minimalWordCount = 50;
230*007225e5Sgerardnico        $maximalWordCount = 1500;
231*007225e5Sgerardnico        $correctContentLength = true;
232*007225e5Sgerardnico        if ($this->stats[Analytics::WORDS_COUNT] < $minimalWordCount) {
233*007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MINIMAL] = self::FAILED;
234*007225e5Sgerardnico            $correctContentLength = false;
235*007225e5Sgerardnico            $ruleInfo[self::RULE_WORDS_MINIMAL] = "The number of words is less than {$minimalWordCount}";
236*007225e5Sgerardnico        } else {
237*007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MINIMAL] = self::PASSED;
238*007225e5Sgerardnico        }
239*007225e5Sgerardnico        if ($this->stats[Analytics::WORDS_COUNT] > $maximalWordCount) {
240*007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::FAILED;
241*007225e5Sgerardnico            $ruleInfo[self::RULE_WORDS_MAXIMAL] = "The number of words is more than {$maximalWordCount}";
242*007225e5Sgerardnico            $correctContentLength = false;
243*007225e5Sgerardnico        } else {
244*007225e5Sgerardnico            $ruleResults[self::RULE_WORDS_MAXIMAL] = self::PASSED;
245*007225e5Sgerardnico        }
246*007225e5Sgerardnico        if ($correctContentLength) {
247*007225e5Sgerardnico            $qualityScores['correct_content_length'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_CONTENT, 10);
248*007225e5Sgerardnico        } else {
249*007225e5Sgerardnico            $qualityScores['correct_content_length'] = 0;
250*007225e5Sgerardnico        }
251*007225e5Sgerardnico
252*007225e5Sgerardnico
253*007225e5Sgerardnico        /**
254*007225e5Sgerardnico         * Average Number of words by header section to text ratio
255*007225e5Sgerardnico         */
256*007225e5Sgerardnico        $headers = $this->stats[Analytics::HEADERS_COUNT];
257*007225e5Sgerardnico        if ($headers != null) {
258*007225e5Sgerardnico            $headerCount = array_sum($headers);
259*007225e5Sgerardnico            $headerCount--; // h1 is supposed to have no words
260*007225e5Sgerardnico            if ($headerCount > 0) {
261*007225e5Sgerardnico
262*007225e5Sgerardnico                $avgWordsCountBySection = round($this->stats[Analytics::WORDS_COUNT] / $headerCount);
263*007225e5Sgerardnico                $statExport['word_section_count']['avg'] = $avgWordsCountBySection;
264*007225e5Sgerardnico
265*007225e5Sgerardnico                /**
266*007225e5Sgerardnico                 * Min words by header section
267*007225e5Sgerardnico                 */
268*007225e5Sgerardnico                $wordsByHeaderMin = 20;
269*007225e5Sgerardnico                /**
270*007225e5Sgerardnico                 * Max words by header section
271*007225e5Sgerardnico                 */
272*007225e5Sgerardnico                $wordsByHeaderMax = 300;
273*007225e5Sgerardnico                $correctAverageWordsBySection = true;
274*007225e5Sgerardnico                if ($avgWordsCountBySection < $wordsByHeaderMin) {
275*007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::FAILED;
276*007225e5Sgerardnico                    $correctAverageWordsBySection = false;
277*007225e5Sgerardnico                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is less than {$wordsByHeaderMin}";
278*007225e5Sgerardnico                } else {
279*007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MIN] = self::PASSED;
280*007225e5Sgerardnico                }
281*007225e5Sgerardnico                if ($avgWordsCountBySection > $wordsByHeaderMax) {
282*007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::FAILED;
283*007225e5Sgerardnico                    $correctAverageWordsBySection = false;
284*007225e5Sgerardnico                    $ruleInfo[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = "The number of words by section is more than {$wordsByHeaderMax}";
285*007225e5Sgerardnico                } else {
286*007225e5Sgerardnico                    $ruleResults[self::RULE_AVERAGE_WORDS_BY_SECTION_MAX] = self::PASSED;
287*007225e5Sgerardnico                }
288*007225e5Sgerardnico                if ($correctAverageWordsBySection) {
289*007225e5Sgerardnico                    $qualityScores['correct_word_avg_by_section'] = $this->getConf(self::CONF_QUALITY_SCORE_CORRECT_WORD_SECTION_AVERAGE, 10);
290*007225e5Sgerardnico                } else {
291*007225e5Sgerardnico                    $qualityScores['correct_word_avg_by_section'] = 0;
292*007225e5Sgerardnico                }
293*007225e5Sgerardnico
294*007225e5Sgerardnico            }
295*007225e5Sgerardnico        }
296*007225e5Sgerardnico
297*007225e5Sgerardnico        /**
298*007225e5Sgerardnico         * Internal Backlinks rule
299*007225e5Sgerardnico         *
300*007225e5Sgerardnico         * If a page is a low quality page, if the process run
301*007225e5Sgerardnico         * anonymous, we will not see all {@link ft_backlinks()}
302*007225e5Sgerardnico         * we use then the index directly to avoid confusion
303*007225e5Sgerardnico         */
304*007225e5Sgerardnico        $backlinks = idx_get_indexer()->lookupKey('relation_references', $ID);
305*007225e5Sgerardnico        $countBacklinks = count($backlinks);
306*007225e5Sgerardnico        $statExport[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks;
307*007225e5Sgerardnico        if ($countBacklinks == 0) {
308*007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = 0;
309*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::FAILED;
310*007225e5Sgerardnico            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is no backlinks";
311*007225e5Sgerardnico        } else {
312*007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_BACKLINKS_COUNT] = $countBacklinks * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_BACKLINK_FACTOR, 1);
313*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BACKLINKS_MIN] = self::PASSED;
314*007225e5Sgerardnico        }
315*007225e5Sgerardnico
316*007225e5Sgerardnico        /**
317*007225e5Sgerardnico         * Internal links
318*007225e5Sgerardnico         */
319*007225e5Sgerardnico        $internalLinksCount = $this->stats[Analytics::INTERNAL_LINKS_COUNT];
320*007225e5Sgerardnico        if ($internalLinksCount == 0) {
321*007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = 0;
322*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::FAILED;
323*007225e5Sgerardnico            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is no internal links";
324*007225e5Sgerardnico        } else {
325*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_LINKS_MIN] = self::PASSED;
326*007225e5Sgerardnico            $qualityScores[Analytics::INTERNAL_LINKS_COUNT] = $countBacklinks * $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_FACTOR, 1);;
327*007225e5Sgerardnico        }
328*007225e5Sgerardnico
329*007225e5Sgerardnico        /**
330*007225e5Sgerardnico         * Broken Links
331*007225e5Sgerardnico         */
332*007225e5Sgerardnico        $brokenLinksCount = $this->stats[Analytics::INTERNAL_LINKS_BROKEN_COUNT];
333*007225e5Sgerardnico        if ($brokenLinksCount > 2) {
334*007225e5Sgerardnico            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = 0;
335*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::FAILED;
336*007225e5Sgerardnico            $ruleInfo[self::RULE_INTERNAL_BACKLINKS_MIN] = "There is {$brokenLinksCount} broken links";
337*007225e5Sgerardnico        } else {
338*007225e5Sgerardnico            $qualityScores['no_' . Analytics::INTERNAL_LINKS_BROKEN_COUNT] = $this->getConf(self::CONF_QUALITY_SCORE_INTERNAL_LINK_BROKEN_FACTOR, 2);;;
339*007225e5Sgerardnico            $ruleResults[self::RULE_INTERNAL_BROKEN_LINKS_MAX] = self::PASSED;
340*007225e5Sgerardnico        }
341*007225e5Sgerardnico
342*007225e5Sgerardnico        /**
343*007225e5Sgerardnico         * Changes, the more changes the better
344*007225e5Sgerardnico         */
345*007225e5Sgerardnico        $qualityScores[Analytics::EDITS_COUNT] = $this->stats[Analytics::EDITS_COUNT] * $this->getConf(self::CONF_QUALITY_SCORE_CHANGES_FACTOR, 0.25);;;
346*007225e5Sgerardnico
347*007225e5Sgerardnico
348*007225e5Sgerardnico        /**
349*007225e5Sgerardnico         * Rules that comes from the qc plugin
350*007225e5Sgerardnico         * but are not yet fully implemented
351*007225e5Sgerardnico         */
352*007225e5Sgerardnico
353*007225e5Sgerardnico//        // 2 points for lot's of formatting
354*007225e5Sgerardnico//        if ($this->stats[self::PLAINTEXT] && $this->stats['chars'] / $this->stats[self::PLAINTEXT] < 3) {
355*007225e5Sgerardnico//            $ruleResults['manyformat'] = 2;
356*007225e5Sgerardnico//        }
357*007225e5Sgerardnico//
358*007225e5Sgerardnico//        // 1/2 points for deeply nested quotations
359*007225e5Sgerardnico//        if ($this->stats['quote_nest'] > 2) {
360*007225e5Sgerardnico//            $ruleResults['deepquote'] += $this->stats['quote_nest'] / 2;
361*007225e5Sgerardnico//        }
362*007225e5Sgerardnico//
363*007225e5Sgerardnico//        // 1/2 points for too many hr
364*007225e5Sgerardnico//        if ($this->stats['hr'] > 2) {
365*007225e5Sgerardnico//            $ruleResults['manyhr'] = ($this->stats['hr'] - 2) / 2;
366*007225e5Sgerardnico//        }
367*007225e5Sgerardnico//
368*007225e5Sgerardnico//        // 1 point for too many line breaks
369*007225e5Sgerardnico//        if ($this->stats['linebreak'] > 2) {
370*007225e5Sgerardnico//            $ruleResults['manybr'] = $this->stats['linebreak'] - 2;
371*007225e5Sgerardnico//        }
372*007225e5Sgerardnico//
373*007225e5Sgerardnico//        // 1 point for single author only
374*007225e5Sgerardnico//        if (!$this->getConf('single_author_only') && count($this->stats['authors']) == 1) {
375*007225e5Sgerardnico//            $ruleResults['singleauthor'] = 1;
376*007225e5Sgerardnico//        }
377*007225e5Sgerardnico
378*007225e5Sgerardnico        // Too much cdata (plaintext), see cdata
379*007225e5Sgerardnico        // if ($len > 500) $statExport[self::QUALITY][self::ERROR]['plaintext']++;
380*007225e5Sgerardnico        // if ($len > 500) $statExport[self::QUALITY][self::ERROR]['plaintext']++;
381*007225e5Sgerardnico        //
382*007225e5Sgerardnico        // // 1 point for formattings longer than 500 chars
383*007225e5Sgerardnico        // $statExport[self::QUALITY][self::ERROR]['multiformat']
384*007225e5Sgerardnico
385*007225e5Sgerardnico        /**
386*007225e5Sgerardnico         * Quality Score
387*007225e5Sgerardnico         */
388*007225e5Sgerardnico        ksort($qualityScores);
389*007225e5Sgerardnico        $qualityScoring = array();
390*007225e5Sgerardnico        $qualityScoring["score"] = array_sum($qualityScores);
391*007225e5Sgerardnico        $qualityScoring["scores"] = $qualityScores;
392*007225e5Sgerardnico
393*007225e5Sgerardnico
394*007225e5Sgerardnico        /**
395*007225e5Sgerardnico         * The rule that if broken will set the quality level to low
396*007225e5Sgerardnico         */
397*007225e5Sgerardnico        $brokenRules = array();
398*007225e5Sgerardnico        foreach ($ruleResults as $ruleName => $ruleResult) {
399*007225e5Sgerardnico            if ($ruleResult == self::FAILED) {
400*007225e5Sgerardnico                $brokenRules[] = $ruleName;
401*007225e5Sgerardnico            }
402*007225e5Sgerardnico        }
403*007225e5Sgerardnico        $ruleErrorCount = sizeof($brokenRules);
404*007225e5Sgerardnico        if ($ruleErrorCount > 0) {
405*007225e5Sgerardnico            $qualityResult = $ruleErrorCount . " quality rules errors";
406*007225e5Sgerardnico        } else {
407*007225e5Sgerardnico            $qualityResult = "All quality rules passed";
408*007225e5Sgerardnico        }
409*007225e5Sgerardnico
410*007225e5Sgerardnico        /**
411*007225e5Sgerardnico         * Low level
412*007225e5Sgerardnico         */
413*007225e5Sgerardnico        $mandatoryRules = preg_split("/,/", $this->getConf(self::CONF_MANDATORY_QUALITY_RULES));
414*007225e5Sgerardnico        $mandatoryRulesBroken = [];
415*007225e5Sgerardnico        foreach ($mandatoryRules as $lowLevelRule) {
416*007225e5Sgerardnico            if (in_array($lowLevelRule, $brokenRules)) {
417*007225e5Sgerardnico                $mandatoryRulesBroken[] = $lowLevelRule;
418*007225e5Sgerardnico            }
419*007225e5Sgerardnico        }
420*007225e5Sgerardnico        $lowLevel = false;
421*007225e5Sgerardnico        if (sizeof($mandatoryRulesBroken) > 0) {
422*007225e5Sgerardnico            $lowLevel = true;
423*007225e5Sgerardnico        }
424*007225e5Sgerardnico        LowQualityPage::setLowQualityPage($ID, $lowLevel);
425*007225e5Sgerardnico
426*007225e5Sgerardnico        /**
427*007225e5Sgerardnico         * Building the quality object in order
428*007225e5Sgerardnico         */
429*007225e5Sgerardnico        $quality["low"] = $lowLevel;
430*007225e5Sgerardnico        if (sizeof($mandatoryRulesBroken) > 0) {
431*007225e5Sgerardnico            ksort($mandatoryRulesBroken);
432*007225e5Sgerardnico            $quality['failed_mandatory_rules'] = $mandatoryRulesBroken;
433*007225e5Sgerardnico        }
434*007225e5Sgerardnico        $quality["scoring"] = $qualityScoring;
435*007225e5Sgerardnico        $quality["rules"][self::RESULT] = $qualityResult;
436*007225e5Sgerardnico        if (!empty($ruleInfo)) {
437*007225e5Sgerardnico            $quality["rules"]["info"] = $ruleInfo;
438*007225e5Sgerardnico        }
439*007225e5Sgerardnico
440*007225e5Sgerardnico        ksort($ruleResults);
441*007225e5Sgerardnico        $quality["rules"]['details'] = $ruleResults;
442*007225e5Sgerardnico
443*007225e5Sgerardnico        /**
444*007225e5Sgerardnico         * Metadata
445*007225e5Sgerardnico         */
446*007225e5Sgerardnico        $this->metadata[Analytics::TITLE] = $meta['title'];
447*007225e5Sgerardnico        $timestampCreation = $meta['date']['created'];
448*007225e5Sgerardnico        $this->metadata[self::DATE_CREATED] = date('Y-m-d h:i:s', $timestampCreation);
449*007225e5Sgerardnico        $timestampModification = $meta['date']['modified'];
450*007225e5Sgerardnico        $this->metadata[Analytics::DATE_MODIFIED] = date('Y-m-d h:i:s', $timestampModification);
451*007225e5Sgerardnico        $this->metadata['age_creation'] = round((time() - $timestampCreation) / 60 / 60 / 24);
452*007225e5Sgerardnico        $this->metadata['age_modification'] = round((time() - $timestampModification) / 60 / 60 / 24);
453*007225e5Sgerardnico
454*007225e5Sgerardnico
455*007225e5Sgerardnico        // get author info
456*007225e5Sgerardnico        $changelog = new PageChangeLog($ID);
457*007225e5Sgerardnico        $revs = $changelog->getRevisions(0, 10000);
458*007225e5Sgerardnico        array_push($revs, $meta['last_change']['date']);
459*007225e5Sgerardnico        $this->stats[Analytics::EDITS_COUNT] = count($revs);
460*007225e5Sgerardnico        foreach ($revs as $rev) {
461*007225e5Sgerardnico            $info = $changelog->getRevisionInfo($rev);
462*007225e5Sgerardnico            if ($info['user']) {
463*007225e5Sgerardnico                $this->stats['authors'][$info['user']] += 1;
464*007225e5Sgerardnico            } else {
465*007225e5Sgerardnico                $this->stats['authors']['*'] += 1;
466*007225e5Sgerardnico            }
467*007225e5Sgerardnico        }
468*007225e5Sgerardnico
469*007225e5Sgerardnico        /**
470*007225e5Sgerardnico         * Building the Top JSON in order
471*007225e5Sgerardnico         */
472*007225e5Sgerardnico        global $ID;
473*007225e5Sgerardnico        $json = array();
474*007225e5Sgerardnico        $json["id"] = $ID;
475*007225e5Sgerardnico        $json['metadata'] = $this->metadata;
476*007225e5Sgerardnico        ksort($statExport);
477*007225e5Sgerardnico        $json[Analytics::STATISTICS] = $statExport;
478*007225e5Sgerardnico        $json[Analytics::QUALITY] = $quality; // Quality after the sort to get them at the end
479*007225e5Sgerardnico
480*007225e5Sgerardnico
481*007225e5Sgerardnico        /**
482*007225e5Sgerardnico         * The result can be seen with
483*007225e5Sgerardnico         * doku.php?id=somepage&do=export_combo_analysis
484*007225e5Sgerardnico         */
485*007225e5Sgerardnico        /**
486*007225e5Sgerardnico         * Set the header for the export.php file
487*007225e5Sgerardnico         */
488*007225e5Sgerardnico        p_set_metadata($ID, array("format" =>
489*007225e5Sgerardnico            array("combo_" . $this->getPluginComponent() => array("Content-Type" => 'application/json'))
490*007225e5Sgerardnico        ));
491*007225e5Sgerardnico        $json_encoded = json_encode($json, JSON_PRETTY_PRINT);
492*007225e5Sgerardnico
493*007225e5Sgerardnico        $sqlite = Sqlite::getSqlite();
494*007225e5Sgerardnico        if ($sqlite != null) {
495*007225e5Sgerardnico            /**
496*007225e5Sgerardnico             * Sqlite Plugin installed
497*007225e5Sgerardnico             */
498*007225e5Sgerardnico            $canonical = $this->metadata[UrlCanonical::CANONICAL_PROPERTY];
499*007225e5Sgerardnico            if (empty($canonical)) {
500*007225e5Sgerardnico                $canonical = $ID; // not null constraint unfortunately
501*007225e5Sgerardnico            }
502*007225e5Sgerardnico            $entry = array(
503*007225e5Sgerardnico                'CANONICAL' => $canonical,
504*007225e5Sgerardnico                'ANALYTICS' => $json_encoded,
505*007225e5Sgerardnico                'ID' => $ID
506*007225e5Sgerardnico            );
507*007225e5Sgerardnico            $res = $sqlite->query("SELECT count(*) FROM PAGES where ID = ?", $ID);
508*007225e5Sgerardnico            if ($sqlite->res2single($res) == 1) {
509*007225e5Sgerardnico                // Upset not supported on all version
510*007225e5Sgerardnico                //$upsert = 'insert into PAGES (ID,CANONICAL,ANALYTICS) values (?,?,?) on conflict (ID,CANONICAL) do update set ANALYTICS = EXCLUDED.ANALYTICS';
511*007225e5Sgerardnico                $update = 'update PAGES SET CANONICAL = ?, ANALYTICS = ? where ID=?';
512*007225e5Sgerardnico                $res = $sqlite->query($update, $entry);
513*007225e5Sgerardnico            } else {
514*007225e5Sgerardnico                $res = $sqlite->storeEntry('PAGES', $entry);
515*007225e5Sgerardnico            }
516*007225e5Sgerardnico            if (!$res) {
517*007225e5Sgerardnico                LogUtility::msg("There was a problem during the upsert: {$sqlite->getAdapter()->getDb()->errorInfo()}");
518*007225e5Sgerardnico            }
519*007225e5Sgerardnico            $sqlite->res_close($res);
520*007225e5Sgerardnico        }
521*007225e5Sgerardnico        $this->doc .= $json_encoded;
522*007225e5Sgerardnico
523*007225e5Sgerardnico    }
524*007225e5Sgerardnico
/**
 * Tell DokuWiki which output format this renderer produces.
 *
 * @return mixed the value of Analytics::RENDERER_FORMAT (declared in the Analytics class,
 *               outside this file; presumably the format name - confirm there)
 */
public function getFormat()
{
    $format = Analytics::RENDERER_FORMAT;
    return $format;
}
531*007225e5Sgerardnico
/**
 * Handle an internal link instruction.
 *
 * Nothing is rendered: the link target and the stats array of this renderer are handed
 * over to LinkUtility::processInternalLinkStats(). The method returns nothing, whatever
 * the value of $returnonly.
 *
 * @param $id         the link target, forwarded untouched
 * @param $name       not used
 * @param $search     not used
 * @param $returnonly not used
 * @param $linktype   not used
 */
public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
{
    // NOTE(review): $this->stats is presumably taken by reference by the helper - confirm in LinkUtility
    LinkUtility::processInternalLinkStats($id, $this->stats);
}
538*007225e5Sgerardnico
/**
 * Count one external link.
 *
 * The counter is stored in $this->stats under Analytics::EXTERNAL_LINKS_COUNT.
 * It is created with 0 when it does not exist yet (reset() empties the stats array),
 * so that the first increment does not raise an "undefined index" notice
 * ("undefined array key" warning on PHP 8).
 *
 * @param $url  not used
 * @param $name not used
 */
public function externallink($url, $name = null)
{
    $counter = Analytics::EXTERNAL_LINKS_COUNT;
    if (!isset($this->stats[$counter])) {
        $this->stats[$counter] = 0;
    }
    $this->stats[$counter]++;
}
543*007225e5Sgerardnico
/**
 * Register a heading in the statistics.
 *
 * Two things are recorded:
 *   - the number of headings per level, under Analytics::HEADERS_COUNT, keyed 'h' . $level
 *   - the level of each heading in document order, under Analytics::HEADER_POSITION,
 *     keyed by a running heading number ($this->headerId)
 *
 * The per-level counter is created with 0 on first use so that the increment
 * does not raise an "undefined index / undefined array key" error.
 *
 * @param $text  not used
 * @param $level the heading level, used to build the 'h<level>' key
 * @param $pos   not used
 */
public function header($text, $level, $pos)
{
    $tag = 'h' . $level;

    if (!isset($this->stats[Analytics::HEADERS_COUNT][$tag])) {
        $this->stats[Analytics::HEADERS_COUNT][$tag] = 0;
    }
    $this->stats[Analytics::HEADERS_COUNT][$tag]++;

    // The running number is incremented before use
    $this->headerId++;
    $this->stats[Analytics::HEADER_POSITION][$this->headerId] = $tag;
}
551*007225e5Sgerardnico
/**
 * Count the FIXME markers; every other smiley is ignored.
 *
 * The comparison is strict: with a loose `==`, a non-string value such as 0 or true
 * would also be counted on PHP 7. The counter (key self::FIXME) is created with 0
 * on first use so that the increment does not raise an
 * "undefined index / undefined array key" error.
 *
 * @param $smiley the smiley text
 */
public function smiley($smiley)
{
    if ($smiley !== 'FIXME') {
        return;
    }

    if (!isset($this->stats[self::FIXME])) {
        $this->stats[self::FIXME] = 0;
    }
    $this->stats[self::FIXME]++;
}
556*007225e5Sgerardnico
/**
 * Count a forced line break, except while a table is open
 * (the flag is maintained by table_open() / table_close()).
 *
 * The 'linebreak' counter is created with 0 on first use so that the increment
 * does not raise an "undefined index / undefined array key" error.
 */
public function linebreak()
{
    if ($this->tableopen) {
        return;
    }

    if (!isset($this->stats['linebreak'])) {
        $this->stats['linebreak'] = 0;
    }
    $this->stats['linebreak']++;
}
563*007225e5Sgerardnico
/**
 * Remember that the renderer is now inside a table.
 * linebreak() reads this flag and does not count line breaks while it is set.
 *
 * @param $maxcols not used
 * @param $numrows not used
 * @param $pos     not used
 */
public function table_open($maxcols = null, $numrows = null, $pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    $this->tableopen = true;
}
568*007225e5Sgerardnico
/**
 * Remember that the table is finished: line breaks are counted again by linebreak().
 *
 * @param $pos not used
 */
public function table_close($pos = null) // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    $this->tableopen = false;
}
573*007225e5Sgerardnico
/**
 * Count one horizontal rule.
 *
 * The 'hr' counter is created with 0 on first use so that the increment
 * does not raise an "undefined index / undefined array key" error.
 */
public function hr()
{
    if (!isset($this->stats['hr'])) {
        $this->stats['hr'] = 0;
    }
    $this->stats['hr']++;
}
578*007225e5Sgerardnico
/**
 * Register the opening of a quote.
 *
 * Updates:
 *   - 'quote_count': the number of quotes opened
 *   - 'quote_nest': the deepest nesting level seen
 * and increases the current nesting level ($this->quotelevel),
 * which quote_close() decreases again.
 *
 * Both statistics default to 0 when they do not exist yet, so that reading them
 * does not raise an "undefined index / undefined array key" error.
 */
public function quote_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    if (!isset($this->stats['quote_count'])) {
        $this->stats['quote_count'] = 0;
    }
    $this->stats['quote_count']++;

    $this->quotelevel++;

    $deepestSoFar = isset($this->stats['quote_nest']) ? $this->stats['quote_nest'] : 0;
    $this->stats['quote_nest'] = max($this->quotelevel, $deepestSoFar);
}
585*007225e5Sgerardnico
/**
 * Leave a quote: the current nesting level raised by quote_open() goes down by one.
 */
public function quote_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    --$this->quotelevel;
}
590*007225e5Sgerardnico
/**
 * Enter a strong (bold) formatting: one more formatting is open.
 * cdata() reads the number of open formattings.
 */
public function strong_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    ++$this->formattingBracket;
}
595*007225e5Sgerardnico
/**
 * Leave a strong (bold) formatting: one formatting less is open.
 */
public function strong_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    --$this->formattingBracket;
}
600*007225e5Sgerardnico
/**
 * Enter an emphasis formatting: one more formatting is open.
 * cdata() reads the number of open formattings.
 */
public function emphasis_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    ++$this->formattingBracket;
}
605*007225e5Sgerardnico
/**
 * Leave an emphasis formatting: one formatting less is open.
 */
public function emphasis_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    --$this->formattingBracket;
}
610*007225e5Sgerardnico
/**
 * Enter an underline formatting: one more formatting is open.
 * cdata() reads the number of open formattings.
 */
public function underline_open() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    ++$this->formattingBracket;
}
615*007225e5Sgerardnico
/**
 * Leave an underline formatting: one formatting less is open.
 */
public function underline_close() // phpcs:ignore PSR1.Methods.CamelCapsMethodName.NotCamelCaps
{
    --$this->formattingBracket;
}
620*007225e5Sgerardnico
/**
 * Collect statistics on formatted text.
 *
 * Only the text received while at least one formatting is open is measured
 * (strong / emphasis / underline, see the *_open and *_close methods);
 * text received outside of any formatting is ignored.
 *
 * For each measured piece of text, under the key self::PLAINTEXT:
 *   - [<running number>]['len']: the length of the text
 *   - [<running number>]['multiformat']: the number of formattings on top of the first one,
 *     set only when more than one formatting is open
 *   - [0]: the total length of all measured texts
 *
 * The accumulators default to 0 when they do not exist yet, so that adding to them
 * does not raise an "undefined index / undefined array key" error.
 *
 * @param $text the character data
 */
public function cdata($text)
{
    if (!$this->formattingBracket) {
        return;
    }

    // The running number is incremented before use, the key 0 is kept for the total
    $this->plainTextId++;
    $textId = $this->plainTextId;

    /**
     * Length
     * strlen() counts bytes: a multi-byte character weighs more than one
     */
    $len = strlen($text);
    $this->stats[self::PLAINTEXT][$textId]['len'] = $len;

    /**
     * Multi-formatting: every open formatting above the first one counts for one
     */
    if ($this->formattingBracket > 1) {
        $extraFormats = $this->formattingBracket - 1;
        $known = isset($this->stats[self::PLAINTEXT][$textId]['multiformat'])
            ? $this->stats[self::PLAINTEXT][$textId]['multiformat']
            : 0;
        $this->stats[self::PLAINTEXT][$textId]['multiformat'] = $known + $extraFormats;
    }

    /**
     * Total
     */
    $total = isset($this->stats[self::PLAINTEXT][0]) ? $this->stats[self::PLAINTEXT][0] : 0;
    $this->stats[self::PLAINTEXT][0] = $total + $len;
}
653*007225e5Sgerardnico
/**
 * Count one internal media.
 *
 * The counter is stored in $this->stats under Analytics::INTERNAL_MEDIAS_COUNT.
 * It is created with 0 on first use so that the increment does not raise an
 * "undefined index / undefined array key" error.
 * None of the parameters is used.
 */
public function internalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
{
    $counter = Analytics::INTERNAL_MEDIAS_COUNT;
    if (!isset($this->stats[$counter])) {
        $this->stats[$counter] = 0;
    }
    $this->stats[$counter]++;
}
658*007225e5Sgerardnico
/**
 * Count one external media.
 *
 * The counter is stored in $this->stats under Analytics::EXTERNAL_MEDIAS.
 * It is created with 0 on first use so that the increment does not raise an
 * "undefined index / undefined array key" error.
 * None of the parameters is used.
 */
public function externalmedia($src, $title = null, $align = null, $width = null, $height = null, $cache = null, $linking = null)
{
    $counter = Analytics::EXTERNAL_MEDIAS;
    if (!isset($this->stats[$counter])) {
        $this->stats[$counter] = 0;
    }
    $this->stats[$counter]++;
}
663*007225e5Sgerardnico
/**
 * Bring the renderer back to a clean state.
 *
 * Besides the collected data (stats, metadata) and the heading counter, the state
 * that the instruction handlers maintain while walking through a page is cleared too:
 * the formatted text counter, the quote nesting level, the number of open formattings
 * and the table flag. Otherwise a reused instance would continue the text numbering
 * of the previous page and would inherit the nesting state of a page that ended
 * with an unclosed quote, formatting or table.
 */
public function reset()
{
    // Collected data
    $this->stats = array();
    $this->metadata = array();

    // Running numbers, both are incremented before use
    $this->headerId = 0;
    $this->plainTextId = 0;

    // Nesting state
    // NOTE(review): 0 / false are the values the handlers treat as "nothing open" - confirm
    // that they match the property declarations at the top of the class
    $this->quotelevel = 0;
    $this->formattingBracket = 0;
    $this->tableopen = false;
}
670*007225e5Sgerardnico
/**
 * Store a metadata of the page, any previous value of the same key is replaced.
 *
 * @param $key   the name of the metadata
 * @param $value the value of the metadata
 */
public function setMeta($key, $value)
{
    $this->metadata[$key] = $value;
}
675*007225e5Sgerardnico
676*007225e5Sgerardnico
677*007225e5Sgerardnico}
678*007225e5Sgerardnico
679