1/*  DokuWiki MoaiEditor Match_paragraphs.js file
2    Author  : MoaiTools <info@moaitools.org>
3    License : GPL 3 (http://www.gnu.org/licenses/gpl.html) */
4
/**
 * Matches rendered <p> nodes of a section to the blocks of source text
 * that most likely produced them, and records each confident match.
 */
MoaiEditor.MatchParagraphs = class {

    /**
     * @param {object} outer - Owning object. This class reads its `tools`,
     *     `section.nodes`, `blocks`, `startline` and `outer.matches` members.
     */
    constructor(outer) {
        this.outer = outer;
        this.tools = outer.tools;
    }

    /**
     * For every node of type 'P' in the section, score all still-unmatched
     * isolated text blocks and register the best one through
     * `this.outer.outer.matches.add(...)` when the match is unambiguous.
     *
     * A candidate is accepted only if its score reaches MIN_SCORE and it
     * leads the runner-up by at least MIN_LEAD. An accepted block is flagged
     * with `matched = true` so it is not offered to later nodes.
     *
     * @returns {undefined}
     */
    match () {

        // Lowest score (m/n ratio) the best candidate must reach
        const MIN_SCORE = 0.4;
        // Minimum lead of the best candidate over the second best
        const MIN_LEAD = 0.1;

        // Gather <p> nodes in this section
        const nodes = [];
        for (const node of this.outer.section.nodes) {
            if (node.type === 'P') {
                nodes.push(node);
            }
        }

        // Return if no <p> nodes were found
        // (this happens before any block is handed to isIsolatedParagraph)
        if (nodes.length === 0) {
            return;
        }

        // Filter out all blocks of text that aren't very likely paragraphs
        const blocks = [];
        for (const block of this.outer.blocks) {
            if (this.isIsolatedParagraph(block)) {
                blocks.push(block);
            }
        }

        // Return if no isolated blocks of text were found
        if (blocks.length === 0) {
            return;
        }

        // Iterate the <p> nodes
        for (const node of nodes) {

            // Score every block that is still available for this node
            const candidates = [];
            for (const block of blocks) {

                // Skip blocks that were already matched to an earlier node
                if (block.matched) {
                    continue;
                }

                // Get scores. Both helpers receive the same accumulator;
                // presumably they add to `n` and `m` - TODO confirm in tools.
                const score = { n: 0, m: 0 };
                this.tools.media.same(score, block, node);
                this.tools.sameWords(score, block.cleantext, node.handle.textContent, 'whitespace');

                // Nothing compared (n is also the divisor below) or nothing in common
                if (score.n === 0  ||  score.m === 0) {
                    continue;
                }
                candidates.push({ block: block, score: score.m / score.n });
            }

            // Continue if there are no candidates
            if (candidates.length === 0) {
                continue;
            }

            // Sort by score, best first. The comparator returns 0 for equal
            // scores so the ordering is consistent.
            candidates.sort((a, b) => b.score - a.score);

            // Continue if the top score is too bad
            const best = candidates[0];
            if (best.score < MIN_SCORE) {
                continue;
            }

            // Continue if the top two candidates have about the same score
            if (candidates.length >= 2  &&  best.score - candidates[1].score < MIN_LEAD) {
                continue;
            }

            // Prevent the block from being used again
            best.block.matched = true;

            // Store the match; block.start / block.end are offsets added to the outer start line
            const startline = this.outer.startline + best.block.start;
            const endline = this.outer.startline + best.block.end;
            this.outer.outer.matches.add (node.type, node.handle, startline, endline, 'paragraph');
        }
    }

    /**
     * Decide whether a block of text is likely a stand-alone paragraph.
     *
     * Side effect: when the block passes the first two checks it is handed to
     * `tools.media.gatherFromText` and `tools.replaceLinks`. Presumably these
     * fill in `block.cleantext` / `block.nolinktext`, which are read
     * afterwards - TODO confirm in tools.
     *
     * @param {object} block - Block of source text; `block.text` is read here.
     * @returns {boolean} false if the block looks like something other than a
     *     plain, intact paragraph; true otherwise.
     */
    isIsolatedParagraph (block) {

        // No line should start with these tags
        const tags = ['  ', '|', '^', '>'];
        for (const line of block.text.split("\n")) {
            if (tags.some((tag) => line.startsWith(tag))) {
                return false;
            }
        }

        // It might be a broken paragraph if it has opening or closing block tags
        if (this.tools.hasBlockTags(block.text)) {
            return false;
        }

        // Make text matching more accurate and avoid urls like http://images.com/dog.gif from being interpreted as the '//' tag
        this.tools.media.gatherFromText(block);   // Remove media tags from text
        this.tools.replaceLinks(block);           // Replace links with what 'node.textContent' would show

        // It's very likely the paragraph is broken if it has imbalanced inline tags
        if (this.tools.hasImbaInlineTags(block.nolinktext)) {
            return false;
        }

        // It is likely a paragraph (assuming it is surrounded by empty lines above and below)
        return true;
    }

}; // End Class
117