1<?php
2
3namespace dokuwiki\plugin\wordimport\docx;
4
5/**
6 * A text run
7 *
8 * This is a part of a paragraph with a specific formatting
9 */
class TextRun // this is not a paragraph!
{
    /**
     * @var int[] The formatting of this run and the length of the formatting chain. A value of 0 means
     *             the formatting is not present, a value of 1 means it is present only in this run. A value
     *             of 2 or higher means it is present in this run and the n-1 following runs.
     */
    protected $formatting = [
        'bold' => 0,
        'italic' => 0,
        'underline' => 0,
        'strike' => 0,
        'mono' => 0,
    ];

    /** @var string The text of this run, or the newline code if the run contains a break */
    protected $text = '';

    /** @var Docx The document object; queried via getConf() when parsing the formatting */
    protected $docx;

    /**
     * Create a run from its XML element
     *
     * A run that contains a w:br element becomes a pure newline run: its text is the
     * given newline code and no formatting is parsed for it. For all other runs the
     * formatting is parsed and the contents of all w:t children are concatenated.
     *
     * @param Docx $docx
     * @param \SimpleXMLElement $tr The run element
     * @param string $newline The code for newlines
     */
    public function __construct(Docx $docx, \SimpleXMLElement $tr, $newline = '\\\\ ')
    {
        $this->docx = $docx;

        // NOTE(review): any text stored next to a break in the same run is discarded here.
        // Kept as is because callers may rely on break runs being unformatted newline runs.
        $br = $tr->xpath('w:br');
        if (!empty($br)) {
            $this->text = $newline;
            return;
        }

        $this->parseFormatting($tr);

        // a run may hold more than one w:t element, keep all of them instead of only the first
        $texts = $tr->xpath('w:t');
        if (!empty($texts)) {
            $this->text = implode('', array_map('strval', $texts));
        }
    }

    /**
     * The text of this run
     *
     * @return string
     */
    public function __toString(): string
    {
        return $this->text;
    }

    /**
     * A list of set formattings on this run
     *
     * Keys are the formatting names, values are the chain lengths as described on
     * the $formatting property.
     *
     * @return int[]
     */
    public function getFormatting()
    {
        return $this->formatting;
    }

    /**
     * Check if this run is only whitespace
     *
     * Relies on ctype_space(), so an empty run is NOT considered whitespace.
     *
     * @return bool
     */
    public function isWhiteSpace()
    {
        return ctype_space($this->text);
    }

    /**
     * Parse the formatting of this run
     *
     * Looks at the children of the first w:rPr element of the run and marks each
     * recognized formatting as present (1). Properties that are explicitly switched
     * off through their w:val attribute are ignored.
     *
     * @see http://www.datypic.com/sc/ooxml/e-w_rPr-4.html
     * @param \SimpleXMLElement $textRun
     */
    public function parseFormatting(\SimpleXMLElement $textRun)
    {
        $result = $textRun->xpath('w:rPr');
        if (empty($result)) return;

        foreach ($result[0]->children('w', true) as $child) {
            switch ($child->getName()) {
                case 'b':
                case 'bCs':
                    if ($this->isEnabled($child)) {
                        $this->formatting['bold'] = 1;
                    }
                    break;
                case 'i':
                case 'iCs':
                case 'em':
                    if ($this->isEnabled($child)) {
                        $this->formatting['italic'] = 1;
                    }
                    break;
                case 'u':
                    if ($this->isEnabled($child)) {
                        $this->formatting['underline'] = 1;
                    }
                    break;
                case 'strike':
                case 'dstrike':
                    if ($this->isEnabled($child)) {
                        $this->formatting['strike'] = 1;
                    }
                    break;
                case 'rFonts':
                    // cast explicitly: the attribute is a SimpleXMLElement (or missing), not a string
                    $attributes = $child->attributes('w', true);
                    $font = $attributes === null ? '' : (string)$attributes->ascii;
                    // presumably 'codefonts' is a list of font name strings; verify against Docx::getConf()
                    if (in_array($font, $this->docx->getConf('codefonts'), true)) {
                        $this->formatting['mono'] = 1;
                    }
                    break;
            }
        }
    }

    /**
     * Check whether a run property element actually switches its formatting on
     *
     * A property without a w:val attribute counts as enabled. The values "0", "false"
     * and "off" (on/off properties) as well as "none" (underline and emphasis type)
     * explicitly disable the formatting.
     *
     * @param \SimpleXMLElement $property A child element of w:rPr
     * @return bool
     */
    protected function isEnabled(\SimpleXMLElement $property)
    {
        $attributes = $property->attributes('w', true);
        if ($attributes === null) return true;

        $value = strtolower(trim((string)$attributes->val));
        return !in_array($value, ['0', 'false', 'off', 'none'], true);
    }

    /**
     * Use the formatting of the following run to update the scores of this one
     *
     * This is used to find the longest chains of formatting. For every formatting
     * that is present in this run, the score of the next run is added to this run's
     * score. Afterwards the formatting list is sorted by score, highest first.
     *
     * @param TextRun $nextRun
     * @return void
     */
    public function updateFormattingScores(TextRun $nextRun)
    {
        $next = $nextRun->getFormatting();
        foreach ($next as $key => $value) {
            // a formatting that is not set on this run can not start a chain here
            if ($this->formatting[$key] === 0) continue;
            $this->formatting[$key] += $value;
        }

        // sort by value, longest chains first
        arsort($this->formatting);
    }
}
132