1<?php
2
3namespace dokuwiki\plugin\wordimport\docx;
4
5/**
6 * The main document
7 *
8 * This class is responsible for parsing the main document.xml file of the Word document.
9 *
10 * It handles all the different paragraph types and creates the final text output
11 */
class Document extends AbstractXMLFile
{
    /** @var string The final DokuWiki syntax for the document */
    protected $text = '';

    /**
     * Walk over all top level elements of the document body and build the output text
     *
     * Each child element of w:body is handed to createParagraph(). Elements for which no
     * paragraph object is created are skipped. The string representations of the created
     * objects are appended to $this->text, separated by either a single newline or a blank
     * line (see below). The text always ends with one final newline.
     *
     * A document that contains no w:body element is treated like an empty document.
     *
     * @inheritdoc
     */
    protected function parse()
    {
        $xml = $this->docx->loadXMLFile('/word/document.xml');
        $this->registerNamespaces($xml);

        // xpath() returns an empty array when nothing matches (and false/null on errors),
        // so the first result must not be accessed unconditionally
        $body = $xml->xpath('//w:body');
        $children = $body ? $body[0]->children('w', true) : [];

        $last = null;
        foreach ($children as $p) {
            $obj = $this->createParagraph($p);
            if (!$obj instanceof AbstractParagraph) continue;
            $obj->parse();

            if (
                $obj->mergeToPrevious() &&
                $last &&
                get_class($obj) === get_class($last)
            ) {
                // the paragraph asks to be merged and follows one of the very same class:
                // only a single line break separates the two
                $this->text .= "\n";
            } elseif ($last) {
                // everything else is separated from its predecessor by a blank line
                $this->text .= "\n\n";
            }

            $this->text .= $obj; // toString
            $last = $obj;
        }

        $this->text .= "\n"; // add a final newline
    }

    /**
     * This factory method creates the correct paragraph object for the given XML element
     *
     * The checks are run in a fixed order and the first match wins:
     * tables, code blocks, headings, list items, images, plain text paragraphs.
     *
     * @param \SimpleXMLElement $p A child element of the document body
     * @return AbstractParagraph|null null when the element matches none of the known types
     */
    public function createParagraph(\SimpleXMLElement $p): ?AbstractParagraph
    {
        // FIXME is this still needed?
        // NOTE(review): presumably required because the xpath queries below use prefixes
        // (wp:, a:) that have to be known to this element - confirm before removing
        $this->registerNamespaces($p);

        // tables
        if ($p->getName() == 'tbl') {
            return new Table($this->docx, $p);
        }

        // code blocks: the paragraph's ASCII font is one of the configured code fonts
        $fonts = $p->xpath('w:pPr/w:rPr/w:rFonts');
        if ($fonts) {
            // cast explicitly, otherwise a SimpleXMLElement object would be compared
            $ascii = (string) $fonts[0]->attributes('w', true)->ascii;
            // a missing w:ascii attribute results in an empty string, which must never
            // match (e.g. against an empty entry in the configuration)
            if ($ascii !== '' && in_array($ascii, $this->docx->getConf('codefonts'), true)) {
                return new CodeBlock($this->docx, $p);
            }
        }

        // headings
        if ($this->docx->getStyles()->hasStyle($p, ['heading 1', 'heading 2', 'heading 3', 'heading 4', 'heading 5'])) {
            return new Heading($this->docx, $p);
        }

        // lists
        if ($this->docx->getStyles()->hasStyle($p, ['list paragraph'])) {
            return new ListItem($this->docx, $p);
        }

        // images
        if ($p->xpath('w:r/w:drawing/wp:inline//a:blip')) {
            return new Image($this->docx, $p);
        }

        // text paragraphs
        if ($p->xpath('w:r/w:t')) {
            return new Paragraph($this->docx, $p);
        }

        // anything else is not supported
        return null;
    }

    /**
     * Return the text that has been assembled by parse()
     *
     * @inheritdoc
     */
    public function __toString()
    {
        return $this->text;
    }
}
96