<?php

namespace dokuwiki\plugin\wordimport\docx;

/**
 * The main document
 *
 * This class is responsible for parsing the main document.xml file of the Word document.
 *
 * It handles all the different paragraph types and creates the final text output
 */
class Document extends AbstractXMLFile
{
    /** @var string The final DokuWiki syntax for the document */
    protected $text = '';

    /**
     * Parse /word/document.xml and assemble the text output in $this->text
     *
     * Every direct child of the first w:body element is passed to createParagraph().
     * Elements for which no paragraph object is created are skipped. Consecutive
     * paragraphs are separated by a blank line, unless the current paragraph asks to be
     * merged (mergeToPrevious()) and is of the same class as the previous one; then a
     * single newline is used.
     *
     * @inheritdoc
     */
    protected function parse()
    {
        $xml = $this->docx->loadXMLFile('/word/document.xml');
        $this->registerNamespaces($xml);

        // xpath() yields an empty array when nothing matches (and false/null on error).
        // Guard against that instead of calling children() on a non-existing element.
        $bodies = $xml->xpath('//w:body');
        $elements = $bodies ? $bodies[0]->children('w', true) : [];

        $last = null;
        foreach ($elements as $p) {
            $obj = $this->createParagraph($p);
            if (!$obj instanceof AbstractParagraph) continue;
            $obj->parse();

            if (
                $obj->mergeToPrevious() &&
                $last &&
                get_class($obj) === get_class($last)
            ) {
                // same kind of paragraph that wants to stay attached to its predecessor
                $this->text .= "\n";
            } elseif ($last) {
                // regular paragraph break; nothing is prepended to the very first paragraph
                $this->text .= "\n\n";
            }

            $this->text .= $obj; // toString
            $last = $obj;
        }

        $this->text .= "\n"; // add a final newline
    }

    /**
     * This factory method creates the correct paragraph object for the given XML element
     *
     * The checks are made in a fixed order and the first match wins:
     * table, code block, heading, list item, image, plain text paragraph.
     *
     * @param \SimpleXMLElement $p
     * @return AbstractParagraph|null null if the element matches none of the known types
     */
    public function createParagraph(\SimpleXMLElement $p): ?AbstractParagraph
    {
        $this->registerNamespaces($p); // FIXME is this still needed?

        // tables
        if ($p->getName() === 'tbl') {
            return new Table($this->docx, $p);
        }

        // code blocks: paragraphs whose ascii font is one of the configured code fonts
        // NOTE(review): assumes the 'codefonts' config is an array of font name strings - confirm
        $fonts = $p->xpath('w:pPr/w:rPr/w:rFonts');
        if ($fonts) {
            // cast explicitly so the comparison is string against string
            $ascii = (string) $fonts[0]->attributes('w', true)->ascii;
            if (in_array($ascii, $this->docx->getConf('codefonts'), true)) {
                return new CodeBlock($this->docx, $p);
            }
        }

        // headings
        if ($this->docx->getStyles()->hasStyle($p, ['heading 1', 'heading 2', 'heading 3', 'heading 4', 'heading 5'])) {
            return new Heading($this->docx, $p);
        }

        // lists
        if ($this->docx->getStyles()->hasStyle($p, ['list paragraph'])) {
            return new ListItem($this->docx, $p);
        }

        // images
        if ($p->xpath('w:r/w:drawing/wp:inline//a:blip')) {
            return new Image($this->docx, $p);
        }

        // text paragraphs
        if ($p->xpath('w:r/w:t')) {
            return new Paragraph($this->docx, $p);
        }

        return null;
    }

    /**
     * Return the text assembled by parse()
     *
     * @inheritdoc
     */
    public function __toString()
    {
        return $this->text;
    }
}