3 * Utility functions.
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     LarsDW223
7 */
9/** Include csscolors */
10require_once DOKU_PLUGIN . 'odt/ODT/css/csscolors.php';
11/** Include cssborder */
12require_once DOKU_PLUGIN . 'odt/ODT/css/cssborder.php';
15 * ODTUtility:
16 * Class containing some internal utility functions.
17 *
18 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
19 * @author     LarsDW223
20 * @package    ODT\Utility
21 */
22class ODTUtility
24    /**
25     * Replace local links with bookmark references or text
26     *
27     * @param    string $content          The document content
28     * @param    array  $toc              The table of contents
29     * @param    array  $bookmarks        List of bookmarks
30     * @param    string $styleName        Link style name
31     * @param    string $visitedStyleName Visited link style name
32     */
33    public static function replaceLocalLinkPlaceholders(&$content, array $toc, array $bookmarks, $styleName, $visitedStyleName) {
34        $matches = array();
35        $position = 0;
36        $max = strlen ($content);
37        $length = strlen ('<locallink>');
38        $lengthWithName = strlen ('<locallink name=');
39        while ( $position < $max ) {
40            $first = strpos ($content, '<locallink', $position);
41            if ( $first === false ) {
42                break;
43            }
44            $endFirst = strpos ($content, '>', $first);
45            if ( $endFirst === false ) {
46                break;
47            }
48            $second = strpos ($content, '</locallink>', $endFirst);
49            if ( $second === false ) {
50                break;
51            }
53            // $match includes the whole tag '<locallink name="...">text</locallink>'
54            // The attribute 'name' is optional!
55            $match = substr ($content, $first, $second - $first + $length + 1);
56            $text = substr ($match, $endFirst-$first+1, -($length + 1));
57            $text = trim ($text, ' ');
58            $text = strtolower ($text);
59            $page = str_replace (' ', '_', $text);
60            $opentag = substr ($match, 0, $endFirst-$first);
61            $name = substr ($opentag, $lengthWithName);
62            $name = trim ($name, '">');
64            $linkStyle  = 'text:style-name="'.$styleName.'"';
65            $linkStyle .= ' text:visited-style-name="'.$visitedStyleName.'"';
67            $found = false;
68            foreach ($toc as $item) {
69                $params = explode (',', $item);
71                if ( $page == $params [1] ) {
72                    $found = true;
73                    $link  = '<text:a xlink:type="simple" xlink:href="#'.$params [0].'" '.$linkStyle.'>';
74                    if ( !empty($name) ) {
75                        $link .= $name;
76                    } else {
77                        $link .= $text;
78                    }
79                    $link .= '</text:a>';
81                    $content = str_replace ($match, $link, $content);
82                    $position = $first + strlen ($link);
83                }
84            }
86            if ( $found == false ) {
87                // Nothing found yet, check the bookmarks too.
88                foreach ($bookmarks as $item) {
89                    if ( $page == $item ) {
90                        $found = true;
91                        $link  = '<text:a xlink:type="simple" xlink:href="#'.$item.'" '.$linkStyle.'>';
92                        if ( !empty($name) ) {
93                            $link .= $name;
94                        } else {
95                            $link .= $text;
96                        }
97                        $link .= '</text:a>';
99                        $content = str_replace ($match, $link, $content);
100                        $position = $first + strlen ($link);
101                    }
102                }
103            }
105            if ( $found == false ) {
106                // If we get here, then the referenced target was not found.
107                // There must be a bug manging the bookmarks or links!
108                // At least remove the locallink element and insert text.
109                if ( !empty($name) ) {
110                    $content = str_replace ($match, $name, $content);
111                } else {
112                    $content = str_replace ($match, $text, $content);
113                }
114                $position = $first + strlen ($text);
115            }
116        }
117    }
119    /**
120     * This function deletes the useless elements. Right now, these are empty paragraphs
121     * or paragraphs that only include whitespace.
122     *
123     * IMPORTANT:
124     * Paragraphs can be used for pagebreaks/changing page format.
125     * Such paragraphs may not be deleted!
126     *
127     * @param    string $docContent              The document content
128     * @param    array  $preventDeletetionStyles Array of style names which may not be deleted
129     */
130    public static function deleteUselessElements(&$docContent, array $preventDeletetionStyles) {
131        $length_open = strlen ('<text:p');
132        $length_close = strlen ('</text:p>');
133        $max = strlen ($docContent);
134        $pos = 0;
136        while ($pos < $max) {
137            $start_open = strpos ($docContent, '<text:p', $pos);
138            if ( $start_open === false ) {
139                break;
140            }
141            $start_close = strpos ($docContent, '>', $start_open + $length_open);
142            if ( $start_close === false ) {
143                break;
144            }
145            $end = strpos ($docContent, '</text:p>', $start_close + 1);
146            if ( $end === false ) {
147                break;
148            }
150            $deleted = false;
151            $length = $end - $start_open + $length_close;
152            $content = substr ($docContent, $start_close + 1, $end - ($start_close + 1));
154            if ( empty($content) || ctype_space ($content) ) {
155                // Paragraph is empty or consists of whitespace only. Check style name.
156                $style_start = strpos ($docContent, '"', $start_open);
157                if ( $style_start === false ) {
158                    // No '"' found??? Ignore this paragraph.
159                    break;
160                }
161                $style_end = strpos ($docContent, '"', $style_start+1);
162                if ( $style_end === false ) {
163                    // No '"' found??? Ignore this paragraph.
164                    break;
165                }
166                $style_name = substr ($docContent, $style_start+1, $style_end - ($style_start+1));
168                // Only delete empty paragraph if not listed in 'Do not delete' array!
169                if ( !in_array($style_name, $preventDeletetionStyles) )
170                {
171                    $docContent = substr_replace($docContent, '', $start_open, $length);
173                    $deleted = true;
174                    $max -= $length;
175                    $pos = $start_open;
176                }
177            }
179            if ( $deleted == false ) {
180                $pos = $start_close;
181            }
182        }
183    }
185    /**
186     * The function tries to examine the width and height
187     * of the image stored in file $src.
188     *
189     * @param  string $src The file name of image
190     * @param  int    $maxwidth The maximum width the image shall have
191     * @param  int    $maxheight The maximum height the image shall have
192     * @return array  Width and height of the image in centimeters or
193     *                both 0 if file doesn't exist.
194     *                Just the integer value, no units included.
195     */
196    public static function getImageSize($src, $maxwidth=NULL, $maxheight=NULL){
197        if (file_exists($src)) {
198            $info  = getimagesize($src);
199            if(!$width){
200                $width  = $info[0];
201                $height = $info[1];
202            }else{
203                $height = round(($width * $info[1]) / $info[0]);
204            }
206            if ($maxwidth && $width > $maxwidth) {
207                $height = $height * ($maxwidth/$width);
208                $width = $maxwidth;
209            }
210            if ($maxheight && $height > $maxheight) {
211                $width = $width * ($maxheight/$height);
212                $height = $maxheight;
213            }
215            // Convert from pixel to centimeters
216            if ($width) $width = (($width/96.0)*2.54);
217            if ($height) $height = (($height/96.0)*2.54);
219            return array($width, $height);
220        }
222        return array(0, 0);
223    }
225    /**
226     * Return the size of an image in centimeters.
227     *
228     * @param  string       $src         Filepath of the image
229     * @param  string|null  $width       Alternative width
230     * @param  string|null  $height      Alternative height
231     * @param  boolean|true $preferImage Prefer original image size
232     * @param  ODTUnits     $units       $ODTUnits object for unit conversion
233     * @return array
234     */
235    public static function getImageSizeString($src, $width = NULL, $height = NULL, $preferImage=true, ODTUnits $units){
236        list($width_file, $height_file) = self::getImageSize($src);
238        // Get original ratio if possible
239        $ratio = 1;
240        if ($width_file != 0 && $height_file != 0) {
241            $ratio = $height_file/$width_file;
242        }
244        if ($width_file != 0 && $preferImage) {
245            $width  = $width_file.'cm';
246            $height = $height_file.'cm';
247        } else {
248            // convert from pixel to centimeters only if no unit is
249            // specified or if unit is 'px'
250            $unit_width = $units->stripDigits ($width);
251            $unit_height = $units->stripDigits ($height);
252            if ((empty($unit_width) && empty($unit_height)) ||
253                ($unit_width == 'px' && $unit_height == 'px')) {
254                if (!$height) {
255                    $height = $width * $ratio;
256                }
257                $height = (($height/96.0)*2.54).'cm';
258                if ($width) $width = (($width/96.0)*2.54).'cm';
259            }
260        }
262        // At this point $width and $height should include a unit
264        $width = str_replace(',', '.', $width);
265        $height = str_replace(',', '.', $height);
266        if ($width && $height) {
267            // Don't be wider than the page
268            if ($width >= 17){ // FIXME : this assumes A4 page format with 2cm margins
269                $width = $width.'"  style:rel-width="100%';
270                $height = $height.'"  style:rel-height="scale';
271            } else {
272                $width = $width;
273                $height = $height;
274            }
275        } else {
276            // external image and unable to download, fallback
277            if (!$width) {
278                $width = '" svg:rel-width="100%';
279            }
280            if (!$height) {
281                $height = '" svg:rel-height="100%';
282            }
283        }
284        return array($width, $height);
285    }
287    /**
288     * Split $value by whitespace and convert any relative values (%)
289     * into an absolute value. This is done by taking the percentage of
290     * $maxWidthInPt.
291     *
292     * @param  string       $value        String (Property value)
293     * @param  integer      $maxWidthInPt Maximum width in points
294     * @param  ODTUnits     $units        $ODTUnits object for unit conversion
295     * @return string
296     */
297    protected static function adjustPercentageValueParts ($value, $maxWidthInPt, $units) {
298        $values = preg_split ('/\s+/', $value);
299        $value = '';
300        foreach ($values as $part) {
301            $length = strlen ($part);
303            if ( $length > 1 && $part [$length-1] == '%' ) {
304                $percentageValue = $units->getDigits($part);
305                $part = (($percentageValue * $maxWidthInPt)/100) . 'pt';
306                //$part = '5pt ';
307            }
309            $value .= ' '.$part;
310        }
311        $value = trim($value);
312        $value = trim($value, '"');
314        return $value;
315    }
317    /**
318     * The function adjusts the properties values for ODT:
319     * - 'em' units are converted to 'pt' units
320     * - CSS color names are converted to its RGB value
321     * - short color values like #fff are converted to the long format, e.g #ffffff
     * - some relative values are converted to absolute depending on other
     *   values e.g. 'line-height' and 'font-size'
324     *
325     * @author LarsDW223
326     *
327     * @param  array    $properties Array with property value pairs
328     * @param  ODTUnits $units      Units object to use for conversion
     * @param  integer  $maxWidth   Width of the surrounding element, used to resolve relative margins (optional)
330     */
331    public static function adjustValuesForODT (&$properties, ODTUnits $units, $maxWidth=NULL) {
332        $adjustToMaxWidth = array('margin', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom');
334        // Convert 'text-decoration'.
335        if (isset($properties ['text-decoration'])) {
336            switch ($properties ['text-decoration']) {
337                case 'line-through':
338                    $properties ['text-line-through-style'] = 'solid';
339                    break;
340                case 'underline':
341                    $properties ['text-underline-style'] = 'solid';
342                    break;
343                case 'overline':
344                    $properties ['text-overline-style'] = 'solid';
345                    break;
346            }
347        }
349        // Normalize border properties
350        cssborder::normalize($properties);
352        // First do simple adjustments per property
353        foreach ($properties as $property => $value) {
354            $properties [$property] = self::adjustValueForODT ($property, $value, $units);
355        }
357        // Adjust relative margins if $maxWidth is given.
358        // $maxWidth is expected to be the width of the surrounding element.
359        if (isset($maxWidth)) {
360            $maxWidthInPt = $units->toPoints($maxWidth, 'y');
361            $maxWidthInPt = $units->getDigits($maxWidthInPt);
363            foreach ($adjustToMaxWidth as $property) {
364                if (!empty($properties [$property])) {
365                    $properties [$property] = self::adjustPercentageValueParts ($properties [$property], $maxWidthInPt, $units);
366                }
367            }
368        }
370        // Now we do the adjustments for which one value depends on another
372        // Do we have font-size or line-height set?
373        if (isset($properties ['font-size']) || isset($properties ['line-height'])) {
374            // First get absolute font-size in points
375            $base_font_size_in_pt = $units->getPixelPerEm ().'px';
376            $base_font_size_in_pt = $units->toPoints($base_font_size_in_pt, 'y');
377            $base_font_size_in_pt = $units->getDigits($base_font_size_in_pt);
378            if (isset($properties ['font-size'])) {
379                $font_size_unit = $units->stripDigits($properties ['font-size']);
380                $font_size_digits = $units->getDigits($properties ['font-size']);
381                if ($font_size_unit == '%') {
382                    $base_font_size_in_pt = ($font_size_digits * $base_font_size_in_pt)/100;
383                    $properties ['font-size'] = $base_font_size_in_pt.'pt';
384                } elseif ($font_size_unit != 'pt') {
385                    $properties ['font-size'] = $units->toPoints($properties ['font-size'], 'y');
386                    $base_font_size_in_pt = $units->getDigits($properties ['font-size']);
387                } else {
388                    $base_font_size_in_pt = $units->getDigits($properties ['font-size']);
389                }
390            }
392            // Convert relative line-heights to absolute
393            if (isset($properties ['line-height'])) {
394                $line_height_unit = $units->stripDigits($properties ['line-height']);
395                $line_height_digits = $units->getDigits($properties ['line-height']);
396                if ($line_height_unit == '%') {
397                    $properties ['line-height'] = (($line_height_digits * $base_font_size_in_pt)/100).'pt';
398                } elseif (empty($line_height_unit)) {
399                    $properties ['line-height'] = ($line_height_digits * $base_font_size_in_pt).'pt';
400                }
401            }
402        }
403    }
405    /**
406     * The function adjusts the property value for ODT:
407     * - 'em' units are converted to 'pt' units
408     * - CSS color names are converted to its RGB value
409     * - short color values like #fff are converted to the long format, e.g #ffffff
410     *
411     * @author LarsDW223
412     *
413     * @param  string   $property   The property name
414     * @param  string   $value      The value
415     * @param  ODTUnits $units      Units object to use for conversion
416     * @return string   Converted value
417     */
418    public static function adjustValueForODT ($property, $value, ODTUnits $units) {
419        if ($property == 'font-family') {
420            // There might be several font/font-families included.
421            // Only take the first one.
422            $value = trim($value, '"');
423            if (strpos($value, ',') !== false) {
424                $values = explode(',', $value);
425                $value = trim ($values [0], '"');
426                $value = trim ($value, "'");
427                $value = trim ($value);
428            }
429        } else {
430            $values = preg_split ('/\s+/', $value);
431            $value = '';
432            foreach ($values as $part) {
433                $length = strlen ($part);
435                // If it is a short color value (#xxx) then convert it to long value (#xxxxxx)
436                // (ODT does not support the short form)
437                if ( $part [0] == '#' && $length == 4 ) {
438                    $part = '#'.$part [1].$part [1].$part [2].$part [2].$part [3].$part [3];
439                } else {
440                    // If it is a CSS color name, get it's real color value
441                    $color = csscolors::getColorValue ($part);
442                    if ( $part == 'black' || $color != '#000000' ) {
443                        $part = $color;
444                    }
445                }
447                if ( $length > 2 && $part [$length-2] == 'e' && $part [$length-1] == 'm' ) {
448                    $part = $units->toPoints($part, 'y');
449                }
451                if ( $length > 2 && ($part [$length-2] != 'p' || $part [$length-1] != 't') &&
452                     strpos($property, 'border')!==false ) {
453                    $part = $units->toPoints($part, 'y');
454                }
456                // Some values can have '"' in it. These need to be converted to '&apos;'
457                // e.g. 'font-family' tp specify that '"Courier New"' is one font name not two
458                $part = str_replace('"', '&apos;', $part);
460                $value .= ' '.$part;
461            }
462            $value = trim($value);
463            $value = trim($value, '"');
464        }
466        return $value;
467    }
469    /**
470     * This function processes the CSS style declarations in $style and saves them in $properties
471     * as key - value pairs, e.g. $properties ['color'] = 'red'. It also adjusts the values
472     * for the ODT format and changes URLs to local paths if required, using $baseURL).
473     *
474     * @author LarsDW223
475     * @param array       $properties
476     * @param string      $style      The CSS style e.g. 'color:red;'
477     * @param string|null $baseURL
478     * @param ODTUnits    $units      Units object to use for conversion
479     * @param integer     $maxWidth   MaximumWidth
480     */
481    public static function getCSSStylePropertiesForODT(&$properties, $style, $baseURL = NULL, ODTUnits $units, $maxWidth=NULL){
482        // Create rule with selector '*' (doesn't matter) and declarations as set in $style
483        $rule = new css_rule ('*', $style);
484        $rule->getProperties ($properties);
485        //foreach ($properties as $property => $value) {
486        //    $properties [$property] = self::adjustValueForODT ($property, $value, $units);
487        //}
488        self::adjustValuesForODT ($properties, $units, $maxWidth);
490        if ( !empty ($properties ['background-image']) ) {
491            if ( !empty ($baseURL) ) {
492                // Replace 'url(...)' with $baseURL
493                $properties ['background-image'] = cssimportnew::replaceURLPrefix ($properties ['background-image'], $baseURL);
494            }
495        }
496    }
498    /**
499     * The function opens/puts a new element on the HTML stack in $params->htmlStack.
500     * The element name will be $element and it will be created with the attributes $attributes.
501     * Then CSS matching is performed and the CSS properties are returned in $dest.
502     * Finally the CSS properties are converted to ODT format if neccessary.
503     *
504     * @author LarsDW223
505     * @param ODTInternalParams $params     Commom params.
506     * @param array             $dest       Target array for properties storage
507     * @param string            $element    The element's name
508     * @param string            $attributes The element's attributes
509     * @param integer           $maxWidth   Maximum Width
510     */
511    public static function openHTMLElement (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL) {
512        // Push/create our element to import on the stack
513        $params->htmlStack->open($element, $attributes);
514        $toMatch = $params->htmlStack->getCurrentElement();
515        $params->import->getPropertiesForElement($dest, $toMatch, $params->units);
517        // Adjust values for ODT
518        self::adjustValuesForODT($dest, $params->units, $maxWidth);
519    }
521    /**
522     * The function closes element with name $element on the HTML stack in $params->htmlStack.
523     *
524     * @author LarsDW223
525     * @param ODTInternalParams $params     Commom params.
526     * @param string            $element    The element's name
527     */
528    public static function closeHTMLElement (ODTInternalParams $params, $element) {
529        $params->htmlStack->close($element);
530    }
532    /**
533     * The function temporarily opens/puts a new element on the HTML stack in $params->htmlStack.
534     * Before leaving the function the element is removed from the stack.
535     *
536     * The element name will be $element and it will be created with the attributes $attributes.
537     * After opening the element CSS matching is performed and the CSS properties are returned in $dest.
538     * Finally the CSS properties are converted to ODT format if neccessary.
539     *
540     * @author LarsDW223
541     * @param ODTInternalParams $params     Commom params.
542     * @param array             $dest       Target array for properties storage
543     * @param string            $element    The element's name
544     * @param string            $attributes The element's attributes
545     * @param integer           $maxWidth   Maximum Width
546     * @param boolean           $inherit    Enable/disable CSS inheritance
547     */
548    public static function getHTMLElementProperties (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL, $inherit=true) {
549        // Push/create our element to import on the stack
550        $params->htmlStack->open($element, $attributes);
551        $toMatch = $params->htmlStack->getCurrentElement();
552        $params->import->getPropertiesForElement($dest, $toMatch, $params->units, $inherit);
554        // Adjust values for ODT
555        self::adjustValuesForODT($dest, $params->units, $maxWidth);
557        // Remove element from stack
558        $params->htmlStack->removeCurrent();
559    }
561    /**
562     * Small helper function for finding the next tag enclosed in <angle> brackets.
563     * Returns beginning and end of the tag as an array [0] = start, [1] = end.
564     *
565     * @author LarsDW223
566     * @param string $content Code to search in.
567     * @param string $pos     Start position for searching.
568     * @return array
569     */
570    public static function getNextTag (&$content, $pos) {
571        $start = strpos ($content, '<', $pos);
572        if ($start === false) {
573            return false;
574        }
575        $end = strpos ($content, '>', $pos);
576        if ($end === false) {
577            return false;
578        }
579        return array($start, $end);
580    }
582    /**
583     * The function returns $value as a valid IRI and replaces some signs
584     * if neccessary, e.g. '&' will be replaced by '&amp;'.
585     * The function will not do double replacements, e.g. if the string
586     * already includes a '&amp;' it will NOT become '&amp;amp;'.
587     *
588     * @author LarsDW223
589     * @param string $value String to be converted to IRI
590     * @return string
591     */
592    public static function stringToIRI ($value) {
593        $max = strlen ($value);
594        for ($pos = 0 ; $pos < $max ; $pos++) {
595            switch ($value [$pos]) {
596                case '&':
597                    if ($max - $pos >= 4 &&
598                        $value [$pos+1] == '#' &&
599                        $value [$pos+2] == '3' &&
600                        $value [$pos+3] == '8' &&
601                        $value [$pos+4] == ';') {
602                        // '&#38;' must be replaced with "&amp;"
603                        $value [$pos+1] = 'a';
604                        $value [$pos+2] = 'm';
605                        $value [$pos+3] = 'p';
606                        $pos += 4;
607                    } else if ($max - $pos < 4 ||
608                        $value [$pos+1] != 'a' ||
609                        $value [$pos+2] != 'm' ||
610                        $value [$pos+3] != 'p' ||
611                        $value [$pos+4] != ';' ) {
612                        // '&' must be replaced with "&amp;"
613                        $new = substr($value, 0, $pos+1);
614                        $new .= 'amp;';
615                        $new .= substr($value, $pos+1);
616                        $value = $new;
617                        $max += 4;
618                        $pos += 4;
619                    }
620                    break;
621            }
622        }
623        return $value;
624    }
626    protected static function getLinkURL ($search) {
627        preg_match ('/href="[^"]*"/', $search, $matches);
628        $url = substr ($matches[0], 5);
629        $url = trim($url, '"');
630        // Keep '&' and ':' in the link URL unescaped, otherwise url parameter passing will not work
631        $url = str_replace('&amp;', '&', $url);
632        $url = str_replace('%3A', ':', $url);
634        return $url;
635    }
637    /**
638     * static call back to replace spaces
639     *
640     * @param array $matches
641     * @return string
642     */
643    protected static function _preserveSpace($matches){
644        $spaces = $matches[1];
645        $len    = strlen($spaces);
646        return '<text:s text:c="'.$len.'"/>';
647    }
649    protected static function createTextStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
650        // Create automatic style
651        if (!isset($styleName) || !$params->document->styleExists($styleName)) {
652            // Get properties
653            $properties = array();
654            self::getHTMLElementProperties ($params, $properties, $element, $attributes);
656            if (!isset($styleName)) {
657                $properties ['style-name'] = ODTStyle::getNewStylename ('span');
658            } else {
659                // Use callers style name. He needs to be sure that it's unique!
660                $properties ['style-name'] = $styleName;
661            }
662            $params->document->createTextStyle($properties, false);
664            // Return style name
665            return $properties ['style-name'];
666        } else {
667            // Style already exists
668            return $styleName;
669        }
670    }
672    protected static function createParagraphStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
673        // Create automatic style
674        if (!isset($styleName) || !$params->document->styleExists($styleName)) {
675            // Get properties
676            $properties = array();
677            self::getHTMLElementProperties ($params, $properties, $element, $attributes);
679            if (!isset($styleName)) {
680                $properties ['style-name'] = ODTStyle::getNewStylename ('span');
681            } else {
682                // Use callers style name. He needs to be sure that it's unique!
683                $properties ['style-name'] = $styleName;
684            }
685            $params->document->createParagraphStyle($properties, false);
687            // Return style name
688            return $properties ['style-name'];
689        } else {
690            // Style already exists
691            return $styleName;
692        }
693    }
695    /**
696     * Convenience function for converting some HTML code to ODT format.
697     * The function will try to automatically create any needed ODT styles
698     * from the CSS code found in the HTML code.
699     *
700     * Also some special settings can be passed in the options array:
701     *
702     * $options ['p_style']:
703     * The default paragraph style. If empty 'body' will be used.
704     *
705     * $options ['list_p_style']:
706     * The default paragraph style in lists. If empty 'body' will be used.
707     *
708     * $options ['list_ol_style']:
709     * The default style for ordered lists. If empty 'numbering' will be used.
710     *
711     * $options ['list_ul_style']:
712     * The default style for un-ordered lists. If empty 'list' will be used.
713     *
714     * $options ['media_selector']:
715     * The media selector used for CSS handling (e.g. 'screen' or 'print').
716     * If empty the current/configured one will be used.
717     *
718     * $options ['element']:
719     * If not empty an HTML tag named '$options ['element']' will be pushed
720     * on the internal HTML stack before converting the $HTMLCode.
721     * This influences CSS handling.
722     *
723     * $options ['attributes']:
724     * The attributes to set for '$options ['element']'.
725     *
726     * $options ['escape_content']:
727     * Should have the value 'true' or 'false' (as string!). If 'true'
728     * XML entities will be escaped. Otherwise it is assumed that it
729     * already has been done.
730     *
731     * $options ['class']:
732     * Optional CSS class to add to found 'class="..."' attributes in
733     * the HTML code.
734     *
735     * $options ['style_names']:
736     * If set to 'prefix_and_class' then ODT style names will not be
737     * generated dynamically but are constructed from '$options ['style_names_prefix']'
738     * following the CSS class name(s).
739     *
740     * $options ['linebreaks']:
741     * If set to 'remove' then linebreaks will be ignored. Otherwise
742     * they will be kept and converted to proper ODT linebreaks.
743     *
744     * $options ['tabs']:
745     * If set to 'remove' then tabs will be ignored. Otherwise they
746     * will be kept and converted to proper ODT tabs.
747     *
748     * $options ['space']:
749     * If set to 'preserve' then space is preserved like for preformatted
750     * code blocks. Otherwise space is not preserved and multiple spaces
751     * will appear as only one space.
752     *
753     * @author LarsDW223
754     * @param ODTInternalParams $params   The internal params
755     * @param string            $HTMLCode The HTML code to convert
756     * @param array             $options  Array of options
757     */
public static function generateODTfromHTMLCode(ODTInternalParams $params, $HTMLCode, array $options){
    // Nothing is returned: the generated ODT markup is appended to
    // $params->content. Styles, hyperlinks and paragraphs are created
    // through $params->document and the helper methods of this class.

    // Supported tags and their fixed ODT replacement. 'span', 'a', 'ol',
    // 'ul' and 'li' are converted individually in the switch below, every
    // other entry is a simple replacement. All other tags found in the
    // HTML code are kept as (escaped) text.
    $elements = array ('sup' => array ('open' => '<text:span text:style-name="sup">',
                                       'close' => '</text:span>'),
                       'sub' => array ('open' => '<text:span text:style-name="sub">',
                                       'close' => '</text:span>'),
                       'u' => array ('open' => '<text:span text:style-name="underline">',
                                     'close' => '</text:span>'),
                       'em' => array ('open' => '<text:span text:style-name="Emphasis">',
                                      'close' => '</text:span>'),
                       'strong' => array ('open' => '<text:span text:style-name="Strong_20_Emphasis">',
                                          'close' => '</text:span>'),
                       'del' => array ('open' => '<text:span text:style-name="del">',
                                       'close' => '</text:span>'),
                       'span' => array ('open' => '',
                                        'close' => ''),
                       'a' => array ('open' => '',
                                     'close' => ''),
                       'ol' => array ('open' => '',
                                      'close' => ''),
                       'ul' => array ('open' => '',
                                      'close' => ''),
                       'li' => array ('open' => '<text:list-item><text:p text:style-name="Text_20_body">',
                                      'close' => '</text:p></text:list-item>'),
                       // At the moment divs are only removed
                       'div' => array ('open' => '', 'close' => ''),
                   );

    // Read all optional settings exactly once. Missing keys must not raise
    // "undefined array key" warnings and fall back to the documented
    // defaults: escape content, keep linebreaks and tabs, collapse spaces.
    $escapeContent  = !(isset($options ['escape_content']) && $options ['escape_content'] === 'false');
    $addClass       = !empty($options ['class']);
    $namesFromClass = isset($options ['style_names']) && $options ['style_names'] == 'prefix_and_class';
    $stylePrefix    = isset($options ['style_names_prefix']) ? $options ['style_names_prefix'] : '';
    $keepLinebreaks = !(isset($options ['linebreaks']) && $options ['linebreaks'] === 'remove');
    $keepTabs       = !(isset($options ['tabs']) && $options ['tabs'] === 'remove');
    $preserveSpace  = isset($options ['space']) && $options ['space'] === 'preserve';

    // Remove useless leading and trailing whitespace-newlines and replace
    // the HTML entity for a non-breaking space with its numeric XML form.
    $HTMLCode = preg_replace('/^&nbsp;\n/', '', $HTMLCode);
    $HTMLCode = preg_replace('/\n&nbsp;$/', '', $HTMLCode);
    $HTMLCode = str_replace('&nbsp;', '&#xA0;', $HTMLCode);

    // Get default paragraph style
    if (!empty($options ['p_style'])) {
        $p_style = $options ['p_style'];
    } else {
        $p_style = $params->document->getStyleName('body');
    }

    // Get default list style names
    if (!empty($options ['list_p_style'])) {
        $p_list_style = $options ['list_p_style'];
    } else {
        $p_list_style = $params->document->getStyleName('body');
    }
    if (!empty($options ['list_ol_style'])) {
        $ol_list_style = $options ['list_ol_style'];
    } else {
        $ol_list_style = $params->document->getStyleName('numbering');
    }
    if (!empty($options ['list_ul_style'])) {
        $ul_list_style = $options ['list_ul_style'];
    } else {
        $ul_list_style = $params->document->getStyleName('list');
    }

    // Set new media selector (remember old one)
    $media = $params->import->getMedia ();
    if (!empty($options ['media_selector'])) {
        $params->import->setMedia($options ['media_selector']);
    }

    // Push the enclosing element (if any) on the HTML stack.
    // 'attributes' is optional, NULL is passed if it was not given.
    if (!empty($options ['element'])) {
        $elementAttributes = isset($options ['attributes']) ? $options ['attributes'] : NULL;
        $params->htmlStack->open($options ['element'], $elementAttributes);
    }

    // Step 1: split $HTMLCode into a flat list of entries. Each entry is
    // either text ('content'), an opening tag ('tag-open', optionally
    // 'attributes') or a closing tag ('tag-close').
    // NOTE(review): getNextTag() is assumed to return false or the offsets
    // of the tag's '<' and '>' in [0] and [1] - confirm against its code.
    $parsed = array();
    $max = strlen ($HTMLCode);
    $pos = 0;
    while ($pos < $max) {
        $tagPos = self::getNextTag($HTMLCode, $pos);
        if ($tagPos === false) {
            // No more tags, the rest is plain content
            $parsed [] = array ('content' => substr($HTMLCode, $pos));
            break;
        }

        // Text in front of the tag (substr() may return false before PHP 8)
        $text = substr($HTMLCode, $pos, $tagPos [0]-$pos);
        $parsed [] = array ('content' => ($text === false) ? '' : $text);

        $tagged = substr($HTMLCode, $tagPos [0], $tagPos [1]-$tagPos [0]+1);
        $entry = array();
        if ($HTMLCode [$tagPos [1]-1] == '/') {
            // Element without content <abc/>, doesn't make sense, save as content
            $entry ['content'] = $tagged;
        } else if ($HTMLCode [$tagPos [0]+1] != '/') {
            // Opening tag: the name is everything up to the first space
            $parts = explode(' ', trim($tagged, '<> '), 2);
            $entry ['tag-open'] = $parts [0];
            if ( isset($parts [1]) ) {
                $entry ['attributes'] = $parts [1];
            }
            $entry ['tag-orig'] = $tagged;
        } else {
            // Closing tag
            $entry ['tag-close'] = trim ($tagged, '<>/ ');
            $entry ['tag-orig'] = $tagged;
        }
        $entry ['matched'] = false;
        $parsed [] = $entry;

        $pos = $tagPos [1]+1;
    }

    // Step 2: convert each entry. $checked holds the ODT code for the
    // entry with the same index in $parsed.
    $checked = array();
    $firstTag = '';
    $olStartValue = NULL;
    $total = count($parsed);
    for ($out = 0 ; $out < $total ; $out++) {
        if (isset($checked [$out])) {
            // Closing tag which has already been converted together
            // with its opening tag.
            continue;
        }
        $current = $parsed [$out];

        if (isset($current ['content'])) {
            $checked [$out] = $escapeContent
                ? $params->document->replaceXMLEntities($current ['content'])
                : $current ['content'];
            continue;
        }

        if (isset($current ['tag-close'])) {
            // If we find a closing tag here it means it did not match
            // an opening tag. Convert to content! This is always escaped,
            // regardless of $options ['escape_content'].
            $checked [$out] = $params->document->replaceXMLEntities($current ['tag-orig']);
            continue;
        }

        if (!isset($current ['tag-open'])) {
            continue;
        }

        $tag = $current ['tag-open'];
        // Tags without attributes have no 'attributes' key. Helpers still
        // get NULL in that case, the regular expressions get ''.
        $attributes = isset($current ['attributes']) ? $current ['attributes'] : NULL;

        // Search the first closing tag of the same name which has not been
        // matched yet. Only done for known tags, unknown tags always end
        // up as content.
        $in = false;
        if (array_key_exists($tag, $elements)) {
            for ($index = $out+1 ; $index < $total ; $index++) {
                $candidate = $parsed [$index];
                if (isset($candidate ['tag-close']) &&
                    $candidate ['tag-close'] === $tag &&
                    $candidate ['matched'] === false) {
                    $in = $index;
                    break;
                }
            }
        }

        if ($in === false) {
            // Unknown tag or no closing tag found, save as content
            $checked [$out] = $escapeContent
                ? $params->document->replaceXMLEntities($current ['tag-orig'])
                : $current ['tag-orig'];
            continue;
        }
        $parsed [$in]['matched'] = true;

        // Remember the first converted element, it decides whether the
        // output needs an enclosing paragraph.
        if ($firstTag === '') {
            $firstTag = $tag;
        }

        // Known and closed tag, convert to ODT
        switch ($tag) {
            case 'span':
                // Create ODT span using CSS style from attributes
                if ($addClass && preg_match('/class="[^"]*"/', (string)$attributes, $matches) == 1) {
                    // Prepend $options ['class'] to the existing classes
                    // (7 is the length of 'class="')
                    $class_attr = trim(substr($matches [0], 7), '"');
                    $class_attr = 'class="'.$options ['class'].' '.$class_attr.'"';
                    $attributes = str_replace($matches [0], $class_attr, $attributes);
                }
                $style_name = NULL;
                if ($namesFromClass && preg_match('/class="[^"]*"/', (string)$attributes, $matches) == 1) {
                    $style_name = $stylePrefix.trim(substr($matches [0], 7), '"');
                }
                $style_name = self::createTextStyle ($params, 'span', $attributes, $style_name);
                $checked [$out] = '<text:span text:style-name="'.$style_name.'">';
                $checked [$in] = '</text:span>';
                break;
            case 'a':
                $url = self::getLinkURL($attributes);
                if (empty($url)) {
                    $url = 'URLNotFoundInXHTMLLink';
                }
                $checked [$out] = $params->document->openHyperlink ($url, NULL, NULL, true);
                $checked [$in] = $params->document->closeHyperlink (true);
                break;
            case 'ul':
                $checked [$out] = '<text:list text:style-name="'.$ul_list_style.'" text:continue-numbering="false">';
                $checked [$in] = '</text:list>';
                break;
            case 'ol':
                $checked [$out] = '<text:list text:style-name="'.$ol_list_style.'" text:continue-numbering="false">';
                $checked [$in] = '</text:list>';
                // Remember a start value, it is applied to the next list item
                // (7 is the length of 'start="')
                if (preg_match('/start="[^"]*"/', (string)$attributes, $matches) == 1) {
                    $olStartValue = trim(substr($matches [0], 7), '"');
                }
                break;
            case 'li':
                // Create ODT paragraph style using CSS style from attributes,
                // but only if a class is involved. Otherwise the default
                // list paragraph style is used.
                $haveClass = false;
                if ($addClass && preg_match('/class="[^"]*"/', (string)$attributes, $matches) == 1) {
                    $class_attr = trim(substr($matches [0], 7), '"');
                    $class_attr = 'class="'.$options ['class'].' '.$class_attr.'"';
                    $attributes = str_replace($matches [0], $class_attr, $attributes);
                    $haveClass = true;
                }
                $style_name = NULL;
                if ($namesFromClass && preg_match('/class="[^"]*"/', (string)$attributes, $matches) == 1) {
                    $style_name = $stylePrefix.trim(substr($matches [0], 7), '"');
                    $haveClass = true;
                }
                if ($haveClass) {
                    $style_name = self::createParagraphStyle ($params, 'li', $attributes, $style_name);
                } else {
                    $style_name = $p_list_style;
                }

                $checked [$out] = '<text:list-item';
                if (isset($olStartValue)) {
                    $checked [$out] .= ' text:start-value="'.$olStartValue.'"';
                    $olStartValue = NULL;
                }
                $checked [$out] .= '><text:p text:style-name="'.$style_name.'">';
                $checked [$in] = '</text:p></text:list-item>';
                break;
            default:
                // Simple replacement
                $checked [$out] = $elements [$tag]['open'];
                $checked [$in] = $elements [$tag]['close'];
                break;
        }
    }

    // Eventually we need to create an enclosing element, open it.
    // Lists can not be placed in a paragraph, so for them an eventually
    // open paragraph is only closed.
    $isList = ($firstTag === 'ol' || $firstTag === 'ul');
    $params->document->paragraphClose();
    if (!$isList) {
        $params->document->paragraphOpen($p_style);
    }

    // Add checked entries to content. The entries were not stored in
    // index order (closing tags are set together with their opening
    // tag), so sort by index first.
    ksort($checked);
    $content = implode('', $checked);

    // Handle newlines
    $content = str_replace("\n", $keepLinebreaks ? '<text:line-break/>' : '', $content);

    // Handle tabs
    $content = str_replace("\t", $keepTabs ? '<text:tab/>' : '', $content);

    // Preserve space?
    if ($preserveSpace) {
        $content = preg_replace_callback('/(  +)/',array(__CLASS__, '_preserveSpace'), $content);
    }

    $params->content .= $content;

    // Eventually we need to create an enclosing element, close it
    if (!$isList) {
        $params->document->paragraphClose();
    }

    // Remove current element from stack, if we created one
    if (!empty($options ['element'])) {
        $params->htmlStack->removeCurrent();
    }

    // Restore media selector
    if (!empty($options ['media_selector'])) {
        $params->import->setMedia ($media);
    }
}