<?php
/**
 * Utility functions.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author LarsDW223
 */

// The CSS helpers live inside the DokuWiki plugin directory. The includes are
// only executed when DOKU_PLUGIN is defined (always the case inside DokuWiki),
// so that this class can also be loaded in isolation, e.g. from a unit test.
if (defined('DOKU_PLUGIN')) {
    /** Include csscolors */
    require_once DOKU_PLUGIN . 'odt/ODT/css/csscolors.php';
    /** Include cssborder */
    require_once DOKU_PLUGIN . 'odt/ODT/css/cssborder.php';
}

/**
 * ODTUtility:
 * Class containing some internal utility functions.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author LarsDW223
 * @package ODT\Utility
 */
class ODTUtility
{
    /**
     * Replace local link placeholders with bookmark references or plain text.
     *
     * Every '<locallink name="...">text</locallink>' element in $content
     * (the attribute 'name' is optional) is replaced in place:
     * - if the lower-cased text (spaces replaced by '_') matches the second
     *   comma separated field of a $toc entry, a '<text:a>' element pointing
     *   to the first field of that entry is inserted,
     * - otherwise, if it equals an entry of $bookmarks, a '<text:a>' element
     *   pointing to that bookmark is inserted,
     * - otherwise the element is replaced by its label only.
     * The label is the 'name' attribute if present, else the lower-cased text.
     *
     * @param string $content          The document content (modified in place)
     * @param array  $toc              The table of contents, entries of the form 'target,page,...'
     * @param array  $bookmarks        List of bookmarks
     * @param string $styleName        Link style name
     * @param string $visitedStyleName Visited link style name
     */
    public static function replaceLocalLinkPlaceholders(&$content, array $toc, array $bookmarks, $styleName, $visitedStyleName) {
        $closeTag = '</locallink>';
        $closeTagLength = strlen ($closeTag);
        $lengthWithName = strlen ('<locallink name=');

        $linkStyle = 'text:style-name="'.$styleName.'"';
        $linkStyle .= ' text:visited-style-name="'.$visitedStyleName.'"';

        // The content changes its length with every replacement, so the loop
        // is terminated by the searches below and not by a pre-computed length.
        $position = 0;
        while ( true ) {
            $first = strpos ($content, '<locallink', $position);
            if ( $first === false ) {
                break;
            }
            $endFirst = strpos ($content, '>', $first);
            if ( $endFirst === false ) {
                break;
            }
            $second = strpos ($content, $closeTag, $endFirst);
            if ( $second === false ) {
                break;
            }

            // Text between the opening and the closing tag
            $text = (string) substr ($content, $endFirst + 1, $second - $endFirst - 1);
            $text = strtolower (trim ($text, ' '));
            $page = str_replace (' ', '_', $text);

            // Opening tag without the trailing '>', the attribute 'name' is optional!
            $opentag = (string) substr ($content, $first, $endFirst - $first);
            $name = trim ((string) substr ($opentag, $lengthWithName), '">');
            $label = ($name !== '') ? $name : $text;

            $target = self::findLocalLinkTarget ($page, $toc, $bookmarks);
            if ( $target !== NULL ) {
                $replacement = '<text:a xlink:type="simple" xlink:href="#'.$target.'" '.$linkStyle.'>';
                $replacement .= $label;
                $replacement .= '</text:a>';
            } else {
                // If we get here, then the referenced target was not found.
                // There must be a bug managing the bookmarks or links!
                // At least remove the locallink element and insert text.
                $replacement = $label;
            }

            // Replace exactly the element found and continue behind the inserted text
            $content = substr_replace ($content, $replacement, $first, $second + $closeTagLength - $first);
            $position = $first + strlen ($replacement);
        }
    }

    /**
     * Look up the link target for a local link.
     *
     * @param string $page      Normalized page/section name
     * @param array  $toc       The table of contents, entries of the form 'target,page,...'
     * @param array  $bookmarks List of bookmarks
     * @return string|null Target name (without '#') or NULL if nothing matches
     */
    protected static function findLocalLinkTarget ($page, array $toc, array $bookmarks) {
        foreach ($toc as $item) {
            $params = explode (',', $item);
            if ( isset($params [1]) && $page === $params [1] ) {
                return $params [0];
            }
        }

        // Nothing found yet, check the bookmarks too.
        foreach ($bookmarks as $item) {
            if ( $page === (string) $item ) {
                return (string) $item;
            }
        }

        return NULL;
    }

    /**
     * This function deletes the useless elements. Right now, these are empty paragraphs
     * or paragraphs that only include whitespace.
     *
     * IMPORTANT:
     * Paragraphs can be used for pagebreaks/changing page format.
     * Such paragraphs may not be deleted!
     *
     * The style of a paragraph is taken from the first quoted attribute value
     * of its opening tag. A paragraph without any quoted attribute is treated
     * as having an empty style name.
     *
     * @param string $docContent              The document content (modified in place)
     * @param array  $preventDeletetionStyles Array of style names which may not be deleted
     */
    public static function deleteUselessElements(&$docContent, array $preventDeletetionStyles) {
        $length_open = strlen ('<text:p');
        $length_close = strlen ('</text:p>');
        $max = strlen ($docContent);
        $pos = 0;

        while ($pos < $max) {
            $start_open = strpos ($docContent, '<text:p', $pos);
            if ( $start_open === false ) {
                break;
            }
            $start_close = strpos ($docContent, '>', $start_open + $length_open);
            if ( $start_close === false ) {
                break;
            }
            $end = strpos ($docContent, '</text:p>', $start_close + 1);
            if ( $end === false ) {
                break;
            }

            $content = (string) substr ($docContent, $start_close + 1, $end - ($start_close + 1));

            // Do not use empty() here: a paragraph containing just "0" is not empty!
            if ( $content === '' || ctype_space ($content) ) {
                // Paragraph is empty or consists of whitespace only. Check style name.
                // Only look inside the opening tag, never into following elements.
                $open_tag = substr ($docContent, $start_open, $start_close - $start_open + 1);
                $style_name = '';
                if ( preg_match ('/"([^"]*)"/', $open_tag, $matches) === 1 ) {
                    $style_name = $matches [1];
                }

                // Only delete empty paragraph if not listed in 'Do not delete' array!
                if ( !in_array ($style_name, $preventDeletetionStyles, true) ) {
                    $length = $end - $start_open + $length_close;
                    $docContent = substr_replace ($docContent, '', $start_open, $length);

                    // Continue at the same offset, the following content moved up
                    $max -= $length;
                    $pos = $start_open;
                    continue;
                }
            }

            // Paragraph is kept, continue searching behind its opening tag
            $pos = $start_close;
        }
    }

    /**
     * The function tries to examine the width and height
     * of the image stored in file $src.
     *
     * @param string $src       The file name of image
     * @param int    $maxwidth  The maximum width (in pixel) the image shall have
     * @param int    $maxheight The maximum height (in pixel) the image shall have
     * @return array Width and height of the image in centimeters (assuming 96 dpi)
     *               or both 0 if the file doesn't exist or is not a readable image.
     *               Just the numeric value, no units included.
     */
    public static function getImageSize($src, $maxwidth=NULL, $maxheight=NULL){
        if (empty($src) || !file_exists($src)) {
            return array(0, 0);
        }

        $info = getimagesize($src);
        if ($info === false) {
            return array(0, 0);
        }
        $width = $info[0];
        $height = $info[1];

        // Scale down proportionally if the image exceeds the given limits
        if ($maxwidth && $width > $maxwidth) {
            $height = $height * ($maxwidth/$width);
            $width = $maxwidth;
        }
        if ($maxheight && $height > $maxheight) {
            $width = $width * ($maxheight/$height);
            $height = $maxheight;
        }

        // Convert from pixel to centimeters
        if ($width) $width = (($width/96.0)*2.54);
        if ($height) $height = (($height/96.0)*2.54);

        return array($width, $height);
    }

    /**
     * Return the size of an image in centimeters.
     *
     * If the image file can be examined and $preferImage is true, the size of
     * the file is returned. Otherwise $width and $height are used; they are
     * converted from pixel to centimeters if both have no unit or both have
     * the unit 'px'. If only a width is given, the height is calculated using
     * the ratio of the image file (or 1 if unknown).
     *
     * NOTE: $units is a required parameter although it follows parameters
     * with default values, so callers always have to pass all five arguments.
     *
     * @param string       $src         Filepath of the image
     * @param string|null  $width       Alternative width
     * @param string|null  $height      Alternative height
     * @param boolean|true $preferImage Prefer original image size
     * @param ODTUnits     $units       $ODTUnits object for unit conversion
     * @return array Width and height as strings ready to be used as attribute values
     */
    public static function getImageSizeString($src, $width = NULL, $height = NULL, $preferImage=true, ODTUnits $units){
        list($width_file, $height_file) = self::getImageSize($src);

        // Get original ratio if possible
        $ratio = 1;
        if ($width_file != 0 && $height_file != 0) {
            $ratio = $height_file/$width_file;
        }

        if ($width_file != 0 && $preferImage) {
            $width = $width_file.'cm';
            $height = $height_file.'cm';
        } else {
            // convert from pixel to centimeters only if no unit is
            // specified or if unit is 'px'
            $unit_width = $units->stripDigits ($width);
            $unit_height = $units->stripDigits ($height);
            if ((empty($unit_width) && empty($unit_height)) ||
                ($unit_width == 'px' && $unit_height == 'px')) {
                // The explicit float casts take the leading number of values
                // like '100px' and avoid errors for empty/non-numeric values.
                if (!$height) {
                    $height = (float) $width * $ratio;
                }
                $height = (((float) $height/96.0)*2.54).'cm';
                if ($width) $width = (((float) $width/96.0)*2.54).'cm';
            }
        }

        // At this point $width and $height should include a unit

        $width = str_replace(',', '.', $width);
        $height = str_replace(',', '.', $height);
        if ($width && $height) {
            // Don't be wider than the page.
            // The value has to be compared numerically: comparing a string
            // like '5cm' directly with 17 is a string comparison in PHP 8.
            // FIXME : this assumes A4 page format with 2cm margins
            //         (and that $width is given in centimeters)
            if ((float) $width >= 17) {
                // The embedded quotes close the width/height attribute of the
                // caller and add a relative size attribute.
                $width = $width.'" style:rel-width="100%';
                $height = $height.'" style:rel-height="scale';
            }
        } else {
            // external image and unable to download, fallback
            if (!$width) {
                $width = '" svg:rel-width="100%';
            }
            if (!$height) {
                $height = '" svg:rel-height="100%';
            }
        }
        return array($width, $height);
    }

    /**
     * Split $value by whitespace and convert any relative values (%)
     * into an absolute value. This is done by taking the percentage of
     * $maxWidthInPt.
     *
     * @param string   $value        String (Property value)
     * @param integer  $maxWidthInPt Maximum width in points
     * @param ODTUnits $units        $ODTUnits object for unit conversion
     * @return string The value with all percentage parts replaced by 'pt' values
     */
    protected static function adjustPercentageValueParts ($value, $maxWidthInPt, $units) {
        $adjusted = array();
        foreach (preg_split ('/\s+/', $value) as $part) {
            $length = strlen ($part);

            if ( $length > 1 && $part [$length-1] == '%' ) {
                $percentageValue = $units->getDigits($part);
                $part = (($percentageValue * $maxWidthInPt)/100) . 'pt';
            }

            $adjusted [] = $part;
        }
        $value = trim(implode(' ', $adjusted));
        $value = trim($value, '"');

        return $value;
    }

    /**
     * The function adjusts the properties values for ODT:
     * - 'em' units are converted to 'pt' units
     * - CSS color names are converted to its RGB value
     * - short color values like #fff are converted to the long format, e.g #ffffff
     * - some relative values are converted to absolute depending on other
     *   values e.g. 'line-height' and 'font-size'
     *
     * @author LarsDW223
     *
     * @param array    $properties Array with property value pairs (modified in place)
     * @param ODTUnits $units      Units object to use for conversion
     * @param integer  $maxWidth   Width of the surrounding element, used to
     *                             convert relative margins (optional)
     */
    public static function adjustValuesForODT (&$properties, ODTUnits $units, $maxWidth=NULL) {
        $adjustToMaxWidth = array('margin', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom');

        // Convert 'text-decoration'.
        if (isset($properties ['text-decoration'])) {
            switch ($properties ['text-decoration']) {
                case 'line-through':
                    $properties ['text-line-through-style'] = 'solid';
                    break;
                case 'underline':
                    $properties ['text-underline-style'] = 'solid';
                    break;
                case 'overline':
                    $properties ['text-overline-style'] = 'solid';
                    break;
            }
        }

        // Normalize border properties
        cssborder::normalize($properties);

        // First do simple adjustments per property
        foreach ($properties as $property => $value) {
            $properties [$property] = self::adjustValueForODT ($property, $value, $units);
        }

        // Adjust relative margins if $maxWidth is given.
        // $maxWidth is expected to be the width of the surrounding element.
        if (isset($maxWidth)) {
            $maxWidthInPt = $units->toPoints($maxWidth, 'y');
            $maxWidthInPt = $units->getDigits($maxWidthInPt);

            foreach ($adjustToMaxWidth as $property) {
                if (!empty($properties [$property])) {
                    $properties [$property] = self::adjustPercentageValueParts ($properties [$property], $maxWidthInPt, $units);
                }
            }
        }

        // Now we do the adjustments for which one value depends on another

        // Do we have font-size or line-height set?
        if (isset($properties ['font-size']) || isset($properties ['line-height'])) {
            // First get absolute font-size in points
            $base_font_size_in_pt = $units->getPixelPerEm ().'px';
            $base_font_size_in_pt = $units->toPoints($base_font_size_in_pt, 'y');
            $base_font_size_in_pt = $units->getDigits($base_font_size_in_pt);
            if (isset($properties ['font-size'])) {
                $font_size_unit = $units->stripDigits($properties ['font-size']);
                $font_size_digits = $units->getDigits($properties ['font-size']);
                if ($font_size_unit == '%') {
                    $base_font_size_in_pt = ($font_size_digits * $base_font_size_in_pt)/100;
                    $properties ['font-size'] = $base_font_size_in_pt.'pt';
                } elseif ($font_size_unit != 'pt') {
                    $properties ['font-size'] = $units->toPoints($properties ['font-size'], 'y');
                    $base_font_size_in_pt = $units->getDigits($properties ['font-size']);
                } else {
                    $base_font_size_in_pt = $units->getDigits($properties ['font-size']);
                }
            }

            // Convert relative line-heights to absolute
            if (isset($properties ['line-height'])) {
                $line_height_unit = $units->stripDigits($properties ['line-height']);
                $line_height_digits = $units->getDigits($properties ['line-height']);
                if ($line_height_unit == '%') {
                    $properties ['line-height'] = (($line_height_digits * $base_font_size_in_pt)/100).'pt';
                } elseif (empty($line_height_unit)) {
                    $properties ['line-height'] = ($line_height_digits * $base_font_size_in_pt).'pt';
                }
            }
        }
    }

    /**
     * The function adjusts the property value for ODT:
     * - 'em' units are converted to 'pt' units
     * - CSS color names are converted to its RGB value
     * - short color values like #fff are converted to the long format, e.g #ffffff
     * - for properties containing 'border' in their name all parts not
     *   ending with 'pt' are passed to ODTUnits::toPoints()
     * - for 'font-family' only the first font is kept
     *
     * @author LarsDW223
     *
     * @param string   $property The property name
     * @param string   $value    The value
     * @param ODTUnits $units    Units object to use for conversion
     * @return string Converted value
     */
    public static function adjustValueForODT ($property, $value, ODTUnits $units) {
        if ($property == 'font-family') {
            // There might be several font/font-families included.
            // Only take the first one.
            $value = trim($value, '"');
            if (strpos($value, ',') !== false) {
                $values = explode(',', $value);
                $value = trim ($values [0], '"');
                $value = trim ($value, "'");
                $value = trim ($value);
            }
            return $value;
        }

        $isBorder = (strpos($property, 'border') !== false);
        $adjusted = array();
        foreach (preg_split ('/\s+/', $value) as $part) {
            // Check the length first, $part might be an empty string
            if ( strlen ($part) == 4 && $part [0] == '#' ) {
                // If it is a short color value (#xxx) then convert it to long value (#xxxxxx)
                // (ODT does not support the short form)
                $part = '#'.$part [1].$part [1].$part [2].$part [2].$part [3].$part [3];
            } else {
                // If it is a CSS color name, get it's real color value
                $color = csscolors::getColorValue ($part);
                if ( $part == 'black' || $color != '#000000' ) {
                    $part = $color;
                }
            }

            // $part might have been replaced above, so get the length (again)
            // before accessing its last characters.
            $length = strlen ($part);

            if ( $length > 2 && $part [$length-2] == 'e' && $part [$length-1] == 'm' ) {
                $part = $units->toPoints($part, 'y');
            }

            if ( $isBorder && $length > 2 && ($part [$length-2] != 'p' || $part [$length-1] != 't') ) {
                $part = $units->toPoints($part, 'y');
            }

            // Some values can have '"' in it. These need to be converted to '&apos;'
            // e.g. 'font-family' to specify that '"Courier New"' is one font name not two
            $part = str_replace('"', '&apos;', $part);

            $adjusted [] = $part;
        }
        $value = trim(implode(' ', $adjusted));
        $value = trim($value, '"');

        return $value;
    }

    /**
     * This function processes the CSS style declarations in $style and saves them in $properties
     * as key - value pairs, e.g. $properties ['color'] = 'red'. It also adjusts the values
     * for the ODT format and changes URLs to local paths if required (using $baseURL).
     *
     * NOTE: $units is a required parameter although it follows a parameter
     * with a default value, so callers always have to pass $baseURL.
     *
     * @author LarsDW223
     * @param array       $properties Target array for the properties
     * @param string      $style      The CSS style e.g. 'color:red;'
     * @param string|null $baseURL    Base URL used for 'background-image'
     * @param ODTUnits    $units      Units object to use for conversion
     * @param integer     $maxWidth   Maximum width
     */
    public static function getCSSStylePropertiesForODT(&$properties, $style, $baseURL = NULL, ODTUnits $units, $maxWidth=NULL){
        // Create rule with selector '*' (doesn't matter) and declarations as set in $style
        $rule = new css_rule ('*', $style);
        $rule->getProperties ($properties);
        self::adjustValuesForODT ($properties, $units, $maxWidth);

        if ( !empty ($properties ['background-image']) && !empty ($baseURL) ) {
            // Replace 'url(...)' with $baseURL
            $properties ['background-image'] = cssimportnew::replaceURLPrefix ($properties ['background-image'], $baseURL);
        }
    }

    /**
     * The function opens/puts a new element on the HTML stack in $params->htmlStack.
     * The element name will be $element and it will be created with the attributes $attributes.
     * Then CSS matching is performed and the CSS properties are returned in $dest.
     * Finally the CSS properties are converted to ODT format if necessary.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params     Common params.
     * @param array             $dest       Target array for properties storage
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param integer           $maxWidth   Maximum Width
     */
    public static function openHTMLElement (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL) {
        // Push/create our element to import on the stack
        $params->htmlStack->open($element, $attributes);
        $toMatch = $params->htmlStack->getCurrentElement();
        $params->import->getPropertiesForElement($dest, $toMatch, $params->units);

        // Adjust values for ODT
        self::adjustValuesForODT($dest, $params->units, $maxWidth);
    }

    /**
     * The function closes element with name $element on the HTML stack in $params->htmlStack.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params  Common params.
     * @param string            $element The element's name
     */
    public static function closeHTMLElement (ODTInternalParams $params, $element) {
        $params->htmlStack->close($element);
    }

    /**
     * The function temporarily opens/puts a new element on the HTML stack in $params->htmlStack.
     * Before leaving the function the element is removed from the stack.
     *
     * The element name will be $element and it will be created with the attributes $attributes.
     * After opening the element CSS matching is performed and the CSS properties are returned in $dest.
     * Finally the CSS properties are converted to ODT format if necessary.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params     Common params.
     * @param array             $dest       Target array for properties storage
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param integer           $maxWidth   Maximum Width
     * @param boolean           $inherit    Enable/disable CSS inheritance
     */
    public static function getHTMLElementProperties (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL, $inherit=true) {
        // Push/create our element to import on the stack
        $params->htmlStack->open($element, $attributes);
        $toMatch = $params->htmlStack->getCurrentElement();
        $params->import->getPropertiesForElement($dest, $toMatch, $params->units, $inherit);

        // Adjust values for ODT
        self::adjustValuesForODT($dest, $params->units, $maxWidth);

        // Remove element from stack
        $params->htmlStack->removeCurrent();
    }

    /**
     * Small helper function for finding the next tag enclosed in <angle> brackets.
     * Returns beginning and end of the tag as an array [0] = start, [1] = end.
     *
     * @author LarsDW223
     * @param string  $content Code to search in.
     * @param integer $pos     Start position for searching.
     * @return array|false Positions of '<' and of the following '>' or
     *                     false if there is no (complete) tag
     */
    public static function getNextTag (&$content, $pos) {
        $start = strpos ($content, '<', $pos);
        if ($start === false) {
            return false;
        }
        // The closing bracket has to be searched behind the opening bracket,
        // otherwise a '>' in the preceding text would be taken as the tag end.
        $end = strpos ($content, '>', $start);
        if ($end === false) {
            return false;
        }
        return array($start, $end);
    }

    /**
     * The function returns $value as a valid IRI and replaces some signs
     * if necessary, e.g. '&' will be replaced by '&amp;'.
     * The function will not do double replacements, e.g. if the string
     * already includes a '&amp;' it will NOT become '&amp;amp;'.
     * The numeric entity '&#38;' is converted to '&amp;' as well.
     *
     * @author LarsDW223
     * @param string $value String to be converted to IRI
     * @return string
     */
    public static function stringToIRI ($value) {
        // '&#38;' must be replaced with "&amp;"
        $value = str_replace ('&#38;', '&amp;', (string) $value);

        // Any other '&' not already starting '&amp;' must be replaced with "&amp;"
        return preg_replace ('/&(?!amp;)/', '&amp;', $value);
    }

    /**
     * Extract the URL from the 'href' attribute found in $search.
     *
     * @param string $search The attributes of a link element
     * @return string The URL or an empty string if there is no 'href' attribute
     */
    protected static function getLinkURL ($search) {
        if (preg_match ('/href="([^"]*)"/', (string) $search, $matches) !== 1) {
            return '';
        }
        $url = $matches [1];
        // Keep '&' and ':' in the link URL unescaped, otherwise url parameter passing will not work
        $url = str_replace('&amp;', '&', $url);
        $url = str_replace('%3A', ':', $url);

        return $url;
    }

    /**
     * static call back to replace spaces
     *
     * @param array $matches Match of the regular expression, [1] includes the spaces
     * @return string ODT element representing the same number of spaces
     */
    protected static function _preserveSpace($matches){
        $spaces = $matches[1];
        $len = strlen($spaces);
        return '<text:s text:c="'.$len.'"/>';
    }

    /**
     * Create an automatic text style from the CSS properties matching the
     * given HTML element. Nothing is created if $styleName is given and a
     * style with that name already exists in the document.
     *
     * @param ODTInternalParams $params     Common params.
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param string|null       $styleName  Style name to use, a new name is generated if NULL
     * @return string The name of the style
     */
    protected static function createTextStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
        if (isset($styleName) && $params->document->styleExists($styleName)) {
            // Style already exists
            return $styleName;
        }

        // Get properties
        $properties = array();
        self::getHTMLElementProperties ($params, $properties, $element, $attributes);

        if (!isset($styleName)) {
            $properties ['style-name'] = ODTStyle::getNewStylename ('span');
        } else {
            // Use callers style name. He needs to be sure that it's unique!
            $properties ['style-name'] = $styleName;
        }
        $params->document->createTextStyle($properties, false);

        // Return style name
        return $properties ['style-name'];
    }

    /**
     * Create an automatic paragraph style from the CSS properties matching the
     * given HTML element. Nothing is created if $styleName is given and a
     * style with that name already exists in the document.
     *
     * @param ODTInternalParams $params     Common params.
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param string|null       $styleName  Style name to use, a new name is generated if NULL
     * @return string The name of the style
     */
    protected static function createParagraphStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
        if (isset($styleName) && $params->document->styleExists($styleName)) {
            // Style already exists
            return $styleName;
        }

        // Get properties
        $properties = array();
        self::getHTMLElementProperties ($params, $properties, $element, $attributes);

        if (!isset($styleName)) {
            // NOTE(review): generated names use the same 'span' base as text
            // styles - confirm that this is intended for paragraph styles.
            $properties ['style-name'] = ODTStyle::getNewStylename ('span');
        } else {
            // Use callers style name. He needs to be sure that it's unique!
            $properties ['style-name'] = $styleName;
        }
        $params->document->createParagraphStyle($properties, false);

        // Return style name
        return $properties ['style-name'];
    }

    /**
     * Apply the options 'class', 'style_names' and 'style_names_prefix'
     * (see generateODTfromHTMLCode()) to the attributes of an HTML element.
     *
     * @param string|null $attributes The element's attributes
     * @param array       $options    Array of options
     * @return array [0] = attributes (with $options ['class'] added to an
     *               existing class attribute), [1] = style name to use or
     *               NULL, [2] = true if a class attribute was processed
     */
    protected static function applyClassOptions ($attributes, array $options) {
        $pattern = '/class="([^"]*)"/';
        $styleName = NULL;
        $haveClass = false;

        if (!empty($options ['class']) &&
            preg_match($pattern, (string) $attributes, $matches) === 1) {
            $class_attr = 'class="'.$options ['class'].' '.$matches [1].'"';
            $attributes = str_replace($matches [0], $class_attr, (string) $attributes);
            $haveClass = true;
        }

        if (isset($options ['style_names']) && $options ['style_names'] === 'prefix_and_class' &&
            preg_match($pattern, (string) $attributes, $matches) === 1) {
            $prefix = isset($options ['style_names_prefix']) ? $options ['style_names_prefix'] : '';
            $styleName = $prefix.$matches [1];
            $haveClass = true;
        }

        return array($attributes, $styleName, $haveClass);
    }

    /**
     * Convenience function for converting some HTML code to ODT format.
     * The function will try to automatically create any needed ODT styles
     * from the CSS code found in the HTML code. The result is appended to
     * $params->content.
     *
     * Also some special settings can be passed in the options array:
     *
     * $options ['p_style']:
     *   The default paragraph style. If empty 'body' will be used.
     *
     * $options ['list_p_style']:
     *   The default paragraph style in lists. If empty 'body' will be used.
     *
     * $options ['list_ol_style']:
     *   The default style for ordered lists. If empty 'numbering' will be used.
     *
     * $options ['list_ul_style']:
     *   The default style for un-ordered lists. If empty 'list' will be used.
     *
     * $options ['media_selector']:
     *   The media selector used for CSS handling (e.g. 'screen' or 'print').
     *   If empty the current/configured one will be used.
     *
     * $options ['element']:
     *   If not empty an HTML tag named '$options ['element']' will be pushed
     *   on the internal HTML stack before converting the $HTMLCode.
     *   This influences CSS handling.
     *
     * $options ['attributes']:
     *   The attributes to set for '$options ['element']'.
     *
     * $options ['escape_content']:
     *   Should have the value 'true' or 'false' (as string!). If 'true'
     *   XML entities will be escaped. Otherwise it is assumed that it
     *   already has been done.
     *
     * $options ['class']:
     *   Optional CSS class to add to found 'class="..."' attributes in
     *   the HTML code.
     *
     * $options ['style_names']:
     *   If set to 'prefix_and_class' then ODT style names will not be
     *   generated dynamically but are constructed from '$options ['style_names_prefix']'
     *   following the CSS class name(s).
     *
     * $options ['linebreaks']:
     *   If set to 'remove' then linebreaks will be ignored. Otherwise
     *   they will be kept and converted to proper ODT linebreaks.
     *
     * $options ['tabs']:
     *   If set to 'remove' then tabs will be ignored. Otherwise they
     *   will be kept and converted to proper ODT tabs.
     *
     * $options ['space']:
     *   If set to 'preserve' then space is preserved like for preformatted
     *   code blocks. Otherwise space is not preserved and multiple spaces
     *   will appear as only one space.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params   The internal params
     * @param string            $HTMLCode The HTML code to convert
     * @param array             $options  Array of options
     */
    public static function generateODTfromHTMLCode(ODTInternalParams $params, $HTMLCode, array $options){
        // All options are optional: add missing ones as NULL so that none of
        // the lookups below accesses an undefined array key.
        $options += array(
            'p_style' => NULL,
            'list_p_style' => NULL,
            'list_ol_style' => NULL,
            'list_ul_style' => NULL,
            'media_selector' => NULL,
            'element' => NULL,
            'attributes' => NULL,
            'escape_content' => NULL,
            'class' => NULL,
            'style_names' => NULL,
            'style_names_prefix' => NULL,
            'linebreaks' => NULL,
            'tabs' => NULL,
            'space' => NULL,
        );
        $escape = ($options ['escape_content'] !== 'false');

        // Known elements with a simple replacement. Elements with empty
        // replacements are either handled separately below or just removed.
        $elements = array ('sup' => array ('open' => '<text:span text:style-name="sup">',
                                           'close' => '</text:span>'),
                           'sub' => array ('open' => '<text:span text:style-name="sub">',
                                           'close' => '</text:span>'),
                           'u' => array ('open' => '<text:span text:style-name="underline">',
                                         'close' => '</text:span>'),
                           'em' => array ('open' => '<text:span text:style-name="Emphasis">',
                                          'close' => '</text:span>'),
                           'strong' => array ('open' => '<text:span text:style-name="Strong_20_Emphasis">',
                                              'close' => '</text:span>'),
                           'del' => array ('open' => '<text:span text:style-name="del">',
                                           'close' => '</text:span>'),
                           'span' => array ('open' => '',
                                            'close' => ''),
                           'a' => array ('open' => '',
                                         'close' => ''),
                           'ol' => array ('open' => '',
                                          'close' => ''),
                           'ul' => array ('open' => '',
                                          'close' => ''),
                           'li' => array ('open' => '<text:list-item><text:p text:style-name="Text_20_body">',
                                          'close' => '</text:p></text:list-item>'),
                           // In the moment only remove divs
                           'div' => array ('open' => '', 'close' => ''),
                          );

        // remove useless leading and trailing whitespace-newlines
        // NOTE(review): the entity literals in the next three statements were
        // restored ('&nbsp;' -> numeric entity '&#xA0;') - please confirm.
        $HTMLCode = preg_replace('/^&nbsp;\n/', '', $HTMLCode);
        $HTMLCode = preg_replace('/\n&nbsp;$/', '', $HTMLCode);
        $HTMLCode = str_replace('&nbsp;', '&#xA0;', $HTMLCode);

        // Get default paragraph style
        if (!empty($options ['p_style'])) {
            $p_style = $options ['p_style'];
        } else {
            $p_style = $params->document->getStyleName('body');
        }

        // Get default list style names
        if (!empty($options ['list_p_style'])) {
            $p_list_style = $options ['list_p_style'];
        } else {
            $p_list_style = $params->document->getStyleName('body');
        }
        if (!empty($options ['list_ol_style'])) {
            $ol_list_style = $options ['list_ol_style'];
        } else {
            $ol_list_style = $params->document->getStyleName('numbering');
        }
        if (!empty($options ['list_ul_style'])) {
            $ul_list_style = $options ['list_ul_style'];
        } else {
            $ul_list_style = $params->document->getStyleName('list');
        }

        // Set new media selector (remember old one)
        $media = $params->import->getMedia ();
        if (!empty($options['media_selector'])) {
            $params->import->setMedia($options['media_selector']);
        }

        if (!empty($options ['element'])) {
            $params->htmlStack->open($options ['element'], $options ['attributes']);
        }

        // First examine $HTMLCode and differ between normal content,
        // opening tags and closing tags.
        $parsed = array();
        $max = strlen ($HTMLCode);
        $pos = 0;
        while ($pos < $max) {
            $tag = self::getNextTag($HTMLCode, $pos);
            if ($tag === false) {
                // No more tags, the rest is content
                $parsed [] = array('content' => substr($HTMLCode, $pos));
                break;
            }

            // Content in front of the tag (might be empty)
            $text = substr($HTMLCode, $pos, $tag [0]-$pos);
            $parsed [] = array('content' => ($text === false) ? '' : $text);

            $tagged = substr($HTMLCode, $tag [0], $tag [1]-$tag [0]+1);
            $entry = array();
            if ($HTMLCode [$tag [1]-1] == '/') {
                // Element without content <abc/>, doesn't make sense, save as content
                $entry ['content'] = $tagged;
            } else if ($HTMLCode [$tag [0]+1] != '/') {
                $parts = explode(' ', trim($tagged, '<> '), 2);
                $entry ['tag-open'] = $parts [0];
                if ( isset($parts [1]) ) {
                    $entry ['attributes'] = $parts [1];
                }
                $entry ['tag-orig'] = $tagged;
            } else {
                $entry ['tag-close'] = trim ($tagged, '<>/ ');
                $entry ['tag-orig'] = $tagged;
            }
            $entry ['matched'] = false;
            $parsed [] = $entry;

            $pos = $tag [1]+1;
        }

        // Check each array entry. $checked gets the ODT code for every
        // entry of $parsed, using the same index.
        $checked = array();
        $first = true;
        $firstTag = '';
        $olStartValue = NULL;
        $count = count($parsed);
        for ($out = 0 ; $out < $count ; $out++) {
            if (isset($checked [$out])) {
                // Closing tag which has already been converted with its opening tag
                continue;
            }
            $entry = $parsed [$out];

            if (isset($entry ['content'])) {
                if ($escape) {
                    $checked [$out] = $params->document->replaceXMLEntities($entry ['content']);
                } else {
                    $checked [$out] = $entry ['content'];
                }
            } else if (isset($entry ['tag-open'])) {
                $tagName = $entry ['tag-open'];
                $attributes = isset($entry ['attributes']) ? $entry ['attributes'] : NULL;

                // Known tag? Then search the first unmatched closing tag behind it.
                $in = false;
                if (array_key_exists($tagName, $elements)) {
                    for ($index = $out+1 ; $index < $count ; $index++) {
                        if (isset($parsed [$index]['tag-close']) &&
                            $parsed [$index]['tag-close'] === $tagName &&
                            $parsed [$index]['matched'] === false) {
                            $in = $index;
                            break;
                        }
                    }
                }

                if ($in === false) {
                    // Unknown tag or no closing tag found, save as content
                    if ($escape) {
                        $checked [$out] = $params->document->replaceXMLEntities($entry ['tag-orig']);
                    } else {
                        $checked [$out] = $entry ['tag-orig'];
                    }
                    continue;
                }
                $parsed [$in]['matched'] = true;

                // Remember the first element
                if ($first) {
                    $first = false;
                    $firstTag = $tagName;
                }

                // Known and closed tag, convert to ODT
                switch ($tagName) {
                    case 'span':
                        // Create ODT span using CSS style from attributes
                        list($attributes, $style_name) = self::applyClassOptions ($attributes, $options);
                        $style_name = self::createTextStyle ($params, 'span', $attributes, $style_name);
                        $checked [$out] = '<text:span text:style-name="'.$style_name.'">';
                        $checked [$in] = '</text:span>';
                        break;
                    case 'a':
                        $url = self::getLinkURL($attributes);
                        if (empty($url)) {
                            $url = 'URLNotFoundInXHTMLLink';
                        }
                        $checked [$out] = $params->document->openHyperlink ($url, NULL, NULL, true);
                        $checked [$in] = $params->document->closeHyperlink (true);
                        break;
                    case 'ul':
                        $checked [$out] = '<text:list text:style-name="'.$ul_list_style.'" text:continue-numbering="false">';
                        $checked [$in] = '</text:list>';
                        break;
                    case 'ol':
                        $checked [$out] = '<text:list text:style-name="'.$ol_list_style.'" text:continue-numbering="false">';
                        $checked [$in] = '</text:list>';
                        // The start value is applied to the next list item
                        if (preg_match('/start="([^"]*)"/', (string) $attributes, $matches) === 1) {
                            $olStartValue = $matches [1];
                        }
                        break;
                    case 'li':
                        // Create ODT paragraph style using CSS style from attributes
                        list($attributes, $style_name, $haveClass) = self::applyClassOptions ($attributes, $options);
                        if ($haveClass) {
                            $style_name = self::createParagraphStyle ($params, 'li', $attributes, $style_name);
                        } else {
                            $style_name = $p_list_style;
                        }

                        $checked [$out] = '<text:list-item';
                        if (isset($olStartValue)) {
                            $checked [$out] .= ' text:start-value="'.$olStartValue.'"';
                            $olStartValue = NULL;
                        }
                        $checked [$out] .= '><text:p text:style-name="'.$style_name.'">';
                        $checked [$in] = '</text:p></text:list-item>';
                        break;
                    default:
                        // Simple replacement
                        $checked [$out] = $elements [$tagName]['open'];
                        $checked [$in] = $elements [$tagName]['close'];
                        break;
                }
            } else if (isset($entry ['tag-close'])) {
                // If we find a closing tag it means it did not match
                // an opening tag. Convert to content!
                $checked [$out] = $params->document->replaceXMLEntities($entry ['tag-orig']);
            }
        }

        // Eventually we need to create an enclosing element, open it
        switch ($firstTag) {
            case 'ol':
            case 'ul':
                // Close an eventually open paragraph
                $params->document->paragraphClose();
                break;
            default:
                $params->document->paragraphClose();
                $params->document->paragraphOpen($p_style);
                break;
        }

        // Add checked entries to content. Closing tags have been stored
        // ahead of time, so bring the entries into document order first.
        ksort($checked);
        $content = implode('', $checked);

        // Handle newlines
        if ($options ['linebreaks'] !== 'remove') {
            $content = str_replace("\n",'<text:line-break/>',$content);
        } else {
            $content = str_replace("\n",'',$content);
        }

        // Handle tabs
        if ($options ['tabs'] !== 'remove') {
            $content = str_replace("\t",'<text:tab/>',$content);
        } else {
            $content = str_replace("\t",'',$content);
        }

        // Preserve space?
        if ($options ['space'] === 'preserve') {
            $content = preg_replace_callback('/( +)/',array(__CLASS__, '_preserveSpace'), $content);
        }

        $params->content .= $content;

        // Eventually we need to create an enclosing element, close it
        switch ($firstTag) {
            case 'ol':
            case 'ul':
                // Nothing to do
                break;
            default:
                $params->document->paragraphClose();
                break;
        }

        // Remove current element from stack, if we created one
        if (!empty($options ['element'])) {
            $params->htmlStack->removeCurrent();
        }

        // Restore media selector
        if (!empty($options ['media_selector'])) {
            $params->import->setMedia ($media);
        }
    }
}