<?php
/**
 * Utility functions.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author LarsDW223
 */

/** Include csscolors */
require_once DOKU_PLUGIN . 'odt/ODT/css/csscolors.php';
/** Include cssborder */
require_once DOKU_PLUGIN . 'odt/ODT/css/cssborder.php';

/**
 * ODTUtility:
 * Class containing some internal utility functions.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author LarsDW223
 * @package ODT\Utility
 */
class ODTUtility
{
    /**
     * Replace local link placeholders with bookmark references or plain text.
     *
     * Every '<locallink name="...">text</locallink>' element (the attribute
     * 'name' is optional) is replaced in place:
     * - if the lowercased text (spaces replaced by '_') matches the second
     *   comma separated field of a $toc entry, a link to the first field
     *   of that entry is inserted,
     * - otherwise, if it matches an entry of $bookmarks, a link to that
     *   bookmark is inserted,
     * - otherwise only the name (or the text if there is no name) is kept.
     *
     * @param string $content          The document content (modified in place)
     * @param array  $toc              The table of contents, entries are
     *                                 comma separated strings
     * @param array  $bookmarks        List of bookmarks
     * @param string $styleName        Link style name
     * @param string $visitedStyleName Visited link style name
     */
    public static function replaceLocalLinkPlaceholders(&$content, array $toc, array $bookmarks, $styleName, $visitedStyleName) {
        $closeTag = '</locallink>';
        $closeLength = strlen ($closeTag);
        $lengthWithName = strlen ('<locallink name=');

        $linkStyle = 'text:style-name="'.$styleName.'"';
        $linkStyle .= ' text:visited-style-name="'.$visitedStyleName.'"';

        $position = 0;
        // The length is re-evaluated on every pass because each replacement
        // changes the size of $content.
        while ( $position < strlen ($content) ) {
            $first = strpos ($content, '<locallink', $position);
            if ( $first === false ) {
                break;
            }
            $endFirst = strpos ($content, '>', $first);
            if ( $endFirst === false ) {
                break;
            }
            $second = strpos ($content, $closeTag, $endFirst);
            if ( $second === false ) {
                break;
            }

            // Length of the whole element '<locallink name="...">text</locallink>'
            $matchLength = $second - $first + $closeLength;

            // The text between the tags identifies the target
            $text = (string) substr ($content, $endFirst + 1, $second - $endFirst - 1);
            $text = strtolower (trim ($text, ' '));
            $page = str_replace (' ', '_', $text);

            // Optional display name from the opening tag
            $opentag = substr ($content, $first, $endFirst - $first);
            $name = trim ((string) substr ($opentag, $lengthWithName), '">');
            $label = ($name !== '') ? $name : $text;

            // Look for the target in the TOC first, then in the bookmarks.
            // The first hit wins.
            $target = NULL;
            foreach ($toc as $item) {
                $params = explode (',', $item);
                if ( isset($params [1]) && $page === (string) $params [1] ) {
                    $target = $params [0];
                    break;
                }
            }
            if ( $target === NULL ) {
                foreach ($bookmarks as $item) {
                    if ( $page === (string) $item ) {
                        $target = $item;
                        break;
                    }
                }
            }

            if ( $target !== NULL ) {
                $replacement = '<text:a xlink:type="simple" xlink:href="#'.$target.'" '.$linkStyle.'>';
                $replacement .= $label;
                $replacement .= '</text:a>';
            } else {
                // If we get here, then the referenced target was not found.
                // There must be a bug managing the bookmarks or links!
                // At least remove the locallink element and insert text.
                $replacement = $label;
            }

            $content = substr_replace ($content, $replacement, $first, $matchLength);

            // Continue right behind the inserted text, whatever was inserted
            $position = $first + strlen ($replacement);
        }
    }

    /**
     * This function deletes the useless elements. Right now, these are empty paragraphs
     * or paragraphs that only include whitespace.
     *
     * IMPORTANT:
     * Paragraphs can be used for pagebreaks/changing page format.
     * Such paragraphs may not be deleted!
     *
     * @param string $docContent              The document content (modified in place)
     * @param array  $preventDeletetionStyles Array of style names which may not be deleted
     */
    public static function deleteUselessElements(&$docContent, array $preventDeletetionStyles) {
        $closeTag = '</text:p>';
        $length_open = strlen ('<text:p');
        $length_close = strlen ($closeTag);
        $pos = 0;

        // The length is re-evaluated on every pass because deletions shrink $docContent
        while ( $pos < strlen ($docContent) ) {
            $start_open = strpos ($docContent, '<text:p', $pos);
            if ( $start_open === false ) {
                break;
            }
            $start_close = strpos ($docContent, '>', $start_open + $length_open);
            if ( $start_close === false ) {
                break;
            }

            // '<text:p' is also the prefix of other element names
            // (e.g. '<text:page-number>'). Only handle real paragraph tags
            // and skip self-closing ones, they have no separate closing tag.
            $next = $docContent [$start_open + $length_open];
            $isParagraph = ( $next === '>' || $next === '/' || ctype_space ($next) );
            $isSelfClosing = ( $docContent [$start_close - 1] === '/' );
            if ( !$isParagraph || $isSelfClosing ) {
                $pos = $start_close;
                continue;
            }

            $end = strpos ($docContent, $closeTag, $start_close + 1);
            if ( $end === false ) {
                break;
            }

            $deleted = false;
            $content = (string) substr ($docContent, $start_close + 1, $end - ($start_close + 1));

            // Do not use empty() here: a paragraph consisting of '0' is not empty!
            if ( $content === '' || ctype_space ($content) ) {
                // Paragraph is empty or consists of whitespace only. Check style name.
                // Only look inside of the opening tag itself.
                $openTag = substr ($docContent, $start_open, $start_close - $start_open + 1);
                $style_name = '';
                if ( preg_match ('/text:style-name="([^"]*)"/', $openTag, $styleMatch) === 1 ) {
                    $style_name = $styleMatch [1];
                } else if ( preg_match ('/"([^"]*)"/', $openTag, $styleMatch) === 1 ) {
                    // No style name attribute, fall back to the first quoted value
                    $style_name = $styleMatch [1];
                }

                // Only delete empty paragraph if not listed in 'Do not delete' array!
                if ( !in_array ($style_name, $preventDeletetionStyles, true) ) {
                    $length = $end - $start_open + $length_close;
                    $docContent = substr_replace ($docContent, '', $start_open, $length);

                    $deleted = true;
                    $pos = $start_open;
                }
            }

            if ( $deleted == false ) {
                $pos = $start_close;
            }
        }
    }

    /**
     * The function tries to examine the width and height
     * of the image stored in file $src.
     *
     * The pixel size is converted to centimeters assuming 96 dpi.
     *
     * @param  string $src       The file name of image
     * @param  int    $maxwidth  The maximum width the image shall have (in pixel)
     * @param  int    $maxheight The maximum height the image shall have (in pixel)
     * @return array  Width and height of the image in centimeters or
     *                both 0 if file doesn't exist or its size can not
     *                be determined.
     *                Just the numeric value, no units included.
     */
    public static function getImageSize($src, $maxwidth=NULL, $maxheight=NULL){
        if ( !file_exists ($src) ) {
            return array(0, 0);
        }

        $info = getimagesize ($src);
        if ( $info === false ) {
            // Not an image or unsupported format
            return array(0, 0);
        }
        $width = $info [0];
        $height = $info [1];

        // Scale down proportionally if the image exceeds the limits
        if ($maxwidth && $width > $maxwidth) {
            $height = $height * ($maxwidth/$width);
            $width = $maxwidth;
        }
        if ($maxheight && $height > $maxheight) {
            $width = $width * ($maxheight/$height);
            $height = $maxheight;
        }

        // Convert from pixel to centimeters
        if ($width) $width = (($width/96.0)*2.54);
        if ($height) $height = (($height/96.0)*2.54);

        return array($width, $height);
    }

    /**
     * Return the size of an image as strings usable as ODT attribute values.
     *
     * If the size of the image file could be determined and $preferImage
     * is true, then the file size is returned in centimeters. Otherwise
     * $width and $height are used. They are converted from pixel to
     * centimeters only if they have no unit or the unit 'px'.
     *
     * Values of 17 or more get relative size attributes appended, missing
     * values are replaced by relative size attributes.
     *
     * NOTE: the optional parameters in front of the required parameter
     * $units are kept for compatibility with existing callers.
     *
     * @param  string       $src         Filepath of the image
     * @param  string|null  $width       Alternative width
     * @param  string|null  $height      Alternative height
     * @param  boolean|true $preferImage Prefer original image size
     * @param  ODTUnits     $units       $ODTUnits object for unit conversion
     * @return array        Width and height as strings
     */
    public static function getImageSizeString($src, $width = NULL, $height = NULL, $preferImage=true, ODTUnits $units){
        list($width_file, $height_file) = self::getImageSize($src);

        // Get original ratio if possible
        $ratio = 1;
        if ($width_file != 0 && $height_file != 0) {
            $ratio = $height_file/$width_file;
        }

        if ($width_file != 0 && $preferImage) {
            $width = $width_file.'cm';
            $height = $height_file.'cm';
        } else {
            // convert from pixel to centimeters only if no unit is
            // specified or if unit is 'px'
            $unit_width = $units->stripDigits ($width);
            $unit_height = $units->stripDigits ($height);
            if ((empty($unit_width) && empty($unit_height)) ||
                ($unit_width == 'px' && $unit_height == 'px')) {
                // The float casts drop a trailing unit like in '100px'
                if (!$height) {
                    $height = (float) $width * $ratio;
                }
                $height = (((float) $height/96.0)*2.54).'cm';
                if ($width) $width = (((float) $width/96.0)*2.54).'cm';
            }
        }

        // At this point $width and $height should include a unit

        $width = str_replace(',', '.', (string) $width);
        $height = str_replace(',', '.', (string) $height);
        if ($width && $height) {
            // Don't be wider than the page.
            // Compare numerically, comparing the string including its unit
            // with a number is not reliable.
            // FIXME : this assumes A4 page format with 2cm margins
            //         (and that the unit of $width is 'cm')
            if ((float) $width >= 17) {
                $width = $width.'" style:rel-width="100%';
                $height = $height.'" style:rel-height="scale';
            }
        } else {
            // external image and unable to download, fallback
            if (!$width) {
                $width = '" svg:rel-width="100%';
            }
            if (!$height) {
                $height = '" svg:rel-height="100%';
            }
        }
        return array($width, $height);
    }

    /**
     * Split $value by whitespace and convert any relative values (%)
     * into an absolute value. This is done by taking the percentage of
     * $maxWidthInPt.
     *
     * @param  string   $value        String (Property value)
     * @param  integer  $maxWidthInPt Maximum width in points
     * @param  ODTUnits $units        $ODTUnits object for unit conversion
     * @return string   The value with all percentage parts replaced by 'pt' values
     */
    protected static function adjustPercentageValueParts ($value, $maxWidthInPt, $units) {
        $adjusted = array();
        foreach (preg_split ('/\s+/', $value) as $part) {
            if ( strlen ($part) > 1 && substr ($part, -1) == '%' ) {
                $percentageValue = $units->getDigits($part);
                $part = (($percentageValue * $maxWidthInPt)/100) . 'pt';
            }
            $adjusted [] = $part;
        }

        $value = trim (implode (' ', $adjusted));
        return trim ($value, '"');
    }

    /**
     * The function adjusts the properties values for ODT:
     * - 'em' units are converted to 'pt' units
     * - CSS color names are converted to its RGB value
     * - short color values like #fff are converted to the long format, e.g #ffffff
     * - some relative values are converted to absolute depending on other
     *   values e.g. 'line-height' and 'font-size'
     *
     * @author LarsDW223
     *
     * @param array    $properties Array with property value pairs (modified in place)
     * @param ODTUnits $units      Units object to use for conversion
     * @param integer  $maxWidth   Width of the surrounding element. If given,
     *                             relative margins are converted to absolute values.
     */
    public static function adjustValuesForODT (&$properties, ODTUnits $units, $maxWidth=NULL) {
        $adjustToMaxWidth = array('margin', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom');

        // Convert 'text-decoration'.
        $decorationStyles = array('line-through' => 'text-line-through-style',
                                  'underline'    => 'text-underline-style',
                                  'overline'     => 'text-overline-style');
        if ( isset($properties ['text-decoration']) &&
             is_string ($properties ['text-decoration']) &&
             isset($decorationStyles [$properties ['text-decoration']]) ) {
            $properties [$decorationStyles [$properties ['text-decoration']]] = 'solid';
        }

        // Normalize border properties
        cssborder::normalize($properties);

        // First do simple adjustments per property
        foreach ($properties as $property => $value) {
            $properties [$property] = self::adjustValueForODT ($property, $value, $units);
        }

        // Adjust relative margins if $maxWidth is given.
        // $maxWidth is expected to be the width of the surrounding element.
        if ($maxWidth != NULL) {
            $maxWidthInPt = $units->toPoints($maxWidth, 'y');
            $maxWidthInPt = $units->getDigits($maxWidthInPt);

            foreach ($adjustToMaxWidth as $property) {
                if (!empty($properties [$property])) {
                    $properties [$property] = self::adjustPercentageValueParts ($properties [$property], $maxWidthInPt, $units);
                }
            }
        }

        // Now we do the adjustments for which one value depends on another

        // Do we have font-size or line-height set?
        $fontSize = isset($properties ['font-size']) ? $properties ['font-size'] : NULL;
        $lineHeight = isset($properties ['line-height']) ? $properties ['line-height'] : NULL;
        if ($fontSize == NULL && $lineHeight == NULL) {
            return;
        }

        // First get absolute font-size in points
        $base_font_size_in_pt = $units->getPixelPerEm ().'px';
        $base_font_size_in_pt = $units->toPoints($base_font_size_in_pt, 'y');
        $base_font_size_in_pt = $units->getDigits($base_font_size_in_pt);
        if ($fontSize != NULL) {
            $font_size_unit = $units->stripDigits($fontSize);
            $font_size_digits = $units->getDigits($fontSize);
            if ($font_size_unit == '%') {
                $base_font_size_in_pt = ($font_size_digits * $base_font_size_in_pt)/100;
                $properties ['font-size'] = $base_font_size_in_pt.'pt';
            } elseif ($font_size_unit != 'pt') {
                $properties ['font-size'] = $units->toPoints($fontSize, 'y');
                $base_font_size_in_pt = $units->getDigits($properties ['font-size']);
            } else {
                $base_font_size_in_pt = $font_size_digits;
            }
        }

        // Convert relative line-heights to absolute
        if ($lineHeight != NULL) {
            $line_height_unit = $units->stripDigits($lineHeight);
            $line_height_digits = $units->getDigits($lineHeight);
            if ($line_height_unit == '%') {
                $properties ['line-height'] = (($line_height_digits * $base_font_size_in_pt)/100).'pt';
            } elseif (empty($line_height_unit)) {
                // A plain number is a factor of the font size
                $properties ['line-height'] = ($line_height_digits * $base_font_size_in_pt).'pt';
            }
        }
    }

    /**
     * The function adjusts the property value for ODT:
     * - 'em' units are converted to 'pt' units
     * - CSS color names are converted to its RGB value
     * - short color values like #fff are converted to the long format, e.g #ffffff
     * - for properties including 'border' in their name all parts not
     *   ending with 'pt' are passed to the points conversion
     * - for 'font-family' only the first font is kept
     *
     * @author LarsDW223
     *
     * @param  string   $property The property name
     * @param  string   $value    The value
     * @param  ODTUnits $units    Units object to use for conversion
     * @return string   Converted value
     */
    public static function adjustValueForODT ($property, $value, ODTUnits $units) {
        if ($property == 'font-family') {
            // There might be several font/font-families included.
            // Only take the first one.
            $value = trim($value, '"');
            if (strpos($value, ',') !== false) {
                $values = explode(',', $value);
                $value = trim ($values [0], '"');
                $value = trim ($value, "'");
                $value = trim ($value);
            }
            return $value;
        }

        $isBorder = (strpos($property, 'border') !== false);
        $adjusted = array();
        foreach (preg_split ('/\s+/', (string) $value) as $part) {
            if ( $part === '' ) {
                // Caused by leading/trailing whitespace or an empty value
                continue;
            }

            // If it is a short color value (#xxx) then convert it to long value (#xxxxxx)
            // (ODT does not support the short form)
            if ( $part [0] == '#' && strlen ($part) == 4 ) {
                $part = '#'.$part [1].$part [1].$part [2].$part [2].$part [3].$part [3];
            } else {
                // If it is a CSS color name, get its real color value
                $color = csscolors::getColorValue ($part);
                if ( $part == 'black' || $color != '#000000' ) {
                    $part = $color;
                }
            }

            // The suffix checks always inspect the current content of $part,
            // the conversions above/below may have changed its length.
            if ( strlen ($part) > 2 && substr ($part, -2) == 'em' ) {
                $part = $units->toPoints($part, 'y');
            }

            if ( $isBorder && strlen ($part) > 2 && substr ($part, -2) != 'pt' ) {
                $part = $units->toPoints($part, 'y');
            }

            // Some values can have '"' in it. These need to be converted to '&apos;'
            // e.g. 'font-family' to specify that '"Courier New"' is one font name not two
            $part = str_replace('"', '&apos;', $part);

            $adjusted [] = $part;
        }

        $value = trim (implode (' ', $adjusted));
        return trim ($value, '"');
    }

    /**
     * This function processes the CSS style declarations in $style and saves them in $properties
     * as key - value pairs, e.g. $properties ['color'] = 'red'. It also adjusts the values
     * for the ODT format and changes URLs to local paths if required (using $baseURL).
     *
     * @author LarsDW223
     * @param array       $properties Target array for the properties
     * @param string      $style      The CSS style e.g. 'color:red;'
     * @param string|null $baseURL    If not empty, used to replace the URL prefix
     *                                of a 'background-image'
     * @param ODTUnits    $units      Units object to use for conversion
     * @param integer     $maxWidth   Maximum width
     */
    public static function getCSSStylePropertiesForODT(&$properties, $style, $baseURL = NULL, ODTUnits $units, $maxWidth=NULL){
        // Create rule with selector '*' (doesn't matter) and declarations as set in $style
        $rule = new css_rule ('*', $style);
        $rule->getProperties ($properties);
        self::adjustValuesForODT ($properties, $units, $maxWidth);

        if ( !empty ($properties ['background-image']) && !empty ($baseURL) ) {
            // Replace 'url(...)' with $baseURL
            $properties ['background-image'] = cssimportnew::replaceURLPrefix ($properties ['background-image'], $baseURL);
        }
    }

    /**
     * The function opens/puts a new element on the HTML stack in $params->htmlStack.
     * The element name will be $element and it will be created with the attributes $attributes.
     * Then CSS matching is performed and the CSS properties are returned in $dest.
     * Finally the CSS properties are converted to ODT format if necessary.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params     Common params.
     * @param array             $dest       Target array for properties storage
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param integer           $maxWidth   Maximum Width
     */
    public static function openHTMLElement (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL) {
        // Push/create our element to import on the stack
        $params->htmlStack->open($element, $attributes);
        $toMatch = $params->htmlStack->getCurrentElement();
        $params->import->getPropertiesForElement($dest, $toMatch, $params->units);

        // Adjust values for ODT
        self::adjustValuesForODT($dest, $params->units, $maxWidth);
    }

    /**
     * The function closes element with name $element on the HTML stack in $params->htmlStack.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params  Common params.
     * @param string            $element The element's name
     */
    public static function closeHTMLElement (ODTInternalParams $params, $element) {
        $params->htmlStack->close($element);
    }

    /**
     * The function temporarily opens/puts a new element on the HTML stack in $params->htmlStack.
     * Before leaving the function the element is removed from the stack.
     *
     * The element name will be $element and it will be created with the attributes $attributes.
     * After opening the element CSS matching is performed and the CSS properties are returned in $dest.
     * Finally the CSS properties are converted to ODT format if necessary.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params     Common params.
     * @param array             $dest       Target array for properties storage
     * @param string            $element    The element's name
     * @param string            $attributes The element's attributes
     * @param integer           $maxWidth   Maximum Width
     * @param boolean           $inherit    Enable/disable CSS inheritance
     */
    public static function getHTMLElementProperties (ODTInternalParams $params, array &$dest, $element, $attributes, $maxWidth=NULL, $inherit=true) {
        // Push/create our element to import on the stack
        $params->htmlStack->open($element, $attributes);
        $toMatch = $params->htmlStack->getCurrentElement();
        $params->import->getPropertiesForElement($dest, $toMatch, $params->units, $inherit);

        // Adjust values for ODT
        self::adjustValuesForODT($dest, $params->units, $maxWidth);

        // Remove element from stack
        $params->htmlStack->removeCurrent();
    }

    /**
     * Small helper function for finding the next tag enclosed in <angle> brackets.
     * Returns beginning and end of the tag as an array [0] = start, [1] = end.
     *
     * @author LarsDW223
     * @param  string      $content Code to search in.
     * @param  integer     $pos     Start position for searching.
     * @return array|false Positions of '<' and the following '>' or
     *                     false if there is no complete tag
     */
    public static function getNextTag (&$content, $pos) {
        $start = strpos ($content, '<', $pos);
        if ($start === false) {
            return false;
        }
        // Search the end behind the start. Otherwise a '>' in the text
        // in front of the tag would be taken as the end of the tag.
        $end = strpos ($content, '>', $start);
        if ($end === false) {
            return false;
        }
        return array($start, $end);
    }

    /**
     * The function returns $value as a valid IRI and replaces some signs
     * if necessary, e.g. '&' will be replaced by '&amp;'.
     * The function will not do double replacements, e.g. if the string
     * already includes a '&amp;' it will NOT become '&amp;amp;'.
     * The numeric reference '&#38;' is also converted to '&amp;'.
     *
     * @author LarsDW223
     * @param  string $value String to be converted to IRI
     * @return string
     */
    public static function stringToIRI ($value) {
        // '&#38;' must be replaced with "&amp;"
        $value = str_replace ('&#38;', '&amp;', (string) $value);

        // Every '&' not already starting a '&amp;' must be replaced with "&amp;"
        return preg_replace ('/&(?!amp;)/', '&amp;', $value);
    }

    /**
     * Extract the URL from the 'href' attribute found in $search.
     *
     * @param  string $search Attributes of a link element
     * @return string The URL or an empty string if there is no 'href' attribute
     */
    protected static function getLinkURL ($search) {
        if ( preg_match ('/href="([^"]*)"/', (string) $search, $matches) !== 1 ) {
            return '';
        }
        $url = $matches [1];
        // Keep '&' and ':' in the link URL unescaped, otherwise url parameter passing will not work
        $url = str_replace('&amp;', '&', $url);
        $url = str_replace('%3A', ':', $url);

        return $url;
    }

    /**
     * static call back to replace spaces
     *
     * @param  array  $matches Regular expression matches, [1] holds the spaces
     * @return string ODT space element representing the same number of spaces
     */
    protected static function _preserveSpace($matches){
        $count = strlen($matches[1]);
        return '<text:s text:c="'.$count.'"/>';
    }

    /**
     * Create a text style from the CSS properties matching the given
     * element, unless a style named $styleName already exists.
     *
     * @param  ODTInternalParams $params     Common params.
     * @param  string            $element    The element's name
     * @param  string            $attributes The element's attributes
     * @param  string|null       $styleName  Style name to use. If NULL a new
     *                                       name will be generated.
     * @return string            The name of the style
     */
    protected static function createTextStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
        if ($styleName != NULL && $params->document->styleExists($styleName)) {
            // Style already exists
            return $styleName;
        }

        // Create automatic style, get properties first
        $properties = array();
        self::getHTMLElementProperties ($params, $properties, $element, $attributes);

        if ($styleName == NULL) {
            $properties ['style-name'] = ODTStyle::getNewStylename ('span');
        } else {
            // Use callers style name. He needs to be sure that it's unique!
            $properties ['style-name'] = $styleName;
        }
        $params->document->createTextStyle($properties, false);

        return $properties ['style-name'];
    }

    /**
     * Create a paragraph style from the CSS properties matching the given
     * element, unless a style named $styleName already exists.
     *
     * @param  ODTInternalParams $params     Common params.
     * @param  string            $element    The element's name
     * @param  string            $attributes The element's attributes
     * @param  string|null       $styleName  Style name to use. If NULL a new
     *                                       name will be generated.
     * @return string            The name of the style
     */
    protected static function createParagraphStyle (ODTInternalParams $params, $element, $attributes, $styleName=NULL) {
        if ($styleName != NULL && $params->document->styleExists($styleName)) {
            // Style already exists
            return $styleName;
        }

        // Create automatic style, get properties first
        $properties = array();
        self::getHTMLElementProperties ($params, $properties, $element, $attributes);

        if ($styleName == NULL) {
            $properties ['style-name'] = ODTStyle::getNewStylename ('span');
        } else {
            // Use callers style name. He needs to be sure that it's unique!
            $properties ['style-name'] = $styleName;
        }
        $params->document->createParagraphStyle($properties, false);

        return $properties ['style-name'];
    }

    /**
     * Return the option $key from $options or $default if it is not set.
     *
     * @param  array  $options Array of options
     * @param  string $key     Name of the option
     * @param  mixed  $default Value returned if the option is not set
     * @return mixed
     */
    protected static function getOption (array $options, $key, $default=NULL) {
        return isset($options [$key]) ? $options [$key] : $default;
    }

    /**
     * Split HTML code into a list of entries. Each entry is an array with
     * one of the following keys:
     * - 'content':   text content (or an element without content like <abc/>)
     * - 'tag-open':  name of an opening tag, its attributes (if any) are
     *                stored with key 'attributes'
     * - 'tag-close': name of a closing tag
     * Tag entries save the original tag code with key 'tag-orig'.
     *
     * @param  string $HTMLCode The HTML code to split
     * @return array  List of entries in document order
     */
    protected static function parseHTMLCode ($HTMLCode) {
        $parsed = array();
        $max = strlen ($HTMLCode);
        $pos = 0;
        while ($pos < $max) {
            $found = self::getNextTag($HTMLCode, $pos);
            if ($found === false) {
                // No more tags, the rest is content
                $parsed [] = array('content' => substr($HTMLCode, $pos));
                break;
            }
            list($start, $end) = $found;

            // Content in front of the tag (may be empty)
            $parsed [] = array('content' => (string) substr($HTMLCode, $pos, $start - $pos));

            $tagged = substr($HTMLCode, $start, $end - $start + 1);
            $entry = array();
            if ($HTMLCode [$end - 1] == '/') {
                // Element without content <abc/>, doesn't make sense, save as content
                $entry ['content'] = $tagged;
            } else if ($HTMLCode [$start + 1] != '/') {
                $parts = explode(' ', trim($tagged, '<> '), 2);
                $entry ['tag-open'] = $parts [0];
                if (isset($parts [1])) {
                    $entry ['attributes'] = $parts [1];
                }
                $entry ['tag-orig'] = $tagged;
            } else {
                $entry ['tag-close'] = trim ($tagged, '<>/ ');
                $entry ['tag-orig'] = $tagged;
            }
            $entry ['matched'] = false;
            $parsed [] = $entry;

            $pos = $end + 1;
        }
        return $parsed;
    }

    /**
     * Apply the options 'class', 'style_names' and 'style_names_prefix'
     * to the attributes of an element.
     *
     * If option 'class' is not empty, it is put in front of the value of
     * an existing 'class="..."' attribute. If option 'style_names' is
     * 'prefix_and_class', the style name is built from the option
     * 'style_names_prefix' followed by the (adjusted) class value.
     *
     * @param  string|null $attributes The element's attributes
     * @param  array       $options    Array of options
     * @return array       [0] = attributes, [1] = style name or NULL,
     *                     [2] = true if a class attribute was found and used
     */
    protected static function applyClassOptions ($attributes, array $options) {
        $pattern = '/class="([^"]*)"/';
        $styleName = NULL;
        $haveClass = false;

        $class = self::getOption($options, 'class');
        if (!empty($class) && preg_match($pattern, (string) $attributes, $matches) == 1) {
            $class_attr = 'class="'.$class.' '.$matches [1].'"';
            $attributes = str_replace($matches [0], $class_attr, $attributes);
            $haveClass = true;
        }

        if (self::getOption($options, 'style_names') === 'prefix_and_class' &&
            preg_match($pattern, (string) $attributes, $matches) == 1) {
            $styleName = self::getOption($options, 'style_names_prefix', '').$matches [1];
            $haveClass = true;
        }

        return array($attributes, $styleName, $haveClass);
    }

    /**
     * Convenience function for converting some HTML code to ODT format.
     * The function will try to automatically create any needed ODT styles
     * from the CSS code found in the HTML code.
     *
     * Also some special settings can be passed in the options array:
     *
     * $options ['p_style']:
     *   The default paragraph style. If empty 'body' will be used.
     *
     * $options ['list_p_style']:
     *   The default paragraph style in lists. If empty 'body' will be used.
     *
     * $options ['list_ol_style']:
     *   The default style for ordered lists. If empty 'numbering' will be used.
     *
     * $options ['list_ul_style']:
     *   The default style for un-ordered lists. If empty 'list' will be used.
     *
     * $options ['media_selector']:
     *   The media selector used for CSS handling (e.g. 'screen' or 'print').
     *   If empty the current/configured one will be used.
     *
     * $options ['element']:
     *   If not empty an HTML tag named '$options ['element']' will be pushed
     *   on the internal HTML stack before converting the $HTMLCode.
     *   This influences CSS handling.
     *
     * $options ['attributes']:
     *   The attributes to set for '$options ['element']'.
     *
     * $options ['escape_content']:
     *   Should have the value 'true' or 'false' (as string!). If 'true'
     *   XML entities will be escaped. Otherwise it is assumed that it
     *   already has been done.
     *
     * $options ['class']:
     *   Optional CSS class to add to found 'class="..."' attributes in
     *   the HTML code.
     *
     * $options ['style_names']:
     *   If set to 'prefix_and_class' then ODT style names will not be
     *   generated dynamically but are constructed from '$options ['style_names_prefix']'
     *   following the CSS class name(s).
     *
     * $options ['linebreaks']:
     *   If set to 'remove' then linebreaks will be ignored. Otherwise
     *   they will be kept and converted to proper ODT linebreaks.
     *
     * $options ['tabs']:
     *   If set to 'remove' then tabs will be ignored. Otherwise they
     *   will be kept and converted to proper ODT tabs.
     *
     * $options ['space']:
     *   If set to 'preserve' then space is preserved like for preformatted
     *   code blocks. Otherwise space is not preserved and multiple spaces
     *   will appear as only one space.
     *
     * @author LarsDW223
     * @param ODTInternalParams $params   The internal params
     * @param string            $HTMLCode The HTML code to convert
     * @param array             $options  Array of options
     */
    public static function generateODTfromHTMLCode(ODTInternalParams $params, $HTMLCode, array $options){
        // Known elements. Elements having their own case in the switch
        // statement below only need to be listed here, their 'open' and
        // 'close' code is not used.
        $elements = array ('sup' => array ('open' => '<text:span text:style-name="sup">',
                                           'close' => '</text:span>'),
                           'sub' => array ('open' => '<text:span text:style-name="sub">',
                                           'close' => '</text:span>'),
                           'u' => array ('open' => '<text:span text:style-name="underline">',
                                         'close' => '</text:span>'),
                           'em' => array ('open' => '<text:span text:style-name="Emphasis">',
                                          'close' => '</text:span>'),
                           'strong' => array ('open' => '<text:span text:style-name="Strong_20_Emphasis">',
                                              'close' => '</text:span>'),
                           'del' => array ('open' => '<text:span text:style-name="del">',
                                           'close' => '</text:span>'),
                           'span' => array ('open' => '',
                                            'close' => ''),
                           'a' => array ('open' => '',
                                         'close' => ''),
                           'ol' => array ('open' => '',
                                          'close' => ''),
                           'ul' => array ('open' => '',
                                          'close' => ''),
                           'li' => array ('open' => '<text:list-item><text:p text:style-name="Text_20_body">',
                                          'close' => '</text:p></text:list-item>'),
                           // In the moment only remove divs
                           'div' => array ('open' => '', 'close' => ''),
                          );

        // Escaping is only switched off by the string 'false'
        $escapeContent = (self::getOption($options, 'escape_content') !== 'false');

        // remove useless leading and trailing whitespace-newlines
        $HTMLCode = preg_replace('/^&nbsp;\n/', '', $HTMLCode);
        $HTMLCode = preg_replace('/\n&nbsp;$/', '', $HTMLCode);
        // '&nbsp;' is not known in XML, use the numeric character reference
        $HTMLCode = str_replace('&nbsp;', '&#xA0;', $HTMLCode);

        // Get default paragraph style
        if (!empty($options ['p_style'])) {
            $p_style = $options ['p_style'];
        } else {
            $p_style = $params->document->getStyleName('body');
        }

        // Get default list style names
        if (!empty($options ['list_p_style'])) {
            $p_list_style = $options ['list_p_style'];
        } else {
            $p_list_style = $params->document->getStyleName('body');
        }
        if (!empty($options ['list_ol_style'])) {
            $ol_list_style = $options ['list_ol_style'];
        } else {
            $ol_list_style = $params->document->getStyleName('numbering');
        }
        if (!empty($options ['list_ul_style'])) {
            $ul_list_style = $options ['list_ul_style'];
        } else {
            $ul_list_style = $params->document->getStyleName('list');
        }

        // Set new media selector (remember old one)
        $media = $params->import->getMedia ();
        if (!empty($options ['media_selector'])) {
            $params->import->setMedia ($options ['media_selector']);
        }

        if (!empty($options ['element'])) {
            $params->htmlStack->open($options ['element'], self::getOption($options, 'attributes'));
        }

        // First examine $HTMLCode and differ between normal content,
        // opening tags and closing tags.
        $parsed = self::parseHTMLCode($HTMLCode);
        $count = count($parsed);

        // Check each array entry.
        $checked = array();
        $first = true;
        $firstTag = '';
        $olStartValue = NULL;
        for ($out = 0 ; $out < $count ; $out++) {
            if (isset($checked [$out])) {
                // Closing tag which has already been converted
                continue;
            }
            $found = $parsed [$out];

            if (isset($found ['content'])) {
                if ($escapeContent) {
                    $checked [$out] = $params->document->replaceXMLEntities($found ['content']);
                } else {
                    $checked [$out] = $found ['content'];
                }
            } else if (isset($found ['tag-open'])) {
                $tag = $found ['tag-open'];
                $attributes = isset($found ['attributes']) ? $found ['attributes'] : NULL;

                // Search the first unmatched closing tag with the same
                // name. Unknown tags are never converted.
                $in = false;
                if (array_key_exists($tag, $elements)) {
                    for ($index = $out+1 ; $index < $count ; $index++) {
                        $search = $parsed [$index];
                        if (isset($search ['tag-close']) &&
                            $tag == $search ['tag-close'] &&
                            $search ['matched'] === false) {
                            $in = $index;
                            break;
                        }
                    }
                }

                if ($in === false) {
                    // Unknown tag or no closing tag found, save as content
                    if ($escapeContent) {
                        $checked [$out] = $params->document->replaceXMLEntities($found ['tag-orig']);
                    } else {
                        $checked [$out] = $found ['tag-orig'];
                    }
                    continue;
                }
                $parsed [$in]['matched'] = true;

                // Remember the first element
                if ($first) {
                    $first = false;
                    $firstTag = $tag;
                }

                // Known and closed tag, convert to ODT
                switch ($tag) {
                    case 'span':
                        // Create ODT span using CSS style from attributes
                        list($attributes, $style_name) = self::applyClassOptions($attributes, $options);
                        $style_name = self::createTextStyle ($params, 'span', $attributes, $style_name);
                        $checked [$out] = '<text:span text:style-name="'.$style_name.'">';
                        $checked [$in] = '</text:span>';
                        break;
                    case 'a':
                        $url = self::getLinkURL($attributes);
                        if (empty($url)) {
                            $url = 'URLNotFoundInXHTMLLink';
                        }
                        $checked [$out] = $params->document->openHyperlink ($url, NULL, NULL, true);
                        $checked [$in] = $params->document->closeHyperlink (true);
                        break;
                    case 'ul':
                        $checked [$out] = '<text:list text:style-name="'.$ul_list_style.'" text:continue-numbering="false">';
                        $checked [$in] = '</text:list>';
                        break;
                    case 'ol':
                        $checked [$out] = '<text:list text:style-name="'.$ol_list_style.'" text:continue-numbering="false">';
                        $checked [$in] = '</text:list>';
                        // The start value is set for the next list item
                        if (preg_match('/start="([^"]*)"/', (string) $attributes, $matches) == 1) {
                            $olStartValue = $matches [1];
                        }
                        break;
                    case 'li':
                        // Create ODT paragraph style using CSS style from attributes
                        list($attributes, $style_name, $haveClass) = self::applyClassOptions($attributes, $options);
                        if ($haveClass) {
                            $style_name = self::createParagraphStyle ($params, 'li', $attributes, $style_name);
                        } else {
                            $style_name = $p_list_style;
                        }

                        $checked [$out] = '<text:list-item';
                        if ($olStartValue !== NULL) {
                            $checked [$out] .= ' text:start-value="'.$olStartValue.'"';
                            $olStartValue = NULL;
                        }
                        $checked [$out] .= '><text:p text:style-name="'.$style_name.'">';
                        $checked [$in] = '</text:p></text:list-item>';
                        break;
                    default:
                        // Simple replacement
                        $checked [$out] = $elements [$tag]['open'];
                        $checked [$in] = $elements [$tag]['close'];
                        break;
                }
            } else if (isset($found ['tag-close'])) {
                // If we find a closing tag it means it did not match
                // an opening tag. Convert to content!
                $checked [$out] = $params->document->replaceXMLEntities($found ['tag-orig']);
            }
        }

        // Lists are no paragraph content, other content needs an enclosing paragraph
        $isList = ($firstTag == 'ol' || $firstTag == 'ul');

        // Eventually we need to create an enclosing element, open it.
        // Close an eventually open paragraph in any case.
        $params->document->paragraphClose();
        if (!$isList) {
            $params->document->paragraphOpen($p_style);
        }

        // Add checked entries to content (in document order)
        $content = '';
        for ($index = 0 ; $index < $count ; $index++) {
            if (isset($checked [$index])) {
                $content .= $checked [$index];
            }
        }

        // Handle newlines
        if (self::getOption($options, 'linebreaks') !== 'remove') {
            $content = str_replace("\n",'<text:line-break/>',$content);
        } else {
            $content = str_replace("\n",'',$content);
        }

        // Handle tabs
        if (self::getOption($options, 'tabs') !== 'remove') {
            $content = str_replace("\t",'<text:tab/>',$content);
        } else {
            $content = str_replace("\t",'',$content);
        }

        // Preserve space?
        if (self::getOption($options, 'space') === 'preserve') {
            $content = preg_replace_callback('/(  +)/',array(__CLASS__, '_preserveSpace'), $content);
        }

        $params->content .= $content;

        // Eventually we need to create an enclosing element, close it
        if (!$isList) {
            $params->document->paragraphClose();
        }

        // Remove current element from stack, if we created one
        if (!empty($options ['element'])) {
            $params->htmlStack->removeCurrent();
        }

        // Restore media selector
        if (!empty($options ['media_selector'])) {
            $params->import->setMedia ($media);
        }
    }
}