<?php

namespace Sabre\VObject\Parser;

use
    Sabre\VObject\ParseException,
    Sabre\VObject\EofException,
    Sabre\VObject\Component,
    Sabre\VObject\Property,
    Sabre\VObject\Component\VCalendar,
    Sabre\VObject\Component\VCard;

/**
 * MimeDir parser.
 *
 * This class parses iCalendar 2.0 and vCard 2.1, 3.0 and 4.0 files. This
 * parser will return one of the following two objects from the parse method:
 *
 * Sabre\VObject\Component\VCalendar
 * Sabre\VObject\Component\VCard
 *
 * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class MimeDir extends Parser {

    /**
     * The input stream.
     *
     * @var resource
     */
    protected $input;

    /**
     * Root component
     *
     * @var Component
     */
    protected $root;

    /**
     * Parses an iCalendar or vCard file
     *
     * Pass a stream or a string. If null is passed, the existing buffer is
     * used.
     *
     * @param string|resource|null $input
     * @param int|null $options Bitfield of OPTION_* flags; null keeps the
     *                          options that are currently set.
     * @throws ParseException
     * @throws EofException
     * @return Component The root component (a VCalendar or VCard object).
     */
    public function parse($input = null, $options = null) {

        $this->root = null;
        if (!is_null($input)) {

            $this->setInput($input);

        }

        if (!is_null($options)) $this->options = $options;

        $this->parseDocument();

        return $this->root;

    }

    /**
     * Sets the input buffer. Must be a string or stream.
     *
     * Strings are copied into a php://temp stream, so the rest of the parser
     * only ever deals with stream resources.
     *
     * @param resource|string $input
     * @throws \InvalidArgumentException If $input is neither string nor resource.
     * @return void
     */
    public function setInput($input) {

        // Resetting the parser
        $this->lineIndex = 0;
        $this->startLine = 0;

        if (is_string($input)) {
            // Converting to a stream.
            $stream = fopen('php://temp', 'r+');
            fwrite($stream, $input);
            rewind($stream);
            $this->input = $stream;
        } elseif (is_resource($input)) {
            $this->input = $input;
        } else {
            throw new \InvalidArgumentException('This parser can only read from strings or streams.');
        }

    }

    /**
     * Parses an entire document.
     *
     * Reads the BEGIN: line to decide which root class to instantiate, then
     * feeds every following line to parseLine() until the matching END: line
     * is found. The result is stored in $this->root.
     *
     * @throws ParseException If the document is not a VCARD/VCALENDAR, or if
     *                        the END: line does not match the root component.
     * @throws EofException
     * @return void
     */
    protected function parseDocument() {

        $line = $this->readLine();

        // BOM is ZERO WIDTH NO-BREAK SPACE (U+FEFF).
        // It's 0xEF 0xBB 0xBF in UTF-8 hex.
        if (   3 <= strlen($line)
            && ord($line[0]) === 0xef
            && ord($line[1]) === 0xbb
            && ord($line[2]) === 0xbf) {
            $line = substr($line, 3);
        }

        switch(strtoupper($line)) {
            case 'BEGIN:VCALENDAR' :
                // The map lookup must use the literal component name; no
                // $name variable exists at this point.
                $class = isset(VCalendar::$componentMap['VCALENDAR'])
                    ? VCalendar::$componentMap['VCALENDAR']
                    : 'Sabre\\VObject\\Component\\VCalendar';
                break;
            case 'BEGIN:VCARD' :
                $class = isset(VCard::$componentMap['VCARD'])
                    ? VCard::$componentMap['VCARD']
                    : 'Sabre\\VObject\\Component\\VCard';
                break;
            default :
                throw new ParseException('This parser only supports VCARD and VCALENDAR files');
        }

        $this->root = new $class(array(), false);

        while(true) {

            // Reading until we hit END:
            $line = $this->readLine();
            if (strtoupper(substr($line,0,4)) === 'END:') {
                break;
            }
            $result = $this->parseLine($line);
            if ($result) {
                $this->root->add($result);
            }

        }

        $name = strtoupper(substr($line, 4));
        if ($name!==$this->root->name) {
            throw new ParseException('Invalid MimeDir file. expected: "END:' . $this->root->name . '" got: "END:' . $name . '"');
        }

    }

    /**
     * Parses a line, and if it hits a component, it will also attempt to parse
     * the entire component
     *
     * @param string $line Unfolded line
     * @throws ParseException If a nested component is closed by a mismatching
     *                        END: line.
     * @throws EofException
     * @return Component|Property|false The parsed node, or false when the
     *                                  line was ignored.
     */
    protected function parseLine($line) {

        // Start of a new component
        if (strtoupper(substr($line, 0, 6)) === 'BEGIN:') {

            $component = $this->root->createComponent(substr($line,6), array(), false);

            while(true) {

                // Reading until we hit END:
                $line = $this->readLine();
                if (strtoupper(substr($line,0,4)) === 'END:') {
                    break;
                }
                $result = $this->parseLine($line);
                if ($result) {
                    $component->add($result);
                }

            }

            $name = strtoupper(substr($line, 4));
            if ($name!==$component->name) {
                throw new ParseException('Invalid MimeDir file. expected: "END:' . $component->name . '" got: "END:' . $name . '"');
            }

            return $component;

        } else {

            // Property reader
            $property = $this->readProperty($line);
            if (!$property) {
                // Ignored line
                return false;
            }
            return $property;

        }

    }

    /**
     * We need to look ahead 1 line every time to see if we need to 'unfold'
     * the next line.
     *
     * If that was not the case, we store it here.
     *
     * @var null|string
     */
    protected $lineBuffer;

    /**
     * The real current line number.
     *
     * @var int
     */
    protected $lineIndex = 0;

    /**
     * In the case of unfolded lines, this property holds the line number for
     * the start of the line.
     *
     * @var int
     */
    protected $startLine = 0;

    /**
     * Contains a 'raw' representation of the current line.
     *
     * @var string
     */
    protected $rawLine;

    /**
     * Reads a single line from the buffer.
     *
     * This method strips any newlines and also takes care of unfolding.
     *
     * As a side effect, $this->rawLine is set to the line with its folds
     * preserved (as "\n "), and $this->startLine to the line number the
     * logical line started on.
     *
     * @throws \Sabre\VObject\EofException
     * @throws \Sabre\VObject\ParseException If reading from the stream fails.
     * @return string
     */
    protected function readLine() {

        if (!is_null($this->lineBuffer)) {
            // A previous look-ahead already consumed this line.
            $rawLine = $this->lineBuffer;
            $this->lineBuffer = null;
        } else {
            do {
                $eof = feof($this->input);

                $rawLine = fgets($this->input);

                if ($eof || (feof($this->input) && $rawLine===false)) {
                    throw new EofException('End of document reached prematurely');
                }
                if ($rawLine === false) {
                    throw new ParseException('Error reading from input stream');
                }
                $rawLine = rtrim($rawLine, "\r\n");
            } while ($rawLine === ''); // Skipping empty lines
            $this->lineIndex++;
        }
        $line = $rawLine;

        $this->startLine = $this->lineIndex;

        // Looking ahead for folded lines.
        while (true) {

            $nextLine = rtrim(fgets($this->input), "\r\n");
            $this->lineIndex++;
            // NOTE(review): this is a truthiness check, so besides an empty
            // line / EOF it also stops on (and discards) a line that is
            // exactly "0". Kept as-is to preserve existing behavior.
            if (!$nextLine) {
                break;
            }
            if ($nextLine[0] === "\t" || $nextLine[0] === " ") {
                // Continuation line: drop the single leading whitespace.
                $line .= substr($nextLine, 1);
                $rawLine .= "\n " . substr($nextLine, 1);
            } else {
                // Start of the next logical line; keep it for the next call.
                $this->lineBuffer = $nextLine;
                break;
            }

        }
        $this->rawLine = $rawLine;
        return $line;

    }

    /**
     * Reads a property or component from a line.
     *
     * @param string $line Unfolded line
     * @throws ParseException If the line has no valid property name and
     *                        OPTION_IGNORE_INVALID_LINES is not set.
     * @return Property|false The property object, or false if the line was
     *                        invalid and OPTION_IGNORE_INVALID_LINES is set.
     */
    protected function readProperty($line) {

        if ($this->options & self::OPTION_FORGIVING) {
            $propNameToken = 'A-Z0-9\-\._\\/';
        } else {
            $propNameToken = 'A-Z0-9\-\.';
        }

        $paramNameToken = 'A-Z0-9\-';
        $safeChar = '^";:,';
        $qSafeChar = '^"';

        $regex = "/
            ^(?P<name> [$propNameToken]+ ) (?=[;:])        # property name
            |
            (?<=:)(?P<propValue> .+)$                      # property value
            |
            ;(?P<paramName> [$paramNameToken]+) (?=[=;:])  # parameter name
            |
            (=|,)(?P<paramValue>                           # parameter value
                (?: [$safeChar]*) |
                \"(?: [$qSafeChar]+)\"
            ) (?=[;:,])
            /xi";

        preg_match_all($regex, $line, $matches, PREG_SET_ORDER);

        $property = array(
            'name' => null,
            'parameters' => array(),
            'value' => null
        );

        $lastParam = null;

        /**
         * Looping through all the tokens.
         *
         * Note that the named groups are tested in the reverse of the order
         * in which they appear in the regex: when a sub-pattern matched, the
         * named groups that come after it do not show up in the result, while
         * the ones before it do (as empty strings). The last group that is
         * set therefore tells us which alternative matched.
         */
        foreach($matches as $match) {

            if (isset($match['paramValue'])) {
                if ($match['paramValue'] && $match['paramValue'][0] === '"') {
                    // Stripping the surrounding double quotes.
                    $value = substr($match['paramValue'], 1, -1);
                } else {
                    $value = $match['paramValue'];
                }

                $value = $this->unescapeParam($value);

                // First value is stored as a string, any further values
                // for the same parameter turn it into an array.
                if (is_null($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam] = $value;
                } elseif (is_array($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam][] = $value;
                } else {
                    $property['parameters'][$lastParam] = array(
                        $property['parameters'][$lastParam],
                        $value
                    );
                }
                continue;
            }
            if (isset($match['paramName'])) {
                $lastParam = strtoupper($match['paramName']);
                if (!isset($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam] = null;
                }
                continue;
            }
            if (isset($match['propValue'])) {
                $property['value'] = $match['propValue'];
                continue;
            }
            if (isset($match['name']) && $match['name']) {
                $property['name'] = strtoupper($match['name']);
                continue;
            }

            // @codeCoverageIgnoreStart
            throw new \LogicException('This code should not be reachable');
            // @codeCoverageIgnoreEnd

        }

        if (is_null($property['value'])) {
            $property['value'] = '';
        }
        if (!$property['name']) {
            if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
                return false;
            }
            throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
        }

        // vCard 2.1 states that parameters may appear without a name, and only
        // a value. We can deduce the value based on its name.
        //
        // Our parser will get those as parameters without a value instead, so
        // we're filtering these parameters out first.
        $namedParameters = array();
        $namelessParameters = array();

        foreach($property['parameters'] as $name=>$value) {
            if (!is_null($value)) {
                $namedParameters[$name] = $value;
            } else {
                $namelessParameters[] = $name;
            }
        }

        $propObj = $this->root->createProperty($property['name'], null, $namedParameters);

        foreach($namelessParameters as $namelessParameter) {
            $propObj->add(null, $namelessParameter);
        }

        if (strtoupper($propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
            // Quoted-printable values need the raw (still folded) line.
            $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
        } else {
            $propObj->setRawMimeDirValue($property['value']);
        }

        return $propObj;

    }

    /**
     * Unescapes a property value.
     *
     * vCard 2.1 says:
     *   * Semi-colons must be escaped in some property values, specifically
     *     ADR, ORG and N.
     *   * Semi-colons must be escaped in parameter values, because semi-colons
     *     are also used to separate values.
     *   * No mention of escaping backslashes with another backslash.
     *   * newlines are not escaped either, instead QUOTED-PRINTABLE is used to
     *     span values over more than 1 line.
     *
     * vCard 3.0 says:
     *   * (rfc2425) Backslashes, newlines (\n or \N) and commas must be
     *     escaped, all the time.
     *   * Commas are used for delimiters in multiple values
     *   * (rfc2426) Adds to this that the semi-colon MUST also be escaped,
     *     as in some properties semi-colon is used for separators.
     *   * Properties using semi-colons: N, ADR, GEO, ORG
     *   * Both ADR and N's individual parts may be broken up further with a
     *     comma.
     *   * Properties using commas: NICKNAME, CATEGORIES
     *
     * vCard 4.0 (rfc6350) says:
     *   * Commas must be escaped.
     *   * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
     *     delimiter, depending on the property.
     *   * Backslashes must be escaped
     *   * Newlines must be escaped as either \N or \n.
     *   * Some compound properties may contain multiple parts themselves, so a
     *     comma within a semi-colon delimited property may also be unescaped
     *     to denote multiple parts _within_ the compound property.
     *   * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
     *   * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
     *
     * Even though the spec says that commas must always be escaped, the
     * example for GEO in Section 6.5.2 seems to violate this.
     *
     * iCalendar 2.0 (rfc5545) says:
     *   * Commas or semi-colons may be used as delimiters, depending on the
     *     property.
     *   * Commas, semi-colons, backslashes, newline (\N or \n) are always
     *     escaped, unless they are delimiters.
     *   * Colons shall not be escaped.
     *   * Commas can be considered the 'default delimiter' and is described as
     *     the delimiter in cases where the order of the multiple values is
     *     insignificant.
     *   * Semi-colons are described as the delimiter for 'structured values'.
     *     They are specifically used as a delimiter in REQUEST-STATUS, RRULE,
     *     GEO and EXRULE. EXRULE is deprecated however.
     *
     * Now for the parameters
     *
     * If delimiter is not set (null) this method will just return a string.
     * If it's a comma or a semi-colon the string will be split on those
     * characters, and always return an array.
     *
     * @param string $input
     * @param string $delimiter
     * @return string|string[]
     */
    static public function unescapeValue($input, $delimiter = ';') {

        $regex = '#  (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
        if ($delimiter) {
            // The delimiter is matched literally, so it has to be quoted
            // before it's embedded in the pattern.
            $regex .= ' | (' . preg_quote($delimiter, '#') . ')';
        }
        $regex .= ') #x';

        // Splitting on escape sequences and delimiters; both are kept in the
        // result so they can be translated below.
        $matches = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

        $resultArray = array();
        $result = '';

        foreach($matches as $match) {

            switch ($match) {
                case '\\\\' :
                    $result .='\\';
                    break;
                case '\N' :
                case '\n' :
                    $result .="\n";
                    break;
                case '\;' :
                    $result .=';';
                    break;
                case '\,' :
                    $result .=',';
                    break;
                case $delimiter :
                    // Unescaped delimiter: the current part is complete.
                    $resultArray[] = $result;
                    $result = '';
                    break;
                default :
                    $result .= $match;
                    break;

            }

        }

        $resultArray[] = $result;
        return $delimiter ? $resultArray : $result;

    }

    /**
     * Unescapes a parameter value.
     *
     * vCard 2.1:
     *   * Does not mention a mechanism for this. In addition, double quotes
     *     are never used to wrap values.
     *   * This means that parameters can simply not contain colons or
     *     semi-colons.
     *
     * vCard 3.0 (rfc2425, rfc2426):
     *   * Parameters _may_ be surrounded by double quotes.
     *   * If this is not the case, semi-colon, colon and comma may simply not
     *     occur (the comma used for multiple parameter values though).
     *   * If it is surrounded by double-quotes, it may simply not contain
     *     double-quotes.
     *   * This means that a parameter can in no case encode double-quotes, or
     *     newlines.
     *
     * vCard 4.0 (rfc6350)
     *   * Behavior seems to be identical to vCard 3.0
     *
     * iCalendar 2.0 (rfc5545)
     *   * Behavior seems to be identical to vCard 3.0
     *
     * Parameter escaping mechanism (rfc6868) :
     *   * This rfc describes a new way to escape parameter values.
     *   * New-line is encoded as ^n
     *   * ^ is encoded as ^^.
     *   * " is encoded as ^'
     *
     * @param string $input
     * @return string The value with the rfc6868 sequences decoded.
     */
    private function unescapeParam($input) {

        return
            preg_replace_callback(
                '#(\^(\^|n|\'))#',
                function($matches) {
                    // $matches[2] is the character following the caret.
                    switch($matches[2]) {
                        case 'n' :
                            return "\n";
                        case '^' :
                            return '^';
                        case '\'' :
                            return '"';

                    // @codeCoverageIgnoreStart
                    }
                    // @codeCoverageIgnoreEnd
                },
                $input
            );
    }

    /**
     * Gets the full quoted printable value.
     *
     * We need a special method for this, because newlines have both a meaning
     * in vCards, and in QuotedPrintable.
     *
     * This method does not do any decoding.
     *
     * When OPTION_FORGIVING is set, this may consume additional lines from
     * the input (see below).
     *
     * @return string
     */
    private function extractQuotedPrintableValue() {

        // We need to parse the raw line again to get the start of the value.
        //
        // We are basically looking for the first colon (:), but we need to
        // skip over the parameters first, as they may contain one.
        //
        // NOTE(review): as written, the first group is greedy and also
        // accepts double quotes, so the value always starts right after the
        // first colon; the quoted-parameter group never comes into play. A
        // colon inside a quoted parameter would be taken as the separator.
        // Confirm whether that case needs to be supported.
        $regex = '/^
            (?: [^:])+ # Anything but a colon
            (?: "[^"]")* # A parameter in double quotes
            : # start of the value we really care about
            (.*)$
        /xs';

        preg_match($regex, $this->rawLine, $matches);

        $value = $matches[1];
        // Removing the first whitespace character from every line. Kind of
        // like unfolding, but we keep the newline.
        $value = str_replace("\n ", "\n", $value);

        // Microsoft products don't always correctly fold lines, they may be
        // missing a whitespace. So if 'forgiving' is turned on, we will take
        // those as well.
        if ($this->options & self::OPTION_FORGIVING) {
            // A trailing '=' is a quoted-printable soft line break, so the
            // value continues on the next line.
            while(substr($value,-1) === '=') {
                // Reading the line
                $this->readLine();
                // Grabbing the raw form
                $value.="\n" . $this->rawLine;
            }
        }

        return $value;

    }

}