1<?php
2
3namespace Sabre\VObject\Parser;
4
5use
6    Sabre\VObject\ParseException,
7    Sabre\VObject\EofException,
8    Sabre\VObject\Component,
9    Sabre\VObject\Property,
10    Sabre\VObject\Component\VCalendar,
11    Sabre\VObject\Component\VCard;
12
13/**
14 * MimeDir parser.
15 *
16 * This class parses iCalendar 2.0 and vCard 2.1, 3.0 and 4.0 files. This
17 * parser will return one of the following two objects from the parse method:
18 *
19 * Sabre\VObject\Component\VCalendar
20 * Sabre\VObject\Component\VCard
21 *
22 * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/).
23 * @author Evert Pot (http://evertpot.com/)
24 * @license http://sabre.io/license/ Modified BSD License
25 */
26class MimeDir extends Parser {
27
    /**
     * The input stream.
     *
     * Assigned by setInput(): either the stream resource handed in by the
     * caller, or a php://temp stream that wraps a string input.
     *
     * @var resource
     */
    protected $input;

    /**
     * Root component
     *
     * Reset to null at the start of parse() and populated by
     * parseDocument(); this is the value parse() returns.
     *
     * @var Component|null
     */
    protected $root;
41
42    /**
43     * Parses an iCalendar or vCard file
44     *
45     * Pass a stream or a string. If null is parsed, the existing buffer is
46     * used.
47     *
48     * @param string|resource|null $input
49     * @param int|null $options
50     * @return array
51     */
52    public function parse($input = null, $options = null) {
53
54        $this->root = null;
55        if (!is_null($input)) {
56
57            $this->setInput($input);
58
59        }
60
61        if (!is_null($options)) $this->options = $options;
62
63        $this->parseDocument();
64
65        return $this->root;
66
67    }
68
69    /**
70     * Sets the input buffer. Must be a string or stream.
71     *
72     * @param resource|string $input
73     * @return void
74     */
75    public function setInput($input) {
76
77        // Resetting the parser
78        $this->lineIndex = 0;
79        $this->startLine = 0;
80
81        if (is_string($input)) {
82            // Convering to a stream.
83            $stream = fopen('php://temp', 'r+');
84            fwrite($stream, $input);
85            rewind($stream);
86            $this->input = $stream;
87        } elseif (is_resource($input)) {
88            $this->input = $input;
89        } else {
90            throw new \InvalidArgumentException('This parser can only read from strings or streams.');
91        }
92
93    }
94
95    /**
96     * Parses an entire document.
97     *
98     * @return void
99     */
100    protected function parseDocument() {
101
102        $line = $this->readLine();
103
104        // BOM is ZERO WIDTH NO-BREAK SPACE (U+FEFF).
105        // It's 0xEF 0xBB 0xBF in UTF-8 hex.
106        if (   3 <= strlen($line)
107            && ord($line[0]) === 0xef
108            && ord($line[1]) === 0xbb
109            && ord($line[2]) === 0xbf) {
110            $line = substr($line, 3);
111        }
112
113        switch(strtoupper($line)) {
114            case 'BEGIN:VCALENDAR' :
115                $class = isset(VCalendar::$componentMap['VCALENDAR'])
116                    ? VCalendar::$componentMap[$name]
117                    : 'Sabre\\VObject\\Component\\VCalendar';
118                break;
119            case 'BEGIN:VCARD' :
120                $class = isset(VCard::$componentMap['VCARD'])
121                    ? VCard::$componentMap['VCARD']
122                    : 'Sabre\\VObject\\Component\\VCard';
123                break;
124            default :
125                throw new ParseException('This parser only supports VCARD and VCALENDAR files');
126        }
127
128        $this->root = new $class(array(), false);
129
130        while(true) {
131
132            // Reading until we hit END:
133            $line = $this->readLine();
134            if (strtoupper(substr($line,0,4)) === 'END:') {
135                break;
136            }
137            $result = $this->parseLine($line);
138            if ($result) {
139                $this->root->add($result);
140            }
141
142        }
143
144        $name = strtoupper(substr($line, 4));
145        if ($name!==$this->root->name) {
146            throw new ParseException('Invalid MimeDir file. expected: "END:' . $this->root->name . '" got: "END:' . $name . '"');
147        }
148
149    }
150
151    /**
152     * Parses a line, and if it hits a component, it will also attempt to parse
153     * the entire component
154     *
155     * @param string $line Unfolded line
156     * @return Node
157     */
158    protected function parseLine($line) {
159
160        // Start of a new component
161        if (strtoupper(substr($line, 0, 6)) === 'BEGIN:') {
162
163            $component = $this->root->createComponent(substr($line,6), array(), false);
164
165            while(true) {
166
167                // Reading until we hit END:
168                $line = $this->readLine();
169                if (strtoupper(substr($line,0,4)) === 'END:') {
170                    break;
171                }
172                $result = $this->parseLine($line);
173                if ($result) {
174                    $component->add($result);
175                }
176
177            }
178
179            $name = strtoupper(substr($line, 4));
180            if ($name!==$component->name) {
181                throw new ParseException('Invalid MimeDir file. expected: "END:' . $component->name . '" got: "END:' . $name . '"');
182            }
183
184            return $component;
185
186        } else {
187
188            // Property reader
189            $property = $this->readProperty($line);
190            if (!$property) {
191                // Ignored line
192                return false;
193            }
194            return $property;
195
196        }
197
198    }
199
    /**
     * We need to look ahead 1 line every time to see if we need to 'unfold'
     * the next line.
     *
     * If that was not the case, we store it here. readLine() consumes this
     * buffered line first, before reading from the stream again.
     *
     * @var null|string
     */
    protected $lineBuffer;

    /**
     * The real current line number.
     *
     * Incremented by readLine() for the line it fetches from the stream and
     * once more for every look-ahead read; reset to 0 by setInput().
     *
     * @var int
     */
    protected $lineIndex = 0;

    /**
     * In the case of unfolded lines, this property holds the line number for
     * the start of the line.
     *
     * Used in the error message for lines that cannot be parsed.
     *
     * @var int
     */
    protected $startLine = 0;

    /**
     * Contains a 'raw' representation of the current line.
     *
     * readLine() keeps folded continuation lines in here separated by
     * "\n " instead of joining them, which extractQuotedPrintableValue()
     * relies on.
     *
     * @var string
     */
    protected $rawLine;
229
230    /**
231     * Reads a single line from the buffer.
232     *
233     * This method strips any newlines and also takes care of unfolding.
234     *
235     * @throws \Sabre\VObject\EofException
236     * @return string
237     */
238    protected function readLine() {
239
240        if (!is_null($this->lineBuffer)) {
241            $rawLine = $this->lineBuffer;
242            $this->lineBuffer = null;
243        } else {
244            do {
245                $eof = feof($this->input);
246
247                $rawLine = fgets($this->input);
248
249                if ($eof || (feof($this->input) && $rawLine===false)) {
250                    throw new EofException('End of document reached prematurely');
251                }
252                if ($rawLine === false) {
253                    throw new ParseException('Error reading from input stream');
254                }
255                $rawLine = rtrim($rawLine, "\r\n");
256            } while ($rawLine === ''); // Skipping empty lines
257            $this->lineIndex++;
258        }
259        $line = $rawLine;
260
261        $this->startLine = $this->lineIndex;
262
263        // Looking ahead for folded lines.
264        while (true) {
265
266            $nextLine = rtrim(fgets($this->input), "\r\n");
267            $this->lineIndex++;
268            if (!$nextLine) {
269                break;
270            }
271            if ($nextLine[0] === "\t" || $nextLine[0] === " ") {
272                $line .= substr($nextLine, 1);
273                $rawLine .= "\n " . substr($nextLine, 1);
274            } else {
275                $this->lineBuffer = $nextLine;
276                break;
277            }
278
279        }
280        $this->rawLine = $rawLine;
281        return $line;
282
283    }
284
285    /**
286     * Reads a property or component from a line.
287     *
288     * @return void
289     */
290    protected function readProperty($line) {
291
292        if ($this->options & self::OPTION_FORGIVING) {
293            $propNameToken = 'A-Z0-9\-\._\\/';
294        } else {
295            $propNameToken = 'A-Z0-9\-\.';
296        }
297
298        $paramNameToken = 'A-Z0-9\-';
299        $safeChar = '^";:,';
300        $qSafeChar = '^"';
301
302        $regex = "/
303            ^(?P<name> [$propNameToken]+ ) (?=[;:])        # property name
304            |
305            (?<=:)(?P<propValue> .+)$                      # property value
306            |
307            ;(?P<paramName> [$paramNameToken]+) (?=[=;:])  # parameter name
308            |
309            (=|,)(?P<paramValue>                           # parameter value
310                (?: [$safeChar]*) |
311                \"(?: [$qSafeChar]+)\"
312            ) (?=[;:,])
313            /xi";
314
315        //echo $regex, "\n"; die();
316        preg_match_all($regex, $line, $matches,  PREG_SET_ORDER);
317
318        $property = array(
319            'name' => null,
320            'parameters' => array(),
321            'value' => null
322        );
323
324        $lastParam = null;
325
326        /**
327         * Looping through all the tokens.
328         *
329         * Note that we are looping through them in reverse order, because if a
330         * sub-pattern matched, the subsequent named patterns will not show up
331         * in the result.
332         */
333        foreach($matches as $match) {
334
335            if (isset($match['paramValue'])) {
336                if ($match['paramValue'] && $match['paramValue'][0] === '"') {
337                    $value = substr($match['paramValue'], 1, -1);
338                } else {
339                    $value = $match['paramValue'];
340                }
341
342                $value = $this->unescapeParam($value);
343
344                if (is_null($property['parameters'][$lastParam])) {
345                    $property['parameters'][$lastParam] = $value;
346                } elseif (is_array($property['parameters'][$lastParam])) {
347                    $property['parameters'][$lastParam][] = $value;
348                } else {
349                    $property['parameters'][$lastParam] = array(
350                        $property['parameters'][$lastParam],
351                        $value
352                    );
353                }
354                continue;
355            }
356            if (isset($match['paramName'])) {
357                $lastParam = strtoupper($match['paramName']);
358                if (!isset($property['parameters'][$lastParam])) {
359                    $property['parameters'][$lastParam] = null;
360                }
361                continue;
362            }
363            if (isset($match['propValue'])) {
364                $property['value'] = $match['propValue'];
365                continue;
366            }
367            if (isset($match['name']) && $match['name']) {
368                $property['name'] = strtoupper($match['name']);
369                continue;
370            }
371
372            // @codeCoverageIgnoreStart
373            throw new \LogicException('This code should not be reachable');
374            // @codeCoverageIgnoreEnd
375
376        }
377
378        if (is_null($property['value'])) {
379            $property['value'] = '';
380        }
381        if (!$property['name']) {
382            if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
383                return false;
384            }
385            throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
386        }
387
388        // vCard 2.1 states that parameters may appear without a name, and only
389        // a value. We can deduce the value based on it's name.
390        //
391        // Our parser will get those as parameters without a value instead, so
392        // we're filtering these parameters out first.
393        $namedParameters = array();
394        $namelessParameters = array();
395
396        foreach($property['parameters'] as $name=>$value) {
397            if (!is_null($value)) {
398                $namedParameters[$name] = $value;
399            } else {
400                $namelessParameters[] = $name;
401            }
402        }
403
404        $propObj = $this->root->createProperty($property['name'], null, $namedParameters);
405
406        foreach($namelessParameters as $namelessParameter) {
407            $propObj->add(null, $namelessParameter);
408        }
409
410        if (strtoupper($propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
411            $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
412        } else {
413            $propObj->setRawMimeDirValue($property['value']);
414        }
415
416        return $propObj;
417
418    }
419
420    /**
421     * Unescapes a property value.
422     *
423     * vCard 2.1 says:
424     *   * Semi-colons must be escaped in some property values, specifically
425     *     ADR, ORG and N.
426     *   * Semi-colons must be escaped in parameter values, because semi-colons
427     *     are also use to separate values.
428     *   * No mention of escaping backslashes with another backslash.
429     *   * newlines are not escaped either, instead QUOTED-PRINTABLE is used to
430     *     span values over more than 1 line.
431     *
432     * vCard 3.0 says:
433     *   * (rfc2425) Backslashes, newlines (\n or \N) and comma's must be
434     *     escaped, all time time.
435     *   * Comma's are used for delimeters in multiple values
436     *   * (rfc2426) Adds to to this that the semi-colon MUST also be escaped,
437     *     as in some properties semi-colon is used for separators.
438     *   * Properties using semi-colons: N, ADR, GEO, ORG
439     *   * Both ADR and N's individual parts may be broken up further with a
440     *     comma.
441     *   * Properties using commas: NICKNAME, CATEGORIES
442     *
443     * vCard 4.0 (rfc6350) says:
444     *   * Commas must be escaped.
445     *   * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
446     *     delimiter, depending on the property.
447     *   * Backslashes must be escaped
448     *   * Newlines must be escaped as either \N or \n.
449     *   * Some compound properties may contain multiple parts themselves, so a
450     *     comma within a semi-colon delimited property may also be unescaped
451     *     to denote multiple parts _within_ the compound property.
452     *   * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
453     *   * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
454     *
455     * Even though the spec says that commas must always be escaped, the
456     * example for GEO in Section 6.5.2 seems to violate this.
457     *
458     * iCalendar 2.0 (rfc5545) says:
459     *   * Commas or semi-colons may be used as delimiters, depending on the
460     *     property.
461     *   * Commas, semi-colons, backslashes, newline (\N or \n) are always
462     *     escaped, unless they are delimiters.
463     *   * Colons shall not be escaped.
464     *   * Commas can be considered the 'default delimiter' and is described as
465     *     the delimiter in cases where the order of the multiple values is
466     *     insignificant.
467     *   * Semi-colons are described as the delimiter for 'structured values'.
468     *     They are specifically used in Semi-colons are used as a delimiter in
469     *     REQUEST-STATUS, RRULE, GEO and EXRULE. EXRULE is deprecated however.
470     *
471     * Now for the parameters
472     *
473     * If delimiter is not set (null) this method will just return a string.
474     * If it's a comma or a semi-colon the string will be split on those
475     * characters, and always return an array.
476     *
477     * @param string $input
478     * @param string $delimiter
479     * @return string|string[]
480     */
481    static public function unescapeValue($input, $delimiter = ';') {
482
483        $regex = '#  (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
484        if ($delimiter) {
485            $regex .= ' | (' . $delimiter . ')';
486        }
487        $regex .= ') #x';
488
489        $matches = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
490
491        $resultArray = array();
492        $result = '';
493
494        foreach($matches as $match) {
495
496            switch ($match) {
497                case '\\\\' :
498                    $result .='\\';
499                    break;
500                case '\N' :
501                case '\n' :
502                    $result .="\n";
503                    break;
504                case '\;' :
505                    $result .=';';
506                    break;
507                case '\,' :
508                    $result .=',';
509                    break;
510                case $delimiter :
511                    $resultArray[] = $result;
512                    $result = '';
513                    break;
514                default :
515                    $result .= $match;
516                    break;
517
518            }
519
520        }
521
522        $resultArray[] = $result;
523        return $delimiter ? $resultArray : $result;
524
525    }
526
527    /**
528     * Unescapes a parameter value.
529     *
530     * vCard 2.1:
531     *   * Does not mention a mechanism for this. In addition, double quotes
532     *     are never used to wrap values.
533     *   * This means that parameters can simply not contain colons or
534     *     semi-colons.
535     *
536     * vCard 3.0 (rfc2425, rfc2426):
537     *   * Parameters _may_ be surrounded by double quotes.
538     *   * If this is not the case, semi-colon, colon and comma may simply not
539     *     occur (the comma used for multiple parameter values though).
540     *   * If it is surrounded by double-quotes, it may simply not contain
541     *     double-quotes.
542     *   * This means that a parameter can in no case encode double-quotes, or
543     *     newlines.
544     *
545     * vCard 4.0 (rfc6350)
546     *   * Behavior seems to be identical to vCard 3.0
547     *
548     * iCalendar 2.0 (rfc5545)
549     *   * Behavior seems to be identical to vCard 3.0
550     *
551     * Parameter escaping mechanism (rfc6868) :
552     *   * This rfc describes a new way to escape parameter values.
553     *   * New-line is encoded as ^n
554     *   * ^ is encoded as ^^.
555     *   * " is encoded as ^'
556     *
557     * @param string $input
558     * @return void
559     */
560    private function unescapeParam($input) {
561
562        return
563            preg_replace_callback(
564                '#(\^(\^|n|\'))#',
565                function($matches) {
566                    switch($matches[2]) {
567                        case 'n' :
568                            return "\n";
569                        case '^' :
570                            return '^';
571                        case '\'' :
572                            return '"';
573
574                    // @codeCoverageIgnoreStart
575                    }
576                    // @codeCoverageIgnoreEnd
577                },
578                $input
579            );
580    }
581
582    /**
583     * Gets the full quoted printable value.
584     *
585     * We need a special method for this, because newlines have both a meaning
586     * in vCards, and in QuotedPrintable.
587     *
588     * This method does not do any decoding.
589     *
590     * @return string
591     */
592    private function extractQuotedPrintableValue() {
593
594        // We need to parse the raw line again to get the start of the value.
595        //
596        // We are basically looking for the first colon (:), but we need to
597        // skip over the parameters first, as they may contain one.
598        $regex = '/^
599            (?: [^:])+ # Anything but a colon
600            (?: "[^"]")* # A parameter in double quotes
601            : # start of the value we really care about
602            (.*)$
603        /xs';
604
605        preg_match($regex, $this->rawLine, $matches);
606
607        $value = $matches[1];
608        // Removing the first whitespace character from every line. Kind of
609        // like unfolding, but we keep the newline.
610        $value = str_replace("\n ", "\n", $value);
611
612        // Microsoft products don't always correctly fold lines, they may be
613        // missing a whitespace. So if 'forgiving' is turned on, we will take
614        // those as well.
615        if ($this->options & self::OPTION_FORGIVING) {
616            while(substr($value,-1) === '=') {
617                // Reading the line
618                $this->readLine();
619                // Grabbing the raw form
620                $value.="\n" . $this->rawLine;
621            }
622        }
623
624        return $value;
625
626    }
627
628}
629