1<?php
2
3/**
4 * Swift Mailer Message Encoder
5 * Please read the LICENSE file
6 * @copyright Chris Corbyn <chris@w3style.co.uk>
7 * @author Chris Corbyn <chris@w3style.co.uk>
8 * @package Swift_Message
9 * @license GNU Lesser General Public License
10 */
11
12require_once dirname(__FILE__) . "/../ClassLoader.php";
13Swift_ClassLoader::load("Swift_File");
14
15/**
16 * Encodes strings in a variety of formats and detects some encoding formats
17 * @package Swift_Message
18 * @author Chris Corbyn <chris@w3style.co.uk>
19 */
class Swift_Message_Encoder
{
  /**
   * A regular expression which matches valid e-mail addresses (including some unlikely ones).
   * The fragment has no delimiters or anchors; it is meant to be embedded in a larger pattern.
   */
  const CHEAP_ADDRESS_RE = '(?#Start of dot-atom
    )[-!#\$%&\'\*\+\/=\?\^_`{}\|~0-9A-Za-z]+(?:\.[-!#\$%&\'\*\+\/=\?\^_`{}\|~0-9A-Za-z]+)*(?#
    End of dot-atom)(?:@(?#Start of domain)[-0-9A-Za-z]+(?:\.[-0-9A-Za-z]+)*(?#End of domain))?';
  /**
   * A singleton of this class
   * @var Swift_Message_Encoder
   */
  protected static $instance = null;
  /**
   * Retrieve an instance of the encoder as a singleton.
   * The instance is created lazily on the first call and reused afterwards.
   * @return Swift_Message_Encoder
   */
  public static function instance()
  {
    if (self::$instance === null)
    {
      self::$instance = new Swift_Message_Encoder();
    }
    return self::$instance;
  }
  /**
   * Break a string apart at every occurrence of <add@ress> and return an array.
   * This method does NOT remove any characters like a preg_split() would do.
   * Plain text elements use their numeric index as key; elements matching an
   * address use the key "a" followed by the numeric index.
   * @param string $input The input string to separate
   * @return array An empty array if the input is empty
   */
  public function addressChunk($input)
  {
    return $this->chunkOnPattern('/^(.*?)(<' . self::CHEAP_ADDRESS_RE . '>)/s', $input);
  }
  /**
   * Break a string apart at every occurrence of <xxxyyy> and return an array.
   * This method does NOT remove any characters like a preg_split() would do.
   * Plain text elements use their numeric index as key; elements matching a
   * quoted (angle-bracketed) string use the key "a" followed by the numeric index.
   * @param string $input The input string to separate
   * @return array An empty array if the input is empty
   */
  public function quoteChunk($input)
  {
    return $this->chunkOnPattern('/^(.*?)(<[\x20-\x3A\x3C-\x7E]*>)/s', $input);
  }
  /**
   * Shared implementation of addressChunk() and quoteChunk().
   * Repeatedly matches $pattern at the start of the remaining input; capture group 1
   * is the text in front of the special chunk and capture group 2 is the chunk itself.
   * @param string $pattern A PCRE pattern with exactly those two capture groups
   * @param string $input The input string to separate
   * @return array
   */
  protected function chunkOnPattern($pattern, $input)
  {
    $ret = array(); //Always return an array, even when nothing is collected
    $elements = 0;
    $input = (string) $input;
    while (preg_match($pattern, $input, $matches))
    {
      //Strict comparison so that a leading chunk of "0" is not discarded
      if ($matches[1] !== "") $ret[$elements++] = $matches[1];
      $ret['a' . ($elements++)] = $matches[2];
      //substr() yields false at end-of-string on older PHP versions, hence the cast
      $input = (string) substr($input, strlen($matches[0]));
    }
    if ($input !== "") $ret[$elements++] = $input; //Whatever is left over

    return $ret;
  }
  /**
   * Return the base64 encoded version of the string, wrapped into lines.
   * @param string $data The input string to encode
   * @param int $chunk The maximum length of each line of output (inc CRLF)
   * @param int $init_chunk The maximum length of the first line in the output (for headers)
   * @param boolean $headers Whether <...> chunks (as found by quoteChunk()) should be left unencoded
   * @param string $le The line ending
   * @return string
   */
  public function base64Encode($data, $chunk=76, $init_chunk=0, $headers=false, $le="\r\n")
  {
    $ret = "";
    //Reserve two characters for the line ending and keep lines a multiple of 4 characters
    $chunk = $this->getHcf($chunk - 2, 4);

    if ($init_chunk >= 2)
    {
      $init_chunk = $this->getHcf($init_chunk - 2, 4);
    }

    $data = $headers ? $this->quoteChunk($data) : array($data);

    foreach ($data as $key => $string)
    {
      $key = (string) $key;
      if ($key[0] === 'a') //A preserved <...> chunk, copied through unencoded
      {
        if ($init_chunk && $init_chunk < (strlen($string)+2)) $ret .= $le;
        $ret .= $le . $string;
      }
      else
      {
        $string = $this->rawBase64Encode($string);
        if ($init_chunk > 2)
        {
          //Fill what is left of the first line, then carry on with full lines
          $ret .= substr($string, 0, $init_chunk) . $le;
          $string = substr($string, $init_chunk);
        }
        elseif ($init_chunk) $ret .= $le;

        $ret .= trim(chunk_split($string, $chunk, $le)) . $le;
      }
      $init_chunk = 0; //Only the very first element is subject to the first-line limit
    }

    return trim($ret);
  }
  /**
   * Return the base64 encoded version of a string with no breaks
   * @param string $string The input string to encode
   * @return string
   */
  public function rawBase64Encode($string)
  {
    return base64_encode($string);
  }
  /**
   * Return the base64 encoded version of a file
   * @param Swift_File $file The file input stream
   * @param int $chunk Max line length
   * @param string $le The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function base64EncodeFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $chunk = $this->getHcf($chunk - 2, 4);
    $first = true;
    //We have to read in multiples of 3 bytes but avoid doing such small chunks that it takes too long
    while (false !== $bytes = $file->read(8190))
    {
      //Separate the output of consecutive reads with a line ending
      if (!$first) $cache->write("b64", $le);
      $first = false;
      $encoded = chunk_split($this->rawBase64Encode($bytes), $chunk, $le);
      $cache->write("b64", trim($encoded));
    }
    $file->reset();
    return $cache->getOutputStream("b64");
  }
  /**
   * Return the quoted printable version of the input string, wrapped with soft line breaks.
   * @param string $data The input string to encode
   * @param int $chunk The maximum length of each line of output (inc CRLF)
   * @param int $init_chunk The maximum length of the first line in the output (for headers)
   * @param boolean $headers Whether <...> chunks (as found by quoteChunk()) should be left unencoded
   * @param string $le The line ending
   * @return string
   */
  public function QPEncode($data, $chunk=76, $init_chunk=0, $headers=false, $le="\r\n")
  {
    $ret = "";
    $data = $headers ? $this->quoteChunk($data) : array($data);

    $trailing_spaces = chr(9) . chr(32);
    foreach ($data as $key => $string)
    {
      $key = (string) $key;
      if ($key[0] === 'a') //A preserved <...> chunk, copied through unencoded
      {
        if ($init_chunk && $init_chunk < (strlen($string)+3)) $ret .= "=";
        $ret .= $le . $string;
      }
      else
      {
        //Encode line by line, dropping trailing tabs/spaces from each line first
        $lines = explode($le, $string);
        foreach ($lines as $n => $line)
        {
          $lines[$n] = $this->rawQPEncode(rtrim($line, $trailing_spaces));
        }
        $string = implode($le, $lines);

        if ($init_chunk > 3)
        {
          //Cut the first line to the first-line limit; the character classes
          //keep the cut from landing inside an =XX sequence
          if (preg_match('/^.{1,'.($init_chunk-5).'}[^=]{2}(?!=[A-F0-9]{2})/', $string, $matches)
            || preg_match('/^.{1,'.($init_chunk-6).'}([^=]{0,3})?/', $string, $matches))
          {
            $ret .= $this->fixLE($matches[0] . "=", $le);
            $string = (string) substr($string, strlen($matches[0]));
          }
        }
        elseif ($init_chunk) $ret .= "=";

        //Continuation lines are limited by $chunk (not by the first-line limit).
        //If neither pattern matches, the remainder is emitted as one piece.
        while (preg_match('/^.{1,'.($chunk-5).'}[^=]{2}(?!=[A-F0-9]{2})/', $string, $matches)
          || preg_match('/^.{1,'.($chunk-6).'}([^=]{0,3})?/', $string, $matches)
          || (strlen($string) > 0 && ($matches = array($string))))
        {
          $ret .= $this->fixLE($le . $matches[0] . "=", $le);
          $string = (string) substr($string, strlen($matches[0]));
        }
      }
      $init_chunk = 0; //Only the very first element is subject to the first-line limit
    }

    //Drop the soft line break marker left behind by the final piece
    if (substr($ret, -1) == "=") return trim(substr($ret, 0, -1));
    else return trim($ret);
  }
  /**
   * Return the QP encoded version of a string with no breaks.
   * In text mode line endings are normalised to CRLF and HT, SP, CR and LF are
   * passed through; in binary mode those bytes are encoded as well.
   * "=" (61) and "?" (63) are always encoded.
   * NOTE(review): the original comment said "_" (95) also needs encoding for
   * headers, but the condition has never encoded it - confirm which is intended.
   * @param string $string The input to encode
   * @param boolean $bin True if the data we're encoding is binary
   * @return string
   */
  public function rawQPEncode($string, $bin=false)
  {
    $ret = "";
    if (!$bin)
    {
      $string = str_replace(array("\r\n", "\r"), "\n", $string);
      $string = str_replace("\n", "\r\n", $string);
    }
    $len = strlen($string);
    for ($i = 0; $i < $len; $i++)
    {
      $char = $string[$i];
      $val = ord($char);
      //9, 32 = HT, SP; 10, 13 = LF, CR
      $is_text_whitespace = (!$bin && ($val == 32 || $val == 9 || $val == 10 || $val == 13));
      //33-60 & 62-126 are ok, except 63 = '?'
      $is_literal = ($val >= 33 && $val <= 60) || ($val >= 62 && $val <= 126 && $val != 63);
      if ($is_text_whitespace || $is_literal)
      {
        $ret .= $char;
      }
      else
      {
        $ret .= sprintf("=%02X", $val);
      }
    }
    return $ret;
  }
  /**
   * Return a file as a quoted printable encoded string.
   * Each line read from the file is encoded in binary mode and split with soft line breaks.
   * NOTE(review): nothing is written between the output of consecutive source lines;
   * whether that can produce over-long output lines depends on Swift_File::readln() - confirm.
   * @param Swift_File $file The file to encode
   * @param int $chunk Max line length
   * @param string $le The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function QPEncodeFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    while (false !== $bytes = $file->readln())
    {
      $encoded = $this->rawQPEncode($bytes, true);
      preg_match_all('/.{1,'.($chunk-6).'}([^=]{0,3})?/', $encoded, $pieces);
      if (count($pieces[0]))
      {
        $cache->write("qp", $this->fixLE(implode("=" . $le, $pieces[0]), $le));
      }
    }
    return $cache->getOutputStream("qp");
  }
  /**
   * Encode a string as 7bit ascii.
   * The data is only hard-wrapped and has its line endings fixed; bytes are not altered.
   * @param string $data Input data to encode
   * @param int $chunk Max line length
   * @param string $le The line ending
   * @return string
   */
  public function encode7Bit($data, $chunk=76, $le="\r\n")
  {
    $wrapped = wordwrap($data, $chunk-2, $le, 1);
    return $this->fixLE($wrapped, $le);
  }
  /**
   * Return a 7bit string from a file.
   * The whole file is read into memory before it is wrapped.
   * @param Swift_File $file The file stream to read from
   * @param int $chunk The max line length
   * @param string $le The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function encode7BitFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $contents = "";
    while (false !== $bytes = $file->read(8192))
    {
      $contents .= $bytes;
    }
    $cache->write("7b", $this->fixLE(wordwrap($contents, $chunk-2, $le, 1), $le));
    return $cache->getOutputStream("7b");
  }
  /**
   * Return the 8bit encoded form of a string (unchanged there-abouts).
   * The data is only hard-wrapped and has its line endings fixed.
   * @param string $data Input data to encode
   * @param int $chunk Maximum line length
   * @param string $le The line ending
   * @return string
   */
  public function encode8Bit($data, $chunk=76, $le="\r\n")
  {
    $wrapped = wordwrap($data, $chunk-2, $le, 1);
    return $this->fixLE($wrapped, $le);
  }
  /**
   * Return a 8bit string from a file.
   * The whole file is read into memory before it is wrapped.
   * @param Swift_File $file The file stream to read from
   * @param int $chunk Max line length (including CRLF)
   * @param string $le The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function encode8BitFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $contents = "";
    while (false !== $bytes = $file->read(8192))
    {
      $contents .= $bytes;
    }
    $cache->write("8b", $this->fixLE(wordwrap($contents, $chunk-2, $le, 1), $le));
    return $cache->getOutputStream("8b");
  }
  /**
   * Keeps lines longer than 76 characters trimmed down to size.
   * Characters outside the 7bit printable range are stripped (see encode7BitPrintable());
   * this does not convert other string encodings into 7bit.
   * @param string $data The data to make safe for headers (defaults to RFC 2822 standards)
   * @param int $chunk Maximum length of lines returned
   * @param int $init_chunk The maximum length of the first line
   * @param string $le The line ending
   * @return string
   */
  public function header7BitEncode($data, $chunk=76, $init_chunk=0, $le="\r\n")
  {
    $data = $this->encode7BitPrintable($data);
    $ret = "";
    if ($init_chunk > 2)
    {
      //Wrap at the first-line width just to find out what fits on the first line
      $data_wrapped = wordwrap($data, $init_chunk, $le);
      $lines = explode($le, $data_wrapped);
      $first_line = array_shift($lines);
      $ret .= $first_line . $le;
      //Continue after the first line, minus the whitespace the wrap happened on
      $data = preg_replace("~^[ \t]~D", "", substr($data, strlen($first_line)));
    }
    elseif ($init_chunk) $ret .= $le;
    $ret .= wordwrap($data, $chunk-2, $le);
    return trim($ret);
  }
  /**
   * Strip out any characters which are not in the ASCII 7bit printable range
   * @param string $data The string to clean
   * @return string
   */
  public function encode7BitPrintable($data)
  {
    return preg_replace('/[^\x20-\x7E]/', '', $data);
  }
  /**
   * Detect if a string contains multi-byte non-ascii chars that fall in the UTF-8 ranges.
   * The result of preg_match() is returned as-is: 1 if a sequence is found, 0 if not.
   * @param string $data Data to detect UTF-8 sequences in
   * @return int Usable as a boolean
   */
  public function isUTF8($data)
  {
    return preg_match('%(?:
    [\xC2-\xDF][\x80-\xBF]				# non-overlong 2-byte
    |\xE0[\xA0-\xBF][\x80-\xBF]			# excluding overlongs
    |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}	# straight 3-byte
    |\xED[\x80-\x9F][\x80-\xBF]			# excluding surrogates
    |\xF0[\x90-\xBF][\x80-\xBF]{2}		# planes 1-3
    |[\xF1-\xF3][\x80-\xBF]{3}			# planes 4-15
    |\xF4[\x80-\x8F][\x80-\xBF]{2}		# plane 16
    )+%xs', $data);
  }
  /**
   * This function checks for 7bit *printable* characters
   * which excludes \r \n \t etc and so, is safe for use in mail headers
   * Actual permitted chars [\ !"#\$%&'\(\)\*\+,-\.\/0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]\^_`abcdefghijklmnopqrstuvwxyz{\|}~]
   * Ranges \x00-\x1F are printer control sequences
   * \x7F is the ascii delete character
   * @param string $data Data to check against
   * @return boolean
   */
  public function is7BitPrintable($data)
  {
    return (!preg_match('/[^\x20-\x7E]/', $data));
  }
  /**
   * Check that a string does not contain any evil characters for headers.
   * That is: only 7bit printable characters and no semi-colon.
   * @param string $data The string to check
   * @return boolean
   */
  public function isHeaderSafe($data)
  {
    if (!$this->is7BitPrintable($data)) return false;
    return (strpos($data, ";") === false);
  }
  /**
   * If the characters fall exclusively in the 7bit ascii range (\x01-\x7F), return true
   * @param string $data Input to check
   * @return boolean
   */
  public function is7BitAscii($data)
  {
    return (!preg_match('/[^\x01-\x7F]/', $data));
  }
  /**
   * Encode a string for RFC 2047 compatibility (url-encode).
   * Strings that are already 7bit printable are only split into lines; anything else
   * is url-encoded (spaces as %20) and prefixed with charset'language'.
   * @param string $str The input for encoding
   * @param string $charset The charset used
   * @param string $language The language used
   * @param int $chunk The maximum line length
   * @param string $le The line ending
   * @return string An empty string if the input is empty
   */
  public function rfc2047Encode($str, $charset="iso-8859-1", $language="en-us", $chunk=76, $le="\r\n")
  {
    if (!$this->is7BitPrintable($str))
    {
      $lang_spec = $charset . "'" . $language . "'";
      $str = $lang_spec . str_replace("+", "%20", urlencode($str));
    }
    //Split into lines without cutting through a %XX sequence
    preg_match_all('~.{1,'.($chunk-6).'}([^%]{0,3})~', $str, $matches);
    //implode() of an empty list gives "", so a string is returned in every case
    return implode($le, $matches[0]);
  }
  /**
   * Fixes line endings to be whatever is specified by the user
   * SMTP requires the CRLF be used, but using sendmail in -t mode uses LF
   * This method also escapes dots on a start of line to avoid injection
   * (only dots that directly follow a line ending are doubled).
   * @param string $data The data to fix
   * @param string $le The line ending to use
   * @return string
   */
  protected function fixLE($data, $le)
  {
    //Collapse every kind of line ending to LF first, then expand to the requested one
    $data = str_replace(array("\r\n", "\r"), "\n", $data);
    if ($le != "\n") $data = str_replace("\n", $le, $data);
    return str_replace($le . ".", $le . "..", $data);
  }
  /**
   * Reduce a value to a whole multiple of the given factor by subtracting the remainder.
   * @param int $value The value to reduce
   * @param int $factor The factor the result must be a multiple of
   * @return int
   */
  protected function getHcf($value, $factor)
  {
    return ($value - ($value % $factor));
  }
}
456