<?php

/**
 * Swift Mailer Message Encoder
 * Please read the LICENSE file
 * @copyright Chris Corbyn <chris@w3style.co.uk>
 * @author Chris Corbyn <chris@w3style.co.uk>
 * @package Swift_Message
 * @license GNU Lesser General Public License
 */

require_once dirname(__FILE__) . "/../ClassLoader.php";
Swift_ClassLoader::load("Swift_File");

/**
 * Encodes strings in a variety of formats and detects some encoding formats
 * @package Swift_Message
 * @author Chris Corbyn <chris@w3style.co.uk>
 */
class Swift_Message_Encoder
{
  /**
   * A regular expression fragment (no delimiters) which matches valid e-mail
   * addresses (including some unlikely ones). The "@domain" part is optional.
   * The line breaks in the literal all sit inside (?#...) comments.
   */
  const CHEAP_ADDRESS_RE = '(?#Start of dot-atom
    )[-!#\$%&\'\*\+\/=\?\^_`{}\|~0-9A-Za-z]+(?:\.[-!#\$%&\'\*\+\/=\?\^_`{}\|~0-9A-Za-z]+)*(?#
    End of dot-atom)(?:@(?#Start of domain)[-0-9A-Za-z]+(?:\.[-0-9A-Za-z]+)*(?#End of domain))?';
  /**
   * A singleton of this class
   * @var Swift_Message_Encoder
   */
  protected static $instance = null;
  /**
   * Retrieve an instance of the encoder as a singleton.
   * New instances are never ever needed since it's monostatic.
   * @return Swift_Message_Encoder
   */
  public static function instance()
  {
    if (self::$instance === null)
    {
      self::$instance = new Swift_Message_Encoder();
    }
    return self::$instance;
  }
  /**
   * Break a string apart at every occurrence of <add@ress> and return an array
   * This method does NOT remove any characters like a preg_split() would do.
   * Elements matching an address start with "a" followed by the numeric index
   * @param string The input string to separate
   * @return array An empty array if the input is empty
   */
  public function addressChunk($input)
  {
    return $this->chunkByPattern($input, '/^(.*?)(<' . self::CHEAP_ADDRESS_RE . '>)/s');
  }
  /**
   * Break a string apart at every occurrence of <xxxyyy> and return an array
   * This method does NOT remove any characters like a preg_split() would do.
   * Elements matching a quoted string start with "a" followed by the numeric index
   * @param string The input string to separate
   * @return array An empty array if the input is empty
   */
  public function quoteChunk($input)
  {
    return $this->chunkByPattern($input, '/^(.*?)(<[\x20-\x3A\x3C-\x7E]*>)/s');
  }
  /**
   * Shared worker for addressChunk() and quoteChunk().
   * Repeatedly matches the pattern at the start of the remaining input.
   * Group 1 (the text before the token) is stored under the next integer key,
   * group 2 (the <...> token) under "a" followed by the next index.
   * @param string The input string to separate
   * @param string A delimited pattern, anchored at the start, with exactly two groups
   * @return array
   */
  protected function chunkByPattern($input, $pattern)
  {
    $ret = array(); //Always return an array, even when nothing is collected
    $elements = 0;
    $input = (string) $input;
    while (preg_match($pattern, $input, $matches))
    {
      //Compare against "" rather than using empty(), which would drop a literal "0"
      if ($matches[1] !== "") $ret[$elements++] = $matches[1];
      $ret['a' . ($elements++)] = $matches[2];
      //Cast because substr() gives false rather than "" on older PHP versions
      $input = (string) substr($input, strlen($matches[0]));
    }
    if ($input !== "") $ret[$elements++] = $input; //Whatever is left over

    return $ret;
  }
  /**
   * Return the base64 encoded version of the string
   * @param string The input string to encode
   * @param int The maximum length of each line of output (inc CRLF)
   * @param int The maximum length of the first line in the output (for headers)
   * @param boolean Whether email addresses between < and > chars should be preserved or not
   * @param string The line ending
   * @return string
   */
  public function base64Encode($data, $chunk=76, $init_chunk=0, $headers=false, $le="\r\n")
  {
    $ret = "";
    //Leave room for the line ending, then round down to a whole number of
    // 4 character base64 groups so that no group is split across lines
    $chunk -= 2;
    $chunk = $this->getHcf($chunk, 4);

    if ($init_chunk >= 2)
    {
      $init_chunk -= 2;
      $init_chunk = $this->getHcf($init_chunk, 4);
    }

    if ($headers) $data = $this->quoteChunk($data);
    else $data = array($data);

    foreach ($data as $key => $string)
    {
      $key = (string) $key;
      if ($key[0] == 'a') //This is an address
      {
        //Addresses are copied verbatim onto a line of their own
        if ($init_chunk && $init_chunk < (strlen($string)+2)) $ret .= $le;
        $ret .= $le . $string;
      }
      else
      {
        $string = $this->rawBase64Encode($string);
        if ($init_chunk > 2)
        {
          $ret .= substr($string, 0, $init_chunk) . $le;
          $string = substr($string, $init_chunk);
        }
        elseif ($init_chunk) $ret .= $le;

        $ret .= trim(chunk_split($string, $chunk, $le)) . $le;
      }
      $init_chunk = 0; //Only the very first element is subject to the first line limit
    }

    return trim($ret);
  }
  /**
   * Return the base64 encoded version of a string with no breaks
   * @param string The input string to encode
   * @return string
   */
  public function rawBase64Encode($string)
  {
    return base64_encode($string);
  }
  /**
   * Return the base64 encoded version of a file
   * @param Swift_File The file input stream
   * @param int Max line length
   * @param string The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function base64EncodeFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $chunk -= 2;
    $chunk = $this->getHcf($chunk, 4);
    $loop = false;
    //We have to read in multiples of 3 bytes but avoid doing such small chunks that it takes too long
    while (false !== $bytes = $file->read(8190))
    {
      if ($loop) $cache->write("b64", $le); //Separate this read from the previous one
      $loop = true;
      $next = chunk_split($this->rawBase64Encode($bytes), $chunk, $le);
      $next = trim($next);
      $cache->write("b64", $next);
    }
    $file->reset();
    return $cache->getOutputStream("b64");
  }
  /**
   * Return the quoted printable version of the input string
   * @param string The input string to encode
   * @param int The maximum length of each line of output (inc CRLF)
   * @param int The maximum length of the first line in the output (for headers)
   * @param boolean Whether email addresses between < and > chars should be preserved or not
   * @param string The line ending
   * @return string
   */
  public function QPEncode($data, $chunk=76, $init_chunk=0, $headers=false, $le="\r\n")
  {
    $ret = "";
    if ($headers) $data = $this->quoteChunk($data);
    else $data = array($data);

    $trailing_spaces = chr(9) . chr(32);
    foreach ($data as $key => $string)
    {
      $key = (string) $key;
      if ($key[0] == 'a') //An address
      {
        //Addresses are copied verbatim onto a line of their own
        if ($init_chunk && $init_chunk < (strlen($string)+3)) $ret .= "=";
        $ret .= $le . $string;
      }
      else
      {
        //Encode line by line so that whitespace at the end of each line is dropped
        $lines = explode($le, $string);
        foreach ($lines as $n => $line)
          $lines[$n] = $this->rawQPEncode(rtrim($line, $trailing_spaces));
        $string = implode($le, $lines);
        if ($init_chunk > 3)
        {
          //Fill the (shorter) first line, never breaking inside an =XX sequence
          if (preg_match('/^.{1,'.($init_chunk-5).'}[^=]{2}(?!=[A-F0-9]{2})/', $string, $matches)
            || preg_match('/^.{1,'.($init_chunk-6).'}([^=]{0,3})?/', $string, $matches))
          {
            $ret .= $this->fixLE($matches[0] . "=", $le); //fixLE added 24/08/07
            $string = (string) substr($string, strlen($matches[0]));
          }
        }
        elseif ($init_chunk) $ret .= "=";

        //Continuation lines are limited by $chunk. The first pattern used to be built
        // from $init_chunk, which produced an invalid quantifier when $init_chunk was 0
        // and capped every following line at the first line's width otherwise.
        //The last alternative takes whatever is left when neither pattern matches.
        while (preg_match('/^.{1,'.($chunk-5).'}[^=]{2}(?!=[A-F0-9]{2})/', $string, $matches)
          || preg_match('/^.{1,'.($chunk-6).'}([^=]{0,3})?/', $string, $matches)
          || (strlen($string) > 0 && $matches = array($string)))
        {
          $ret .= $this->fixLE($le . $matches[0] . "=", $le); //fixLE added 24/08/07
          $string = (string) substr($string, strlen($matches[0]));
        }
      }
      $init_chunk = 0; //Only the very first element is subject to the first line limit
    }

    //Drop the soft line break left dangling after the final line
    if (substr($ret, -1) == "=") return trim(substr($ret, 0, -1));
    else return trim($ret);
  }
  /**
   * Return the QP encoded version of a string with no breaks
   * @param string The input to encode
   * @param boolean True if the data we're encoding is binary
   * @return string
   */
  public function rawQPEncode($string, $bin=false)
  {
    $ret = "";
    if (!$bin)
    {
      //Normalise every line ending to CRLF for textual data
      $string = str_replace(array("\r\n", "\r"), "\n", $string);
      $string = str_replace("\n", "\r\n", $string);
    }
    $len = strlen($string);
    for ($i = 0; $i < $len; $i++)
    {
      $val = ord($string[$i]);
      //9, 32 = HT, SP; 10, 13 = LF, CR (these four only pass through for non-binary data)
      //33-60 & 62-126 are ok, except 63 = '?' which needs encoding to go in the headers
      //61 = '=' is always encoded; 95 = '_' is NOT encoded by this method
      $is_text_whitespace = (!$bin && ($val == 32 || $val == 9 || $val == 10 || $val == 13));
      $is_safe_printable = (($val >= 33 && $val <= 60) || ($val >= 62 && $val <= 126 && $val != 63));
      if ($is_text_whitespace || $is_safe_printable)
      {
        $ret .= $string[$i];
      }
      else
      {
        $ret .= sprintf("=%02X", $val);
      }
    }
    return $ret;
  }
  /**
   * Return a file as a quoted printable encoded string
   * @param Swift_File The file to encode
   * @param int Max line length
   * @param string The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function QPEncodeFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    //NOTE(review): nothing is written between the output of successive readln() calls,
    // so how short input lines end up joined depends on what readln() returns - confirm
    while (false !== $bytes = $file->readln())
    {
      $encoded = $this->rawQPEncode($bytes, true);
      preg_match_all('/.{1,'.($chunk-6).'}([^=]{0,3})?/', $encoded, $pieces);
      if (count($pieces[0])) $cache->write("qp", $this->fixLE(implode("=" . $le, $pieces[0]), $le));
    }
    return $cache->getOutputStream("qp");
  }
  /**
   * Encode a string as 7bit ascii
   * This only hard-wraps the data and fixes the line endings
   * @param string Input data to encode
   * @param int Max line length
   * @param string The line ending
   * @return string
   */
  public function encode7Bit($data, $chunk=76, $le="\r\n")
  {
    return $this->fixLE(wordwrap($data, $chunk-2, $le, true), $le);
  }
  /**
   * Return a 7bit string from a file
   * The whole file is read into memory before it is wrapped
   * @param Swift_File The file stream to read from
   * @param int The max line length
   * @param string The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function encode7BitFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $ret = "";
    while (false !== $bytes = $file->read(8192)) $ret .= $bytes;
    $cache->write("7b", $this->fixLE(wordwrap($ret, $chunk-2, $le, true), $le));
    return $cache->getOutputStream("7b");
  }
  /**
   * Return the 8bit encoded form of a string (unchanged there-abouts)
   * This only hard-wraps the data and fixes the line endings
   * @param string Input data to encode
   * @param int Maximum line length
   * @param string The line ending
   * @return string
   */
  public function encode8Bit($data, $chunk=76, $le="\r\n")
  {
    return $this->fixLE(wordwrap($data, $chunk-2, $le, true), $le);
  }
  /**
   * Return a 8bit string from a file
   * The whole file is read into memory before it is wrapped
   * @param Swift_File The file stream to read from
   * @param int Max line length (including CRLF)
   * @param string The line ending
   * @return Swift_Cache_OutputStream
   * @throws Swift_FileException If the file cannot be read
   */
  public function encode8BitFile(Swift_File $file, $chunk=76, $le="\r\n")
  {
    Swift_ClassLoader::load("Swift_CacheFactory");
    $cache = Swift_CacheFactory::getCache();
    $ret = "";
    while (false !== $bytes = $file->read(8192)) $ret .= $bytes;
    $cache->write("8b", $this->fixLE(wordwrap($ret, $chunk-2, $le, true), $le));
    return $cache->getOutputStream("8b");
  }
  /**
   * Keeps lines longer than 76 characters trimmed down to size
   * This currently does not convert other string encodings into 7bit
   * (non-printable characters are simply stripped out)
   * @param string The data to make safe for headers (defaults to RFC 2822 standards)
   * @param int maximum length of lines returned
   * @param int The maximum length of the first line
   * @param string The Line ending
   * @return string
   */
  public function header7BitEncode($data, $chunk=76, $init_chunk=0, $le="\r\n")
  {
    $data = $this->encode7BitPrintable($data);
    $ret = "";
    if ($init_chunk > 2)
    {
      //Wrap at the first line width only to find out what fits on the first line
      $data_wrapped = wordwrap($data, $init_chunk, $le);
      $lines = explode($le, $data_wrapped);
      $first_line = array_shift($lines);
      $ret .= $first_line . $le;
      //Re-wrap the rest at the normal width, minus the whitespace wordwrap() broke on
      $data = preg_replace("~^[ \t]~D", "", substr($data, strlen($first_line)));
    }
    elseif ($init_chunk) $ret .= $le;
    $ret .= wordwrap($data, $chunk-2, $le);
    return trim($ret);
  }
  /**
   * Strip out any characters which are not in the ASCII 7bit printable range
   * @param string The string to clean
   * @return string
   */
  public function encode7BitPrintable($data)
  {
    return preg_replace('/[^\x20-\x7E]/', '', $data);
  }
  /**
   * Detect if a string contains multi-byte non-ascii chars that fall in the UTF-8 ranges
   * A single well-formed multi-byte sequence anywhere in the data is enough
   * @param string Data to detect UTF-8 sequences in
   * @return int The preg_match() result: 1 if a sequence was found, 0 if not
   */
  public function isUTF8($data)
  {
    return preg_match('%(?:
      [\xC2-\xDF][\x80-\xBF]              # non-overlong 2-byte
      |\xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
      |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
      |\xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
      |\xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
      |[\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
      |\xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
      )+%xs', $data);
  }
  /**
   * This function checks for 7bit *printable* characters
   * which excludes \r \n \t etc and so, is safe for use in mail headers
   * Actual permitted chars [\ !"#\$%&'\(\)\*\+,-\.\/0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]\^_`abcdefghijklmnopqrstuvwxyz{\|}~]
   * Ranges \x00-\x1F are printer control sequences
   * \x7F is the ascii delete character
   * @param string Data to check against
   * @return boolean
   */
  public function is7BitPrintable($data)
  {
    return (!preg_match('/[^\x20-\x7E]/', $data));
  }
  /**
   * Check that a string does not contain any evil characters for headers.
   * That is, it is 7bit printable and contains no semi-colon.
   * @param string The string to check
   * @return boolean
   */
  public function isHeaderSafe($data)
  {
    return ($this->is7BitPrintable($data) && strpos($data, ";") === false);
  }
  /**
   * If the characters fall exclusively in the 7bit ascii range, return true
   * Note that the NUL byte (\x00) is treated as being outside of that range
   * @param string Input to check
   * @return boolean
   */
  public function is7BitAscii($data)
  {
    return (!preg_match('/[^\x01-\x7F]/', $data));
  }
  /**
   * Encode a string for RFC 2047 compatibility (url-encode)
   * Strings which are not 7bit printable are url-encoded (spaces as %20) and
   * prefixed with charset'language'. The result is then split into lines
   * without ever breaking inside a %XX sequence.
   * NOTE(review): the charset'language'value form looks like RFC 2231 rather than RFC 2047 - confirm
   * @param string The input for encoding
   * @param string The charset used
   * @param string The language used
   * @param int The maximum line length
   * @param string The line ending
   * @return string An empty string if the input is empty
   */
  public function rfc2047Encode($str, $charset="iso-8859-1", $language="en-us", $chunk=76, $le="\r\n")
  {
    if (!$this->is7BitPrintable($str))
    {
      $lang_spec = $charset . "'" . $language . "'";
      $str = $lang_spec . str_replace("+", "%20", urlencode($str));
    }
    preg_match_all('~.{1,'.($chunk-6).'}([^%]{0,3})~', $str, $matches);
    //Always hand back a string; this used to fall through and return null for empty input
    if (empty($matches[0])) return "";
    return implode($le, $matches[0]);
  }
  /**
   * Fixes line endings to be whatever is specified by the user
   * SMTP requires the CRLF be used, but using sendmail in -t mode uses LF
   * This method also escapes dots on a start of line to avoid injection
   * (only dots which follow a line ending, not one at the very start of the data)
   * @param string The data to fix
   * @param string The line ending to use
   * @return string
   */
  protected function fixLE($data, $le)
  {
    $data = str_replace(array("\r\n", "\r"), "\n", $data);
    if ($le != "\n") $data = str_replace("\n", $le, $data);
    return str_replace($le . ".", $le . "..", $data);
  }
  /**
   * Round a value down to the nearest multiple of the given factor
   * @param int The value to round down
   * @param int The factor the result must be a multiple of
   * @return int
   */
  protected function getHcf($value, $factor)
  {
    return ($value - ($value % $factor));
  }
}