<?php

namespace dokuwiki;

use dokuwiki\Utf8\Conversion;

/**
 * Stateless email utilities: address obfuscation, address validation, and
 * quoted-printable body encoding.
 *
 * All methods are static. The obfuscation methods read the global
 * $conf['mailguard'] setting ('visible', 'hex', or anything else for no
 * obfuscation).
 */
class MailUtils
{
    /**
     * RFC 2822 atext characters (paras 3.4.1 & 3.2.4).
     *
     * NOTE: the unquoted '/' must remain unquoted to be usable as part of a
     * Lexer pattern; pick the surrounding pattern delimiters with care.
     */
    public const RFC2822_ATEXT = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";

    /**
     * Pattern fragment (no delimiters, no anchors) for email detection and
     * validation.
     *
     * Uses non-capturing groups since the parser does not allow captures.
     */
    public const PREG_PATTERN_VALID_EMAIL =
        '[' . self::RFC2822_ATEXT . ']+(?:\.[' . self::RFC2822_ATEXT . ']+)*'
        . '@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';

    // region email-address obfuscation

    /**
     * Return an obfuscated email address suitable for HTML text content
     * (link labels, titles).
     *
     * The caller MUST pass a raw, unescaped string; the result is
     * HTML-text-safe. Anything after the first '?' is treated as a query
     * string: it is HTML-escaped but never run through the
     * [at]/[dot]/[dash] substitution, so dots and dashes inside
     * body/subject values stay intact.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-text-safe representation
     */
    public static function obfuscate(string $email): string
    {
        global $conf;

        [$addr, $query] = sexplode('?', $email, 2);
        $out = self::obfuscateAddress($addr);

        // 'hex' output already consists of numeric entities and must not be
        // escaped again; every other mode still yields raw text.
        if ($conf['mailguard'] !== 'hex') {
            $out = self::escapeHtml($out);
        }

        return $query === null ? $out : $out . '?' . self::escapeHtml($query);
    }

    /**
     * Return an obfuscated email address suitable for use as a mailto: href
     * value (HTML attribute context).
     *
     * Like obfuscate(), but for URL use. The caller MUST pass a raw,
     * unescaped string. The address half is obfuscated per the mailguard
     * setting; in 'visible' mode the address (with its " [at] "/" [dot] "
     * spaces) is additionally percent-encoded so the URL is well-formed.
     * The query string only receives HTML-attribute escaping, so mail
     * clients get the subject/body separators unchanged.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-attribute-safe URL fragment (without 'mailto:' prefix)
     */
    public static function obfuscateUrl(string $email): string
    {
        global $conf;

        [$addr, $query] = sexplode('?', $email, 2);
        $addr = self::obfuscateAddress($addr);

        $out = match ($conf['mailguard']) {
            // numeric entities: already attribute-safe
            'hex' => $addr,
            // percent-encode first so the inserted spaces and brackets survive in a URL
            'visible' => self::escapeHtml(rawurlencode($addr)),
            default => self::escapeHtml($addr),
        };

        return $query === null ? $out : $out . '?' . self::escapeHtml($query);
    }

    /**
     * Apply the configured mailguard mode to the address half of a mailto
     * target.
     *
     * 'hex' mode returns the result of Conversion::toHtml() (numeric
     * entities, HTML-safe); 'visible' mode and the pass-through default
     * return raw text that the caller still has to HTML-escape.
     *
     * @param string $addr raw local@domain
     * @return string
     */
    protected static function obfuscateAddress(string $addr): string
    {
        global $conf;

        return match ($conf['mailguard']) {
            'visible' => strtr($addr, ['@' => ' [at] ', '.' => ' [dot] ', '-' => ' [dash] ']),
            'hex' => Conversion::toHtml($addr, true),
            default => $addr,
        };
    }

    /**
     * HTML-escape a string for text or attribute context (both quote styles,
     * invalid UTF-8 sequences substituted).
     *
     * @param string $text raw text
     * @return string escaped text
     */
    private static function escapeHtml(string $text): string
    {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    // endregion
    // region outgoing-mail helpers

    /**
     * Check if a given mail address is valid.
     *
     * Delegates to EmailAddressValidator::checkEmailAddress().
     *
     * @param string $email the address to check
     * @return bool true if address is valid
     */
    public static function isValid(string $email): bool
    {
        return \EmailAddressValidator::checkEmailAddress($email, true);
    }

    /**
     * RFC 2045 quoted-printable encoding.
     *
     * The text is split on CRLF, CR or LF; every line is encoded on its own
     * and the lines are re-joined with MAILHEADER_EOL. Within a line:
     *
     *  - every byte outside printable ASCII, plus '=', becomes =XX; with
     *    $bEmulate_imap_8bit the tab character is encoded as well
     *  - a trailing space or tab is encoded, except (in emulation mode) on
     *    the very last line of the text
     *  - if $maxlen is positive the line is cut into chunks of at most
     *    $maxlen characters, joined by soft line breaks ('=' + MAILHEADER_EOL);
     *    an =XX sequence is never split. A $maxlen of zero or less disables
     *    wrapping; values below 3 are treated like 3.
     *
     * @param string $sText text to encode
     * @param int $maxlen maximum chunk length before the soft-break '='
     * @param bool $bEmulate_imap_8bit mimic the output of imap_8bit()
     * @return string the encoded text
     * @author umu <umuAThrz.tu-chemnitz.de>
     * @link http://php.net/manual/en/function.imap-8bit.php#61216
     */
    public static function quotedPrintableEncode(
        string $sText,
        int $maxlen = 74,
        bool $bEmulate_imap_8bit = true
    ): string {
        $aLines = preg_split("/(?:\r\n|\r|\n)/", $sText);
        $lastIndex = count($aLines) - 1;

        // imap_8bit encodes x09 everywhere, not only at line ends, so the
        // emulation class does not whitelist it. Neither pattern may carry
        // the /e modifier: PHP 7 removed it and rejects the whole pattern.
        $sRegExp = $bEmulate_imap_8bit
            ? '/[^\x20\x21-\x3C\x3E-\x7E]/'
            : '/[^\x09\x20\x21-\x3C\x3E-\x7E]/';

        // Chunk pattern for soft wrapping: up to ($maxlen - 2) arbitrary
        // characters plus up to two non-'=' characters, so that a chunk
        // ending in '=' or '=X' always swallows the rest of its =XX triplet.
        // The width is clamped to 1 to keep the quantifier valid.
        $sChunkRegExp = $maxlen > 0
            ? '/.{1,' . max(1, $maxlen - 2) . '}([^=]{0,2})?/'
            : null;
        $sSoftBreak = '=' . MAILHEADER_EOL;

        foreach ($aLines as $i => $sLine) {
            if ($sLine === '') {
                continue; // empty lines stay empty
            }

            $sLine = preg_replace_callback(
                $sRegExp,
                static fn(array $matches): string => sprintf('=%02X', ord($matches[0])),
                $sLine
            );

            // Encode x09/x20 at line ends. imap_8bit does not encode x20 at
            // the very end of a text, so emulation mode skips the last line.
            if (!($bEmulate_imap_8bit && $i === $lastIndex)) {
                $sLastChar = substr($sLine, -1);
                if ($sLastChar === "\t" || $sLastChar === ' ') {
                    $sLine = substr($sLine, 0, -1) . sprintf('=%02X', ord($sLastChar));
                }
            }

            // imap_8bit encodes x20 before an encoded CR, too. Lines are
            // split on CR above, so this is normally a no-op; kept for parity.
            if ($bEmulate_imap_8bit) {
                $sLine = str_replace(' =0D', '=20=0D', $sLine);
            }

            if ($sChunkRegExp !== null) {
                preg_match_all($sChunkRegExp, $sLine, $aMatch);
                $sLine = implode($sSoftBreak, $aMatch[0]); // add soft line breaks
            }

            $aLines[$i] = $sLine;
        }

        return implode(MAILHEADER_EOL, $aLines);
    }

    // endregion
}