<?php

namespace dokuwiki;

use dokuwiki\Utf8\Conversion;

/**
 * Stateless email-address utilities: obfuscation, validation, and quoted-printable body encoding.
 */
class MailUtils
{
    /**
     * RFC 2822 atext characters (paras 3.4.1 & 3.2.4).
     *
     * NOTE: the unquoted '/' must remain unquoted to be usable as part of a
     * Lexer pattern; pick the surrounding pattern delimiters with care.
     */
    public const RFC2822_ATEXT = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";

    /**
     * Pattern for use in email detection and validation.
     *
     * Uses non-capturing groups since the parser does not allow captures.
     * The pattern carries no delimiters or anchors; callers add their own.
     */
    public const PREG_PATTERN_VALID_EMAIL =
        '[' . self::RFC2822_ATEXT . ']+(?:\.[' . self::RFC2822_ATEXT . ']+)*'
        . '@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';

    // region email-address obfuscation

    /**
     * Return an obfuscated email address suitable for HTML text content
     * (link labels, titles).
     *
     * The caller MUST pass a raw, unescaped string; the result is
     * HTML-text-safe. Any query string after the first '?' is preserved
     * verbatim (apart from HTML escaping) and is never run through the
     * [at]/[dot]/[dash] substitution, so dots and dashes inside body/subject
     * values stay intact.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-text-safe representation
     */
    public static function obfuscate(string $email): string
    {
        global $conf;

        // Only the part before the first '?' is an address; $query is null
        // when there is no '?' at all.
        [$addr, $query] = sexplode('?', $email, 2);
        $out = self::obfuscateAddress($addr);

        // 'hex' output is already pure ASCII numeric entities and therefore
        // HTML-safe. For every other mode the address still needs escaping.
        if ($conf['mailguard'] !== 'hex') {
            $out = htmlspecialchars($out, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        if ($query !== null) {
            $out .= '?' . htmlspecialchars($query, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        return $out;
    }

    /**
     * Return an obfuscated email address suitable for use as a mailto: href
     * value (HTML attribute context).
     *
     * Like obfuscate() but for HTML attribute context. The caller MUST pass a
     * raw, unescaped string. The address half is obfuscated per the mailguard
     * setting; in 'visible' mode the address (with its [at]/[dot] spaces) is
     * percent-encoded so the URL is well-formed. The query string is
     * preserved verbatim with only HTML-attribute escaping applied, so mail
     * clients receive correct subject/body separators.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-attribute-safe URL fragment (without 'mailto:' prefix)
     */
    public static function obfuscateUrl(string $email): string
    {
        global $conf;

        [$addr, $query] = sexplode('?', $email, 2);
        $addr = self::obfuscateAddress($addr);

        // The 'visible' replacement introduces spaces and brackets, which
        // must be percent-encoded before they can be part of a URL.
        if ($conf['mailguard'] === 'visible') {
            $addr = rawurlencode($addr);
        }
        // 'hex' output consists of numeric entities only; escaping it again
        // would break the entities.
        if ($conf['mailguard'] !== 'hex') {
            $addr = htmlspecialchars($addr, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        if ($query !== null) {
            $addr .= '?' . htmlspecialchars($query, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        return $addr;
    }

    /**
     * Apply the configured mailguard mode to the address half of a mailto
     * target. Returns hex-mode output as numeric entities (HTML-safe);
     * visible/none modes return raw text that still needs HTML escaping.
     *
     * @param string $addr raw local@domain
     * @return string
     */
    protected static function obfuscateAddress(string $addr): string
    {
        global $conf;

        return match ($conf['mailguard']) {
            'visible' => strtr($addr, ['@' => ' [at] ', '.' => ' [dot] ', '-' => ' [dash] ']),
            'hex' => Conversion::toHtml($addr, true),
            // any other setting leaves the address untouched
            default => $addr,
        };
    }

    // endregion
    // region outgoing-mail helpers

    /**
     * Check if a given mail address is valid.
     *
     * Delegates to EmailAddressValidator::checkEmailAddress().
     *
     * @param string $email the address to check
     * @return bool true if address is valid
     */
    public static function isValid(string $email): bool
    {
        return \EmailAddressValidator::checkEmailAddress($email, true);
    }

    /**
     * RFC 2045 quoted-printable encoding.
     *
     * The text is split on CRLF, CR or LF, every line is encoded on its own
     * and the lines are joined again with MAILHEADER_EOL. Bytes outside the
     * printable ASCII range and the '=' sign are written as "=XX" (upper-case
     * hex). Lines longer than the limit are folded with soft line breaks
     * ("=" followed by MAILHEADER_EOL).
     *
     * @param string $sText the text to encode
     * @param int $maxlen maximum length of an encoded line before the soft
     *                    line break marker; 0 disables folding. Values below 3
     *                    are treated as 3 so the folding pattern stays valid.
     * @param bool $bEmulate_imap_8bit mimic imap_8bit(): encode tabs
     *                    everywhere and leave a trailing space on the very
     *                    last line unencoded
     * @return string the encoded text
     * @author umu <umuAThrz.tu-chemnitz.de>
     * @link http://php.net/manual/en/function.imap-8bit.php#61216
     */
    public static function quotedPrintableEncode(
        string $sText,
        int $maxlen = 74,
        bool $bEmulate_imap_8bit = true
    ): string {
        // Bytes NOT matched by the class are kept literally. imap_8bit()
        // encodes x09 everywhere, the plain RFC variant only at line ends
        // (handled further down). The pattern is the same for every line, so
        // it is chosen once. It must not carry the removed /e modifier, which
        // makes preg_replace_callback() fail and return null.
        $sRegExp = $bEmulate_imap_8bit
            ? '/[^\x20\x21-\x3C\x3E-\x7E]/'
            : '/[^\x09\x20\x21-\x3C\x3E-\x7E]/';

        // split text into lines
        $aLines = preg_split("/(?:\r\n|\r|\n)/", $sText);
        $iLastLine = count($aLines) - 1;

        foreach ($aLines as $i => &$sLine) {
            if ($sLine === '') {
                continue; // nothing to encode
            }

            $sLine = preg_replace_callback(
                $sRegExp,
                static fn(array $matches): string => sprintf('=%02X', ord($matches[0])),
                $sLine
            );

            // Encode x09/x20 at line ends. imap_8bit() does not do this on
            // the very last line of the text, so that line is skipped when
            // emulating it.
            if (!($bEmulate_imap_8bit && $i === $iLastLine)) {
                $sLastChar = substr($sLine, -1);
                if ($sLastChar === "\t" || $sLastChar === ' ') {
                    $sLine = substr($sLine, 0, -1) . ($sLastChar === "\t" ? '=09' : '=20');
                }
            }

            // imap_8bit() encodes x20 before an encoded chr(13), too
            if ($bEmulate_imap_8bit) {
                $sLine = str_replace(' =0D', '=20=0D', $sLine);
            }

            // Finally split into soft lines. Each chunk takes up to
            // $maxlen - 2 characters plus up to two more non-'=' characters,
            // so an "=XX" sequence is never cut in half.
            if ($maxlen) {
                $iChunk = max(1, $maxlen - 2);
                preg_match_all('/.{1,' . $iChunk . '}([^=]{0,2})?/', $sLine, $aMatch);
                $sLine = implode('=' . MAILHEADER_EOL, $aMatch[0]); // add soft crlf's
            }
        }
        unset($sLine); // drop the dangling reference into $aLines

        // join lines into text
        return implode(MAILHEADER_EOL, $aLines);
    }

    // endregion
}