xref: /dokuwiki/inc/MailUtils.php (revision 73dc0a8919857718a3b64a4c0741b57580a34b2a)
1*73dc0a89SAndreas Gohr<?php
2*73dc0a89SAndreas Gohr
3*73dc0a89SAndreas Gohrnamespace dokuwiki;
4*73dc0a89SAndreas Gohr
5*73dc0a89SAndreas Gohruse dokuwiki\Utf8\Conversion;
6*73dc0a89SAndreas Gohr
/**
 * Stateless email-address utilities: obfuscation, validation, and quoted-printable body encoding.
 */
class MailUtils
{
    /**
     * RFC 2822 atext characters (paras 3.4.1 & 3.2.4).
     *
     * NOTE: the unquoted '/' must remain unquoted to be usable as part of a
     * Lexer pattern; pick the surrounding pattern delimiters with care.
     */
    public const RFC2822_ATEXT = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";

    /**
     * Pattern for use in email detection and validation.
     *
     * Uses non-capturing groups since the parser does not allow captures.
     */
    public const PREG_PATTERN_VALID_EMAIL =
        '[' . self::RFC2822_ATEXT . ']+(?:\.[' . self::RFC2822_ATEXT . ']+)*'
        . '@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';

    // region email-address obfuscation

    /**
     * Return an obfuscated email address suitable for HTML text content
     * (link labels, titles).
     *
     * The caller MUST pass a raw, unescaped string; the result is
     * HTML-text-safe. Any query string after the first '?' is preserved
     * verbatim and is never run through the [at]/[dot]/[dash] substitution,
     * so dots and dashes inside body/subject values stay intact.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-text-safe representation
     */
    public static function obfuscate(string $email): string
    {
        global $conf;

        [$addr, $query] = sexplode('?', $email, 2);
        $out = self::obfuscateAddress($addr);
        // 'hex' output is already pure ASCII numeric entities → HTML-safe.
        // For 'none'/'visible' the address half still needs HTML escaping.
        if ($conf['mailguard'] !== 'hex') {
            $out = htmlspecialchars($out, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        if ($query !== null) {
            $out .= '?' . htmlspecialchars($query, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        return $out;
    }

    /**
     * Return an obfuscated email address suitable for use as a mailto: href
     * value (HTML attribute context).
     *
     * Like obfuscate() but for HTML attribute context. The caller MUST pass a
     * raw, unescaped string. The address half is obfuscated per the mailguard
     * setting; in 'visible' mode the address (with its [at]/[dot] spaces) is
     * percent-encoded so the URL is well-formed. The query string is
     * preserved verbatim with only HTML-attribute escaping applied, so mail
     * clients receive correct subject/body separators.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-attribute-safe URL fragment (without 'mailto:' prefix)
     */
    public static function obfuscateUrl(string $email): string
    {
        global $conf;

        [$addr, $query] = sexplode('?', $email, 2);
        $addr = self::obfuscateAddress($addr);
        if ($conf['mailguard'] === 'visible') {
            $addr = rawurlencode($addr);
        }
        if ($conf['mailguard'] !== 'hex') {
            $addr = htmlspecialchars($addr, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        if ($query !== null) {
            $addr .= '?' . htmlspecialchars($query, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }
        return $addr;
    }

    /**
     * Apply the configured mailguard mode to the address half of a mailto
     * target. Returns hex-mode output as numeric entities (HTML-safe);
     * visible/none modes return raw text that still needs HTML escaping.
     *
     * @param string $addr raw local@domain
     * @return string
     */
    protected static function obfuscateAddress(string $addr): string
    {
        global $conf;

        return match ($conf['mailguard']) {
            'visible' => strtr($addr, ['@' => ' [at] ', '.' => ' [dot] ', '-' => ' [dash] ']),
            'hex' => Conversion::toHtml($addr, true),
            default => $addr,
        };
    }

    // endregion
    // region outgoing-mail helpers

    /**
     * Check if a given mail address is valid.
     *
     * @param string $email the address to check
     * @return bool true if address is valid
     */
    public static function isValid(string $email): bool
    {
        return \EmailAddressValidator::checkEmailAddress($email, true);
    }

    /**
     * RFC 2045 quoted-printable encoding.
     *
     * The text is split on CRLF, CR or LF, each line is encoded on its own,
     * and the lines are re-joined with MAILHEADER_EOL.
     *
     * @param string $sText text to encode (treated as a byte string)
     * @param int $maxlen maximum length of an encoded chunk before a soft
     *                    line break ('=' + MAILHEADER_EOL) is inserted;
     *                    0 disables soft wrapping, as do values below 3,
     *                    which cannot hold a single "=XX" sequence
     * @param bool $bEmulate_imap_8bit when true, tabs are encoded everywhere
     *                    and a trailing space on the very last line is left
     *                    unencoded; when false, tabs stay literal except at
     *                    line ends and trailing whitespace is always encoded
     * @return string the quoted-printable encoded text
     * @author umu <umuAThrz.tu-chemnitz.de>
     * @link   http://php.net/manual/en/function.imap-8bit.php#61216
     *
     */
    public static function quotedPrintableEncode(
        string $sText,
        int    $maxlen = 74,
        bool   $bEmulate_imap_8bit = true
    ): string
    {
        // split text into lines
        $aLines = preg_split("/(?:\r\n|\r|\n)/", $sText);
        $lastIndex = count($aLines) - 1;

        // Bytes that must be written as =XX. In emulation mode x09 is encoded
        // everywhere, not only at line ends. The pattern carries no modifiers:
        // the former "/e" flag no longer exists in PCRE-enabled PHP and made
        // preg_replace_callback() fail for the non-emulating variant.
        $sRegExp = $bEmulate_imap_8bit
            ? '/[^\x20\x21-\x3C\x3E-\x7E]/'
            : '/[^\x09\x20\x21-\x3C\x3E-\x7E]/';

        // Chunk pattern for soft line breaks: up to $maxlen - 2 characters,
        // then up to two more non-"=" characters so an "=XX" sequence is
        // never torn apart. Only usable when at least one character fits.
        $sWrapRegExp = ($maxlen > 2)
            ? '/.{1,' . ($maxlen - 2) . '}([^=]{0,2})?/'
            : null;

        foreach ($aLines as $i => $sLine) {
            if ($sLine === '') continue; // do nothing, if empty

            $sLine = preg_replace_callback(
                $sRegExp,
                static fn(array $matches): string => sprintf("=%02X", ord($matches[0])),
                $sLine
            );

            // encode x09,x20 at line ends; imap_8bit leaves a x20 at the very
            // end of the text alone, so emulation mode skips the last line
            if (!($bEmulate_imap_8bit && $i === $lastIndex)) {
                $sLastChar = substr($sLine, -1);
                if ($sLastChar === "\t" || $sLastChar === ' ') {
                    $sLine = substr($sLine, 0, -1) . ($sLastChar === "\t" ? '=09' : '=20');
                }
            }

            // imap_8bit encodes x20 before an encoded chr(13), too
            if ($bEmulate_imap_8bit) {
                $sLine = str_replace(' =0D', '=20=0D', $sLine);
            }

            // finally split into soft lines no longer than $maxlen chars
            if ($sWrapRegExp !== null) {
                preg_match_all($sWrapRegExp, $sLine, $aMatch);
                $sLine = implode('=' . MAILHEADER_EOL, $aMatch[0]); // add soft crlf's
            }

            $aLines[$i] = $sLine;
        }

        // join lines into text
        return implode(MAILHEADER_EOL, $aLines);
    }

    // endregion
}
206