xref: /dokuwiki/inc/MailUtils.php (revision 73dc0a8919857718a3b64a4c0741b57580a34b2a)
1<?php
2
3namespace dokuwiki;
4
5use dokuwiki\Utf8\Conversion;
6
/**
 * Stateless email-address utilities: obfuscation, validation, and quoted-printable body encoding.
 *
 * Every method is static. The obfuscation helpers read the global
 * $conf['mailguard'] setting ('visible', 'hex', anything else = no obfuscation).
 */
class MailUtils
{
    /**
     * RFC 2822 atext characters (paras 3.4.1 & 3.2.4), written as the inside
     * of a regex character class.
     *
     * NOTE: the unquoted '/' must remain unquoted to be usable as part of a
     * Lexer pattern; pick the surrounding pattern delimiters with care.
     */
    public const RFC2822_ATEXT = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";

    /**
     * Pattern fragment (no delimiters, no anchors) for email detection and
     * validation: dot-separated atext runs, '@', one or more domain labels
     * and a 2-63 letter top-level label.
     *
     * Uses non-capturing groups since the parser does not allow captures.
     */
    public const PREG_PATTERN_VALID_EMAIL =
        '[' . self::RFC2822_ATEXT . ']+(?:\.[' . self::RFC2822_ATEXT . ']+)*'
        . '@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';

    // region email-address obfuscation

    /**
     * Return an obfuscated email address suitable for HTML text content
     * (link labels, titles).
     *
     * The caller MUST pass a raw, unescaped string; the result is
     * HTML-text-safe. Everything after the first '?' is treated as a query
     * string: it is HTML-escaped but never run through the
     * [at]/[dot]/[dash] substitution, so dots and dashes inside
     * body/subject values stay intact.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-text-safe representation
     */
    public static function obfuscate(string $email): string
    {
        global $conf;

        // sexplode() is relied on to yield null for a missing query part
        [$addr, $query] = sexplode('?', $email, 2);

        $out = self::obfuscateAddress($addr);
        // 'hex' output consists of numeric entities only and must not be
        // escaped a second time; every other mode still returns raw text.
        if ($conf['mailguard'] !== 'hex') {
            $out = self::escapeHtml($out);
        }
        if ($query !== null) {
            $out .= '?' . self::escapeHtml($query);
        }
        return $out;
    }

    /**
     * Return an obfuscated email address suitable for use as a mailto: href
     * value (HTML attribute context).
     *
     * Like obfuscate() but for HTML attribute context. The caller MUST pass a
     * raw, unescaped string. The address half is obfuscated per the mailguard
     * setting; in 'visible' mode the address (with its [at]/[dot] spaces) is
     * additionally percent-encoded so the URL is well-formed. The query
     * string only receives HTML-attribute escaping, so mail clients receive
     * correct subject/body separators.
     *
     * @param string $email raw email address, optionally followed by ?query
     * @return string HTML-attribute-safe URL fragment (without 'mailto:' prefix)
     */
    public static function obfuscateUrl(string $email): string
    {
        global $conf;

        // sexplode() is relied on to yield null for a missing query part
        [$addr, $query] = sexplode('?', $email, 2);

        $addr = self::obfuscateAddress($addr);
        if ($conf['mailguard'] === 'visible') {
            // the substituted text contains spaces and brackets
            $addr = rawurlencode($addr);
        }
        // 'hex' output is already entity-encoded; escaping again would
        // double-encode the ampersands
        if ($conf['mailguard'] !== 'hex') {
            $addr = self::escapeHtml($addr);
        }
        if ($query !== null) {
            $addr .= '?' . self::escapeHtml($query);
        }
        return $addr;
    }

    /**
     * Apply the configured mailguard mode to the address half of a mailto
     * target.
     *
     * 'visible' replaces '@', '.' and '-' with ' [at] ', ' [dot] ' and
     * ' [dash] '; 'hex' delegates to Conversion::toHtml(); any other value
     * returns the address untouched. Only the 'hex' result is HTML-safe,
     * the other modes return raw text that still needs HTML escaping.
     *
     * @param string $addr raw local@domain
     * @return string
     */
    protected static function obfuscateAddress(string $addr): string
    {
        global $conf;

        return match ($conf['mailguard']) {
            'visible' => strtr($addr, ['@' => ' [at] ', '.' => ' [dot] ', '-' => ' [dash] ']),
            'hex' => Conversion::toHtml($addr, true),
            default => $addr,
        };
    }

    /**
     * Escape a raw string for HTML text or attribute context.
     *
     * @param string $raw unescaped text
     * @return string text with HTML special characters (both quote styles) escaped
     */
    private static function escapeHtml(string $raw): string
    {
        return htmlspecialchars($raw, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    // endregion
    // region outgoing-mail helpers

    /**
     * Check if a given mail address is valid.
     *
     * Thin wrapper around \EmailAddressValidator::checkEmailAddress().
     *
     * @param string $email the address to check
     * @return bool true if address is valid
     */
    public static function isValid(string $email): bool
    {
        return \EmailAddressValidator::checkEmailAddress($email, true);
    }

    /**
     * RFC 2045 quoted-printable encoding.
     *
     * The text is split at CRLF, CR or LF; every non-empty line is encoded
     * on its own and the lines are joined again with MAILHEADER_EOL.
     * Bytes outside printable ASCII, and '=' itself, become "=XX" (upper-case
     * hex). A tab or space at the end of a line is encoded as well.
     *
     * With $bEmulate_imap_8bit enabled the output mimics imap_8bit(): tabs
     * are encoded everywhere, and trailing whitespace on the very last line
     * is left alone.
     *
     * @param string $sText text to encode
     * @param int $maxlen encoded lines are cut into chunks of at most this
     *                    many characters, each followed by a soft line break
     *                    ('=' plus MAILHEADER_EOL); 0 disables wrapping
     * @param bool $bEmulate_imap_8bit mimic imap_8bit() as described above
     * @return string
     * @author umu <umuAThrz.tu-chemnitz.de>
     * @link   http://php.net/manual/en/function.imap-8bit.php#61216
     */
    public static function quotedPrintableEncode(
        string $sText,
        int    $maxlen = 74,
        bool   $bEmulate_imap_8bit = true
    ): string
    {
        // split text into lines
        $aLines = preg_split("/(?:\r\n|\r|\n)/", $sText);
        $iLastLine = count($aLines) - 1;

        // Everything NOT in the class is encoded. imap_8bit encodes x09
        // everywhere, not only at line ends, so emulation drops the tab from
        // the safe set. The pattern must not carry the removed /e modifier:
        // preg_replace_callback() rejects it and returns null.
        $sRegExp = $bEmulate_imap_8bit
            ? '/[^\x20\x21-\x3C\x3E-\x7E]/'
            : '/[^\x09\x20\x21-\x3C\x3E-\x7E]/';

        foreach ($aLines as $i => $sLine) {
            if ($sLine === '') {
                continue; // nothing to encode
            }

            $sLine = preg_replace_callback(
                $sRegExp,
                static fn(array $matches): string => sprintf("=%02X", ord($matches[0])),
                $sLine
            );

            // encode x09,x20 at line ends; imap_8bit does not do this at the
            // very end of the text, so emulation skips the last line
            if (!($bEmulate_imap_8bit && $i === $iLastLine)) {
                $sLastChar = $sLine[-1];
                if ($sLastChar === "\t" || $sLastChar === ' ') {
                    $sLine = substr($sLine, 0, -1) . sprintf('=%02X', ord($sLastChar));
                }
            }

            // imap_8bit encodes x20 in front of an encoded chr(13), too;
            // spaces around =0A or after =0D are deliberately left alone
            if ($bEmulate_imap_8bit) {
                $sLine = str_replace(' =0D', '=20=0D', $sLine);
            }

            // finally split into soft lines no longer than $maxlen chars;
            // the optional tail that excludes '=' is meant to keep "=XX"
            // triplets from being torn apart by a soft break
            if ($maxlen) {
                preg_match_all('/.{1,' . ($maxlen - 2) . '}([^=]{0,2})?/', $sLine, $aMatch);
                $sLine = implode('=' . MAILHEADER_EOL, $aMatch[0]); // add soft crlf's
            }

            $aLines[$i] = $sLine;
        }

        // join lines into text
        return implode(MAILHEADER_EOL, $aLines);
    }

    // endregion
}
206