xref: /dokuwiki/inc/Utf8/PhpString.php (revision f41bbe4cad0871728891d9ffb45bd6fd79ab1024)
1*f41bbe4cSAndreas Gohr<?php
2*f41bbe4cSAndreas Gohr
3*f41bbe4cSAndreas Gohrnamespace dokuwiki\Utf8;
4*f41bbe4cSAndreas Gohr
/**
 * UTF-8 aware equivalents to PHP's string functions
 *
 * All offsets and lengths handled by this class are counted in UTF-8 characters, not in bytes.
 * Where the UTF8_MBSTRING constant is true the mbstring extension does the work, otherwise
 * PCRE (with the 'u' modifier) and lookup tables are used.
 */
class PhpString
{

    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as directory separators. Note that a name consisting of
     * nothing but the suffix is reduced to an empty string.
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     *
     * @param string $path A path
     * @param string $suffix If the name component ends in suffix this will also be cut off
     * @return string
     */
    public static function basename($path, $suffix = '')
    {
        // drop surrounding separators so a trailing slash does not produce an empty name
        $path = trim($path, '\\/');

        // last separator of either kind; false when the path has none. It can never be 0
        // because leading separators have just been trimmed.
        $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
        if ($rpos) {
            $path = substr($path, $rpos + 1);
        }

        // byte wise comparison is sufficient here, both strings are compared as a whole
        $suflen = strlen($suffix);
        if ($suflen && (substr($path, -$suflen) === $suffix)) {
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }

    /**
     * Unicode aware replacement for strlen()
     *
     * Strategies are tried in this order: mbstring, iconv, utf8_decode(), plain strlen().
     * utf8_decode() converts characters that are not in ISO-8859-1 to '?', which, for the
     * purpose of counting, is alright. It is deprecated as of PHP 8.2 though, so it is only
     * used when neither mbstring nor iconv can do the job.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     *
     * @param string $string
     * @return int number of characters; falls back to the byte count if no UTF-8 aware
     *             function is available
     */
    public static function strlen($string)
    {
        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }

        if (function_exists('iconv_strlen')) {
            $length = iconv_strlen($string, 'UTF-8');
            // iconv gives up on malformed input and returns false, try the next strategy then
            if ($length !== false) {
                return $length;
            }
        }

        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }

        return strlen($string);
    }

    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * Unlike substr() this never returns false, an empty string is returned instead.
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left), negative counts from the end
     * @param int $length (optional) length in UTF-8 characters from offset, negative leaves
     *                    that many characters off the end
     * @return string
     */
    public static function substr($str, $offset, $length = null)
    {
        if (UTF8_MBSTRING) {
            // a null length means "up to the end of the string". The encoding is passed
            // explicitly so the result does not depend on mb_internal_encoding().
            return mb_substr($str, $offset, $length, 'UTF-8');
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;
        $offset = (int)$offset;
        if ($length !== null) {
            $length = (int)$length;
        }

        // handle trivial cases
        if ($length === 0) {
            return '';
        }
        if ($offset < 0 && $length < 0 && $length < $offset) {
            return '';
        }

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = self::strlen($str);        // see notes
            $offset = max(0, $strlen + $offset);
        }

        // the offset becomes an anchored, non-captured group of exactly $offset characters
        $offset_pattern = '^';
        if ($offset > 0) {
            $offset_pattern .= '(?:' . self::repeatedDotPattern($offset) . ')';
        }

        // establish a pattern for length
        if ($length === null) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {
            if (!isset($strlen)) {
                $strlen = self::strlen($str);           // see notes
            }
            if ($offset > $strlen) {
                return '';                              // another trivial case
            }

            if ($length > 0) {
                // reduce any length that would go past the end of the string
                $length = min($strlen - $offset, $length);

                // +ve length requires ... a captured group of length characters
                $length_pattern = '(' . self::repeatedDotPattern($length) . ')';
            } else {
                // only -ve lengths get here, zero has been handled as a trivial case above
                if ($length < ($offset - $strlen)) {
                    return '';
                }

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                $length_pattern = '(.*)(?:' . self::repeatedDotPattern(-$length) . ')$';
            }
        }

        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) {
            return '';
        }
        return $match[1];
    }

    /**
     * Build a regular expression fragment matching exactly $count arbitrary characters
     *
     * The count is split into chunks of 65535 because PCRE does not accept larger
     * repetition quantifiers.
     *
     * @param int $count number of characters to match, must not be negative
     * @return string
     */
    private static function repeatedDotPattern($count)
    {
        $chunks = (int)($count / 65535);
        $rest = $count % 65535;

        $pattern = '';
        if ($chunks) {
            $pattern = '(?:.{65535}){' . $chunks . '}';
        }
        return $pattern . '.{' . $rest . '}';
    }

    /**
     * Unicode aware replacement for substr_replace()
     *
     * Be aware that the default length differs from substr_replace(): when no length is given
     * the replacement is inserted and nothing is removed.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     *
     * @param string $string input string
     * @param string $replacement the replacement
     * @param int $start the replacing will begin at the start'th offset into string. If negative,
     *                            it is counted from the end of string.
     * @param int $length If given and is positive, it represents the length of the portion of string which is
     *                            to be replaced. If negative, it is the number of characters from the end of
     *                            string at which to stop replacing. If length is zero then this function will
     *                            have the effect of inserting replacement into string at the given start offset.
     * @return string
     */
    public static function substr_replace($string, $replacement, $start, $length = 0)
    {
        // Negative values are relative to the end of the string and have to be translated into
        // absolute positions first. The (comparatively expensive) character count is only
        // needed for that.
        if ($start < 0 || $length < 0) {
            $strlen = self::strlen($string);

            if ($start < 0) {
                $start = max(0, $strlen + $start);
            } else {
                $start = min($start, $strlen);
            }

            if ($length < 0) {
                // stop -$length characters before the end, but never before $start
                $length = max(0, $strlen - $start + $length);
            }
        }

        $head = ($start > 0) ? self::substr($string, 0, $start) : '';
        $tail = self::substr($string, $start + $length);

        return $head . $replacement . $tail;
    }

    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist the native ltrim() is used. A given charlist is taken literally,
     * '..' ranges are not supported.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    public static function ltrim($str, $charlist = '')
    {
        if ($charlist === '') {
            return ltrim($str);
        }

        // quote the charlist for use in a character class, this covers '^', ']', '-', '\' and
        // the delimiter
        $class = preg_quote($charlist, '/');

        return preg_replace('/^[' . $class . ']+/u', '', $str);
    }

    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist the native rtrim() is used. A given charlist is taken literally,
     * '..' ranges are not supported.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    public static function rtrim($str, $charlist = '')
    {
        if ($charlist === '') {
            return rtrim($str);
        }

        // quote the charlist for use in a character class, this covers '^', ']', '-', '\' and
        // the delimiter
        $class = preg_quote($charlist, '/');

        // the D modifier makes '$' match at the very end only, not in front of a final newline
        return preg_replace('/[' . $class . ']+$/uD', '', $str);
    }

    /**
     * Unicode aware replacement for trim()
     *
     * Without a charlist the native trim() is used, otherwise the string is trimmed on both
     * ends with rtrim() and ltrim() of this class.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    public static function trim($str, $charlist = '')
    {
        if ($charlist === '') {
            return trim($str);
        }

        $str = self::rtrim($str, $charlist);
        return self::ltrim($str, $charlist);
    }

    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available. When the Normalizer class is loaded as well, the
     * lower cased string is additionally passed through Normalizer::normalize().
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     *
     * @param string $string
     * @return string
     */
    public static function strtolower($string)
    {
        if (!UTF8_MBSTRING) {
            return strtr($string, Table::upperCaseToLowerCase());
        }

        $lower = mb_strtolower($string, 'utf-8');

        // second argument: do not trigger autoloading just to look for the class
        if (class_exists('Normalizer', false)) {
            return \Normalizer::normalize($lower);
        }
        return $lower;
    }

    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtoupper()
     *
     * @param string $string
     * @return string
     */
    public static function strtoupper($string)
    {
        if (UTF8_MBSTRING) {
            return mb_strtoupper($string, 'utf-8');
        }

        return strtr($string, Table::lowerCaseToUpperCase());
    }


    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     */
    public static function ucfirst($str)
    {
        $length = self::strlen($str);

        if (!$length) {
            return '';
        }
        if ($length === 1) {
            return self::strtoupper($str);
        }

        // split into the first character and the remainder. The match fails for malformed
        // UTF-8, the input is handed back untouched in that case.
        if (!preg_match('/^(.{1})(.*)$/us', $str, $matches)) {
            return $str;
        }
        return self::strtoupper($matches[1]) . $matches[2];
    }

    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     * @param string $str
     * @return string with first char of each word uppercase
     */
    public static function ucwords($str)
    {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        //
        // group 1: leading whitespace (empty at the start of the string)
        // group 2: first character of the word
        // group 3: remainder of the word
        $pattern = '/(^|[\x0c\x09\x0b\x0a\x0d\x20]+)([^\x0c\x09\x0b\x0a\x0d\x20])([^\x0c\x09\x0b\x0a\x0d\x20]*)/u';

        return preg_replace_callback(
            $pattern,
            static function ($matches) {
                // Reassemble from the captured groups. Trimming the whole match instead would
                // use a different whitespace definition than the pattern above.
                return $matches[1] . self::strtoupper($matches[2]) . $matches[3];
            },
            $str
        );
    }

    /**
     * This is an Unicode aware replacement for strpos
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     *
     * @param  string $haystack
     * @param  string $needle
     * @param  integer $offset character offset to start searching at. A negative offset is counted
     *                         from the end of the haystack (clamped to the start of the string)
     * @return int|false character position of the first match, false if the needle was not found
     */
    public static function strpos($haystack, $needle, $offset = 0)
    {
        // The byte based strpos() below would interpret a negative offset in bytes, so it is
        // turned into a positive character offset first
        if ($offset < 0) {
            $offset = max(0, self::strlen($haystack) + $offset);
        }

        $comp = 0;       // number of bytes the byte offset has to be ahead of the character offset
        $length = null;  // character position of the most recent match

        while ($length === null || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false) {
                return false;
            }

            // translate the byte position into a character position
            $length = self::strlen(substr($haystack, 0, $pos));

            // The match is located in front of the requested character offset because
            // multibyte characters precede it. Search again, further to the right.
            if ($length < $offset) {
                $comp = $pos - $length;
            }
        }

        return $length;
    }


}
381