<?php
// xref: /dokuwiki/inc/Utf8/PhpString.php (revision 8a9a2e3d00104aa7fb95b603d8baaee8401379ae)

namespace dokuwiki\Utf8;

/**
 * UTF-8 aware equivalents to PHP's string functions
 *
 * Several methods use the mbstring extension when the UTF8_MBSTRING constant
 * is truthy and fall back to PCRE, iconv or lookup tables otherwise.
 */
class PhpString
{

    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Leading and trailing slashes/backslashes are trimmed first, then everything
     * up to and including the last remaining separator is dropped.
     *
     * @param string $path A path
     * @param string $suffix If the name component ends in suffix this will also be cut off
     * @return string
     * @link   https://bugs.php.net/bug.php?id=37738
     *
     * @see basename()
     */
    public static function basename($path, $suffix = '')
    {
        $path = trim((string)$path, '\\/');

        // position of the last separator of either kind, -1 when there is none
        $slash = strrpos($path, '/');
        $backslash = strrpos($path, '\\');
        $rpos = max(
            $slash === false ? -1 : $slash,
            $backslash === false ? -1 : $backslash
        );
        if ($rpos >= 0) {
            $path = substr($path, $rpos + 1);
        }

        // tolerate a null suffix without triggering deprecation notices
        $suffix = (string)$suffix;
        $suflen = strlen($suffix);
        if ($suflen && (substr($path, -$suflen) === $suffix)) {
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }

    /**
     * Unicode aware replacement for strlen()
     *
     * Tries, in this order: mb_strlen() (when UTF8_MBSTRING is truthy),
     * iconv_strlen(), utf8_decode() and finally the plain byte length.
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright. It is only
     * used as a late fallback because it is deprecated as of PHP 8.2.
     *
     * @param string $string
     * @return int
     * @see    utf8_decode()
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     */
    public static function strlen($string)
    {
        $string = (string)$string;

        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }

        if (function_exists('iconv_strlen')) {
            $length = iconv_strlen($string, 'UTF-8');
            // iconv_strlen() returns false on failure; try the next fallback then
            if ($length !== false) {
                return $length;
            }
        }

        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }

        // last resort: this counts bytes, not characters
        return strlen($string);
    }

    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return string
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     *
     */
    public static function substr($str, $offset, $length = null)
    {
        if (UTF8_MBSTRING) {
            // pass the encoding explicitly instead of relying on mb_internal_encoding();
            // a null length means "up to the end of the string"
            return mb_substr($str, $offset, $length, 'UTF-8');
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;
        $offset = (int)$offset;
        if ($length !== null) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $strlen = null; // computed lazily, see notes

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = self::strlen($str);
            $offset = max(0, $strlen + $offset);
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $offset_pattern = '^(?:' . self::anyCharsPattern($offset) . ')';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if ($length === null) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {
            if ($strlen === null) $strlen = self::strlen($str);
            if ($offset > $strlen) return ''; // another trivial case

            if ($length > 0) {
                // reduce any length that would go past the end of the string
                $length = min($strlen - $offset, $length);

                // +ve length requires ... a captured group of length characters
                $length_pattern = '(' . self::anyCharsPattern($length) . ')';
            } else {
                // $length is negative here, zero was handled as a trivial case above
                if ($length < ($offset - $strlen)) return '';

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                $length_pattern = '(.*)(?:' . self::anyCharsPattern(-$length) . ')$';
            }
        }

        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
        return $match[1];
    }

    /**
     * Build a PCRE fragment matching exactly $count arbitrary characters
     *
     * The count is split into blocks of 65535 because PCRE does not accept
     * larger repetition counts.
     *
     * @param int $count non-negative number of characters to match
     * @return string pattern fragment without delimiters or anchors
     */
    private static function anyCharsPattern($count)
    {
        $pattern = '';
        $blocks = (int)($count / 65535);
        if ($blocks) $pattern = '(?:.{65535}){' . $blocks . '}';

        return $pattern . '.{' . ($count % 65535) . '}';
    }

    // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Unlike the native function, $length defaults to 0 (insert), not to the
     * rest of the string. A null $length is treated like 0.
     *
     * @param string $string input string
     * @param string $replacement the replacement
     * @param int $start the replacing will begin at the start'th offset into string. A negative value
     *                            counts from the end of the string.
     * @param int $length If given and is positive, it represents the length of the portion of string which is
     *                            to be replaced. If negative, it is the number of characters from the end of
     *                            string at which to stop replacing. If length is zero then this function will
     *                            have the effect of inserting replacement into string at the given start offset.
     * @return string
     * @see    substr_replace()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function substr_replace($string, $replacement, $start, $length = 0)
    {
        $start = (int)$start;
        $length = (int)$length;

        // common case: no need to know the length of the string
        if ($start >= 0 && $length >= 0) {
            $ret = '';
            if ($start > 0) $ret .= self::substr($string, 0, $start);
            $ret .= $replacement;
            $ret .= self::substr($string, $start + $length);
            return $ret;
        }

        // negative start and/or length: translate both into absolute character positions
        $strlen = self::strlen($string);

        if ($start < 0) {
            $start = max(0, $strlen + $start);
        } elseif ($start > $strlen) {
            $start = $strlen;
        }

        if ($length < 0) {
            // stop replacing -$length characters before the end, but never before $start
            $end = max($start, $strlen + $length);
        } else {
            $end = min($strlen, $start + $length);
        }

        return self::substr($string, 0, $start) . $replacement . self::substr($string, $end);
    }
    // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps

    /**
     * Unicode aware replacement for ltrim()
     *
     * With an empty charlist this delegates to PHP's ltrim(). Otherwise every
     * character of the charlist is taken literally (no ".." ranges).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see    ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function ltrim($str, $charlist = '')
    {
        if ($charlist === '') return ltrim($str);

        return preg_replace('/^[' . self::quoteCharlist($charlist) . ']+/u', '', $str);
    }

    /**
     * Unicode aware replacement for rtrim()
     *
     * With an empty charlist this delegates to PHP's rtrim(). Otherwise every
     * character of the charlist is taken literally (no ".." ranges).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see    rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function rtrim($str, $charlist = '')
    {
        if ($charlist === '') return rtrim($str);

        // D modifier: '$' must only match at the very end, not before a trailing newline
        return preg_replace('/[' . self::quoteCharlist($charlist) . ']+$/uD', '', $str);
    }

    /**
     * Quote a list of characters for literal use inside a PCRE character class
     *
     * preg_quote() escapes, among others, '\', '-', '[', ']', '^' and the given
     * '/' delimiter, so no character of the list can change the meaning of the class.
     *
     * @param string $charlist
     * @return string
     */
    private static function quoteCharlist($charlist)
    {
        return preg_quote($charlist, '/');
    }

    /**
     * Unicode aware replacement for trim()
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see    trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function trim($str, $charlist = '')
    {
        if ($charlist === '') return trim($str);

        return self::ltrim(self::rtrim($str, $charlist), $charlist);
    }

    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available. In that case the result is also
     * passed through Normalizer::normalize() when the Normalizer class exists.
     *
     * @param string $string
     * @return string
     * @see    \dokuwiki\Utf8\PhpString::strtoupper()
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     */
    public static function strtolower($string)
    {
        if (UTF8_MBSTRING) {
            $lower = mb_strtolower($string, 'utf-8');
            // second argument false: do not trigger autoloading for the lookup
            if (class_exists('Normalizer', false)) {
                return \Normalizer::normalize($lower);
            }
            return $lower;
        }
        return strtr($string, Table::upperCaseToLowerCase());
    }

    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available
     *
     * @param string $string
     * @return string
     * @see    \dokuwiki\Utf8\PhpString::strtolower()
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     */
    public static function strtoupper($string)
    {
        if (UTF8_MBSTRING) return mb_strtoupper($string, 'utf-8');

        return strtr($string, Table::lowerCaseToUpperCase());
    }


    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     * @author Harry Fuecks
     *
     */
    public static function ucfirst($str)
    {
        $length = self::strlen($str);
        if ($length === 0) return '';
        if ($length === 1) return self::strtoupper($str);

        // split into the first character and the remainder
        if (!preg_match('/^(.{1})(.*)$/us', $str, $matches)) {
            // the pattern did not match (e.g. malformed UTF-8): leave the input untouched
            return $str;
        }
        return self::strtoupper($matches[1]) . $matches[2];
    }

    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @param string $str
     * @return string with first char of each word uppercase
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     */
    public static function ucwords($str)
    {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback(
            $pattern,
            static function ($matches) {
                $leadingws = $matches[2];
                $first = $matches[3];
                // Cut by the byte lengths of the captured groups. Stripping the separator with
                // ltrim() would miss form feeds, which ltrim() does not trim by default.
                $rest = (string)substr($matches[0], strlen($leadingws) + strlen($first));
                return $leadingws . self::strtoupper($first) . $rest;
            },
            $str
        );
    }

    /**
     * This is an Unicode aware replacement for strpos
     *
     * Searches byte-wise with strpos() and converts the byte position of the hit
     * into a character position. When the hit lies before the requested character
     * offset, the search is repeated further into the string.
     *
     * @param string $haystack
     * @param string $needle
     * @param integer $offset character offset to start searching from
     * @return integer|false character position of the first match, false if there is none
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     *
     */
    public static function strpos($haystack, $needle, $offset = 0)
    {
        $comp = 0;      // extra bytes to add to the byte offset of the next search
        $length = null; // character position of the last hit

        while ($length === null || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false) {
                return false;
            }

            // number of characters in front of the hit
            $length = self::strlen(substr($haystack, 0, $pos));

            if ($length < $offset) {
                // hit is before the requested character offset: remember how many more
                // bytes than characters precede it and search again from further right
                $comp = $pos - $length;
            }
        }

        return $length;
    }


}