* @see strlen()
     */
    public static function strlen($string)
    {
        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }

        if (function_exists('iconv_strlen')) {
            return iconv_strlen($string, 'UTF-8');
        }

        // utf8_decode is deprecated
        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }

        // last resort: native strlen() counts bytes, not characters
        return strlen($string);
    }

    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return string
     * @author Harry Fuecks
     * @author Chris Smith
     *
     */
    public static function substr($str, $offset, $length = null)
    {
        if (UTF8_MBSTRING) {
            // a null length means "up to the end of the string"; the encoding is passed
            // explicitly so the result does not depend on mb_internal_encoding()
            return mb_substr($str, $offset, $length, 'UTF-8');
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;
        $offset = (int)$offset;
        if ($length !== null) {
            $length = (int)$length;
        }

        // handle trivial cases
        if ($length === 0) {
            return '';
        }
        if ($offset < 0 && $length < 0 && $length < $offset) {
            return '';
        }

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = self::strlen($str); // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) {
                $offset = 0;
            }
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset / 65535);
            $Oy = $offset % 65535;

            if ($Ox) {
                $offset_pattern = '(?:.{65535}){' . $Ox . '}';
            }
            $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if ($length === null) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {
            if (!isset($strlen)) {
                $strlen = self::strlen($str); // see notes
            }
            if ($offset > $strlen) {
                return ''; // another trivial case
            }

            if ($length > 0) {
                // reduce any length that would go past the end of the string
                $length = min($strlen - $offset, $length);
                $Lx = (int)($length / 65535);
                $Ly = $length % 65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) {
                    $length_pattern = '(?:.{65535}){' . $Lx . '}';
                }
                $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
            } elseif ($length < 0) {
                if ($length < ($offset - $strlen)) {
                    return '';
                }
                $Lx = (int)((-$length) / 65535);
                $Ly = (-$length) % 65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) {
                    $length_pattern = '(?:.{65535}){' . $Lx . '}';
                }
                $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
            }
        }

        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) {
            return '';
        }
        return $match[1];
    }

    // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Unlike the native function, $length defaults to 0 (insert) rather than "to the end".
     *
     * @param string $string input string
     * @param string $replacement the replacement
     * @param int $start the replacing will begin at the start'th offset into string. A negative value
     *                   counts from the end of the string.
     * @param int $length If given and is positive, it represents the length of the portion of string which is
     *                    to be replaced. If negative, it is the number of characters from the end of string
     *                    at which to stop replacing. If length is zero then this function will have the
     *                    effect of inserting replacement into string at the given start offset.
     * @return string
     * @see substr_replace()
     *
     * @author Andreas Gohr
     */
    public static function substr_replace($string, $replacement, $start, $length = 0)
    {
        $start = (int)$start;
        $length = (int)$length;

        // translate negative arguments into absolute character positions, mirroring the native
        // substr_replace(); the string length is only computed when it is actually needed
        if ($start < 0 || $length < 0) {
            $strlen = self::strlen($string);
            if ($start < 0) {
                $start = max(0, $strlen + $start);
            }
            if ($length < 0) {
                $length = max(0, $strlen - $start + $length);
            }
        }

        $ret = '';
        if ($start > 0) {
            $ret .= self::substr($string, 0, $start);
        }
        $ret .= $replacement;
        $ret .= self::substr($string, $start + $length);
        return $ret;
    }
    // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps

    /**
     * Unicode aware replacement for ltrim()
     *
     * With an empty $charlist the native ltrim() is used. Otherwise every character
     * in $charlist is taken literally (no ".." ranges).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see ltrim()
     *
     * @author Andreas Gohr
     */
    public static function ltrim($str, $charlist = '')
    {
        if ($charlist === '') {
            return ltrim($str);
        }

        // quote charlist for use in a character class; preg_quote also covers a leading '^',
        // which would otherwise negate the class
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
    }

    /**
     * Unicode aware replacement for rtrim()
     *
     * With an empty $charlist the native rtrim() is used. Otherwise every character
     * in $charlist is taken literally (no ".." ranges).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see rtrim()
     *
     * @author Andreas Gohr
     */
    public static function rtrim($str, $charlist = '')
    {
        if ($charlist === '') {
            return rtrim($str);
        }

        // quote charlist for use in a character class; preg_quote also covers a leading '^',
        // which would otherwise negate the class
        $charlist = preg_quote($charlist, '/');

        // D modifier: '$' must only match at the very end, not before a trailing newline
        return preg_replace('/[' . $charlist . ']+$/uD', '', $str);
    }

    /**
     * Unicode aware replacement for trim()
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see trim()
     *
     * @author Andreas Gohr
     */
    public static function trim($str, $charlist = '')
    {
        if ($charlist === '') {
            return trim($str);
        }

        return self::ltrim(self::rtrim($str, $charlist), $charlist);
    }

    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available
     *
     * @param string $string
     * @return string
     * @see \dokuwiki\Utf8\PhpString::strtoupper()
     *
     * @author Leo Feyer
     * @see strtolower()
     */
    public static function strtolower($string)
    {
        if ($string === null) {
            return ''; // pre-8.1 behaviour
        }

        if (UTF8_MBSTRING) {
            $lower = mb_strtolower($string, 'utf-8');

            // second argument false: do not trigger autoloading for the class check
            if (class_exists('Normalizer', false)) {
                $normalized = \Normalizer::normalize($lower);
                // normalize() returns false on error; keep the un-normalised string then
                if ($normalized !== false) {
                    return $normalized;
                }
            }

            return $lower;
        }

        return strtr($string, Table::upperCaseToLowerCase());
    }

    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available
     *
     * @param string $string
     * @return string
     * @see \dokuwiki\Utf8\PhpString::strtolower()
     *
     * @author Leo Feyer
     * @see strtoupper()
     */
    public static function strtoupper($string)
    {
        if ($string === null) {
            return ''; // pre-8.1 behaviour, consistent with strtolower()
        }

        if (UTF8_MBSTRING) {
            return mb_strtoupper($string, 'utf-8');
        }

        return strtr($string, Table::lowerCaseToUpperCase());
    }

    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     * @author Harry Fuecks
     *
     */
    public static function ucfirst($str)
    {
        switch (self::strlen($str)) {
            case 0:
                return '';
            case 1:
                return self::strtoupper($str);
            default:
                // split into first character and remainder; with the 'u' flag the match
                // fails on malformed UTF-8, in which case the input is returned untouched
                if (!preg_match('/^(.{1})(.*)$/us', $str, $matches)) {
                    return $str;
                }
                return self::strtoupper($matches[1]) . $matches[2];
        }
    }

    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @param string $str
     * @return string with first char of each word uppercase
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     */
    public static function ucwords($str)
    {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback(
            $pattern,
            function ($matches) {
                // $matches[2] is the whitespace run before the word ('' at string start),
                // $matches[3] is the word's first character
                $leadingws = $matches[2];
                $firstchar = $matches[3];

                // Cut the word's tail out by exact byte length. Native ltrim() must not be used
                // to drop the whitespace because it does not strip form feeds (\x0c).
                $rest = substr($matches[0], strlen($leadingws) + strlen($firstchar));

                return $leadingws . self::strtoupper($firstchar) . $rest;
            },
            $str
        );
    }

    /**
     * This is an Unicode aware replacement for strpos
     *
     * @param string $haystack
     * @param string $needle
     * @param integer $offset character offset to start searching from
     * @return integer|false character position of the first match, false if not found
     * @author Leo Feyer
     * @see strpos()
     *
     */
    public static function strpos($haystack, $needle, $offset = 0)
    {
        // bytes-minus-characters difference of the prefix before the last match;
        // added to the character offset to get a byte offset for the native strpos()
        $comp = 0;
        $length = null;

        while ($length === null || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false) {
                return false;
            }

            // convert the byte position of the match into a character position
            $length = self::strlen(substr($haystack, 0, $pos));

            // match lies before the requested character offset: search again further right
            if ($length < $offset) {
                $comp = $pos - $length;
            }
        }

        return $length;
    }
}