append($string); } return; }

    /**
     * Check if ext/mbstring is available.
     *
     * @return  bool
     */
    public static function checkMbString()
    {
        return function_exists('mb_substr');
    }

    /**
     * Check if ext/iconv is available.
     *
     * @return  bool
     */
    public static function checkIconv()
    {
        return function_exists('iconv');
    }

    /**
     * Append a substring to the current string, i.e. add to the end.
     *
     * @param   string  $substring    Substring to append.
     * @return  \Hoa\Ustring
     */
    public function append($substring)
    {
        $this->_string .= $substring;

        return $this;
    }

    /**
     * Prepend a substring to the current string, i.e. add to the start.
     *
     * @param   string  $substring    Substring to prepend.
     * @return  \Hoa\Ustring
     */
    public function prepend($substring)
    {
        $this->_string = $substring . $this->_string;

        return $this;
    }

    /**
     * Pad the current string to a certain length (in characters) by repeating
     * another string, the piece.
     *
     * The string is left untouched when it is already at least $length
     * characters long, or when the piece is empty (an empty piece can never
     * fill the gap).
     *
     * @param   int     $length    Length.
     * @param   string  $piece     Piece.
     * @param   int     $side      Whether we append at the end or the beginning
     *                             of the current string.
     * @return  \Hoa\Ustring
     */
    public function pad($length, $piece, $side = self::END)
    {
        $difference = $length - $this->count();

        if (0 >= $difference) {
            return $this;
        }

        $pieceLength = mb_strlen($piece);

        // Guard against a division by zero (which used to loop forever or
        // throw, depending on the PHP version).
        if (0 === $pieceLength) {
            return $this;
        }

        // As many whole pieces as fit in the gap…
        $handle = str_repeat($piece, (int) ($difference / $pieceLength));

        // …then the leading part of one more piece to reach the exact length.
        $handle .= mb_substr($piece, 0, $difference - mb_strlen($handle));

        return
            static::END === $side
                ? $this->append($handle)
                : $this->prepend($handle);
    }

    /**
     * Make a comparison with a string.
     * Return < 0 if current string is less than $string, > 0 if greater and 0
     * if equal.
     * The comparison is delegated to the collator returned by
     * self::getCollator; when there is none, strcmp() is used instead.
     *
     * @param   mixed  $string    String.
     * @return  int
     */
    public function compare($string)
    {
        if (null === $collator = static::getCollator()) {
            return strcmp($this->_string, (string) $string);
        }

        return $collator->compare($this->_string, $string);
    }

    /**
     * Get collator.
*
     * The instance is created on first use and cached in static::$_collator.
     *
     * @return  \Collator|null    Null when the Collator class does not exist.
     */
    public static function getCollator()
    {
        if (false === class_exists('Collator')) {
            return null;
        }

        if (null === static::$_collator) {
            // setlocale() is given null as locale, so its return value is
            // used as the collator locale.
            static::$_collator = new \Collator(setlocale(LC_COLLATE, null));
        }

        return static::$_collator;
    }

    /**
     * Ensure that the pattern is safe for Unicode: add the “u” option.
     *
     * The options are whatever follows the last closing delimiter. PCRE
     * accepts bracket-style delimiters, i.e. (…), […], {…} and <…>, whose
     * closing character differs from the opening one, so the closing
     * delimiter is derived from the first character of the pattern.
     *
     * @param   string  $pattern    Pattern.
     * @return  string
     */
    public static function safePattern($pattern)
    {
        $delimiter = mb_substr($pattern, 0, 1);

        switch ($delimiter) {
            case '(':
                $closing = ')';

                break;

            case '[':
                $closing = ']';

                break;

            case '{':
                $closing = '}';

                break;

            case '<':
                $closing = '>';

                break;

            default:
                $closing = $delimiter;
        }

        $options = mb_substr(
            mb_strrchr($pattern, $closing, false),
            mb_strlen($closing)
        );

        if (false === strpos($options, 'u')) {
            $pattern .= 'u';
        }

        return $pattern;
    }

    /**
     * Perform a regular expression (PCRE) match.
     *
     * @param   string  $pattern    Pattern.
     * @param   array   $matches    Matches.
     * @param   int     $flags      Please, see constants self::WITH_OFFSET,
     *                              self::GROUP_BY_PATTERN and
     *                              self::GROUP_BY_TUPLE.
     * @param   int     $offset     Alternate place from which to start the
     *                              search (in characters).
     * @param   bool    $global     Whether the match is global or not.
     * @return  int
     */
    public function match(
        $pattern,
        &$matches = null,
        $flags    = 0,
        $offset   = 0,
        $global   = false
    ) {
        $pattern = static::safePattern($pattern);

        if (0 === $flags) {
            if (true === $global) {
                $flags = static::GROUP_BY_PATTERN;
            }
        } else {
            // NOTE(review): this clears the PREG_SPLIT_OFFSET_CAPTURE bit
            // from the flags; confirm this is the intended constant.
            $flags &= ~PREG_SPLIT_OFFSET_CAPTURE;
        }

        // Turn the character offset into the byte length of the skipped
        // prefix, which is then handed to preg_match(_all).
        $offset = strlen(mb_substr($this->_string, 0, $offset));

        if (true === $global) {
            return preg_match_all(
                $pattern,
                $this->_string,
                $matches,
                $flags,
                $offset
            );
        }

        return preg_match($pattern, $this->_string, $matches, $flags, $offset);
    }

    /**
     * Perform a regular expression (PCRE) search and replace.
     *
     * @param   mixed   $pattern        Pattern(s).
     * @param   mixed   $replacement    Replacement(s) (please, see
     *                                  preg_replace() documentation).
     * @param   int     $limit          Maximum of replacements. -1 for unbound.
* @return  \Hoa\Ustring
     */
    public function replace($pattern, $replacement, $limit = -1)
    {
        $pattern = static::safePattern($pattern);

        // A callable replacement goes through preg_replace_callback(),
        // anything else through preg_replace().
        if (false === is_callable($replacement)) {
            $this->_string = preg_replace(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        } else {
            $this->_string = preg_replace_callback(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        }

        return $this;
    }

    /**
     * Split the current string according to a given pattern (PCRE).
     *
     * @param   string  $pattern    Pattern (as a regular expression).
     * @param   int     $limit      Maximum of split. -1 for unbound.
     * @param   int     $flags      Please, see constants self::WITHOUT_EMPTY,
     *                              self::WITH_DELIMITERS, self::WITH_OFFSET.
     * @return  array
     */
    public function split(
        $pattern,
        $limit = -1,
        $flags = self::WITHOUT_EMPTY
    ) {
        return preg_split(
            static::safePattern($pattern),
            $this->_string,
            $limit,
            $flags
        );
    }

    /**
     * Iterator over chars.
     *
     * The string is split at every position that is neither its start nor
     * its end; with the “u” option, positions are between characters, not
     * bytes.
     *
     * @return  \ArrayIterator
     */
    public function getIterator()
    {
        return new \ArrayIterator(preg_split('#(?<!^)(?!$)#u', $this->_string));
    }

    /**
     * Perform a lowercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toLowerCase()
    {
        $this->_string = mb_strtolower($this->_string);

        return $this;
    }

    /**
     * Perform an uppercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toUpperCase()
    {
        $this->_string = mb_strtoupper($this->_string);

        return $this;
    }

    /**
     * Transform a UTF-8 string into an ASCII one.
     * First, try with a transliterator. If not available, will fallback to a
     * normalizer. If not available, will try something homemade.
     *
     * @param   bool  $try    Try something if \Normalizer is not present.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function toAscii($try = false)
    {
        // No byte above 0x7f: nothing to do.
        if (0 === preg_match('#[\x80-\xff]#', $this->_string)) {
            return $this;
        }

        $string  = $this->_string;
        $transId =
            'Any-Latin; ' .
            '[\p{S}] Name; ' .
            'Latin-ASCII';

        if (null !== $transliterator = static::getTransliterator($transId)) {
            // Rewrite each “\N{SOME NAME}” left in the transliterated string
            // as “(some name)”.
            $this->_string = preg_replace_callback(
                '#\\\N\{([A-Z ]+)\}#u',
                function (array $matches) {
                    return '(' . strtolower($matches[1]) . ')';
                },
                $transliterator->transliterate($string)
            );

            return $this;
        }

        if (false === class_exists('Normalizer')) {
            if (false === $try) {
                throw new Exception(
                    '%s needs the class Normalizer to work properly, ' .
                    'or you can force a try by using %1$s(true).',
                    0,
                    __METHOD__
                );
            }

            // Homemade attempt: let iconv transliterate, then drop a quote,
            // backtick or caret that precedes a word character.
            $string        = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
            $this->_string = preg_replace('#(?:[\'"`^](\w))#u', '\1', $string);

            return $this;
        }

        // Decompose (NFKD), strip the non-spacing marks, then transcode.
        $string        = \Normalizer::normalize($string, \Normalizer::NFKD);
        $string        = preg_replace('#\p{Mn}+#u', '', $string);
        $this->_string = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');

        return $this;
    }

    /**
     * Transliterate the string into another.
     * See self::getTransliterator for more information.
     *
     * @param   string  $identifier    Identifier.
     * @param   int     $start         Start.
     * @param   int     $end           End.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function transliterate($identifier, $start = 0, $end = null)
    {
        if (null === $transliterator = static::getTransliterator($identifier)) {
            throw new Exception(
                '%s needs the class Transliterator to work properly.',
                1,
                __METHOD__
            );
        }

        $this->_string = $transliterator->transliterate($this->_string, $start, $end);

        return $this;
    }

    /**
     * Get transliterator.
     * See http://userguide.icu-project.org/transforms/general for $identifier.
     *
     * @param   string  $identifier    Identifier.
     * @return  \Transliterator|null   Null when the Transliterator class does
     *                                 not exist.
     */
    public static function getTransliterator($identifier)
    {
        if (false === class_exists('Transliterator')) {
            return null;
        }

        return \Transliterator::create($identifier);
    }

    /**
     * Strip characters (default \s) of the current string.
     *
     * @param   string  $regex    Characters to remove.
     * @param   int     $side     Whether we trim the beginning, the end or both
     *                            sides, of the current string.
* @return  \Hoa\Ustring
     */
    public function trim($regex = '\s', $side = 3 /* static::BEGINNING | static::END */)
    {
        $regex  = '(?:' . $regex . ')+';
        $handle = null;

        if (0 !== ($side & static::BEGINNING)) {
            $handle .= '(^' . $regex . ')';
        }

        if (0 !== ($side & static::END)) {
            // Both sides requested: make the two anchored groups alternatives.
            if (null !== $handle) {
                $handle .= '|';
            }

            $handle .= '(' . $regex . '$)';
        }

        $this->_string    = preg_replace('#' . $handle . '#u', '', $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Compute offset (negative, unbound etc.).
     *
     * A negative offset counts from the end, and an offset outside the string
     * wraps around (modulo the number of characters). For an empty string
     * there is nothing to wrap around, so 0 is returned.
     *
     * @param   int  $offset    Offset.
     * @return  int
     */
    protected function computeOffset($offset)
    {
        $length = mb_strlen($this->_string);

        // Avoid a modulo by zero below.
        if (0 === $length) {
            return 0;
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $offset;
    }

    /**
     * Get a specific chars of the current string.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  string
     */
    public function offsetGet($offset)
    {
        return mb_substr($this->_string, $this->computeOffset($offset), 1);
    }

    /**
     * Set a specific character of the current string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @param   string  $value     Value.
     * @return  \Hoa\Ustring
     */
    public function offsetSet($offset, $value)
    {
        $head   = null;
        $offset = $this->computeOffset($offset);

        if (0 < $offset) {
            $head = mb_substr($this->_string, 0, $offset);
        }

        // Everything after the replaced character.
        $tail             = mb_substr($this->_string, $offset + 1);
        $this->_string    = $head . $value . $tail;
        $this->_direction = null;

        return $this;
    }

    /**
     * Delete a specific character of the current string.
     * This is self::offsetSet with a null value.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  \Hoa\Ustring
     */
    public function offsetUnset($offset)
    {
        return $this->offsetSet($offset, null);
    }

    /**
     * Check if a specific offset exists.
     * Always true, whatever the offset.
     *
     * @param   int  $offset    Offset.
     * @return  bool
     */
    public function offsetExists($offset)
    {
        return true;
    }

    /**
     * Reduce the strings.
     *
     * @param   int  $start     Position of first character.
     * @param   int  $length    Maximum number of characters.
* @return  \Hoa\Ustring
     */
    public function reduce($start, $length = null)
    {
        $this->_string = mb_substr($this->_string, $start, $length);

        return $this;
    }

    /**
     * Count number of characters of the current string.
     *
     * @return  int
     */
    public function count()
    {
        return mb_strlen($this->_string);
    }

    /**
     * Get byte (not character) at a specific offset.
     *
     * A negative offset counts from the end, and an offset outside the string
     * wraps around (modulo the number of bytes). An empty string has no byte
     * to return, so an empty string is returned.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  string
     */
    public function getByteAt($offset)
    {
        $length = strlen($this->_string);

        // Avoid a modulo by zero below.
        if (0 === $length) {
            return '';
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $this->_string[$offset];
    }

    /**
     * Count number of bytes (not characters) of the current string.
     *
     * @return  int
     */
    public function getBytesLength()
    {
        return strlen($this->_string);
    }

    /**
     * Get the width of the current string.
     * Useful when printing the string in monotype (some character need more
     * than one column to be printed).
     *
     * @return  int
     */
    public function getWidth()
    {
        return mb_strwidth($this->_string);
    }

    /**
     * Get direction of the current string.
     * Please, see the self::LTR and self::RTL constants.
     * It does not yet support embedding directions.
     *
     * The direction is the one of the first character; it is computed once
     * and kept in $this->_direction until that property is reset to null.
     *
     * @return  int
     */
    public function getDirection()
    {
        if (null === $this->_direction) {
            if (null === $this->_string) {
                $this->_direction = static::LTR;
            } else {
                $this->_direction = static::getCharDirection(
                    mb_substr($this->_string, 0, 1)
                );
            }
        }

        return $this->_direction;
    }

    /**
     * Get direction of a specific character.
     * Please, see the self::LTR and self::RTL constants.
     *
     * @param   string  $char    Character.
* @return  int
     */
    public static function getCharDirection($char)
    {
        // Inclusive [first, last] code point ranges that are right-to-left,
        // sorted in ascending order.
        static $rtlRanges = array(
            array(0x5be,   0x5be),
            array(0x5c0,   0x5c0),
            array(0x5c3,   0x5c3),
            array(0x5c6,   0x5c6),
            array(0x5d0,   0x5ea),
            array(0x5f0,   0x5f4),
            array(0x608,   0x608),
            array(0x60b,   0x60b),
            array(0x60d,   0x60d),
            array(0x61b,   0x61b),
            array(0x61e,   0x64a),
            array(0x66d,   0x66f),
            array(0x671,   0x6d5),
            array(0x6e5,   0x6e6),
            array(0x6ee,   0x6ef),
            array(0x6fa,   0x70d),
            array(0x710,   0x710),
            array(0x712,   0x72f),
            array(0x74d,   0x7a5),
            array(0x7b1,   0x7b1),
            array(0x7c0,   0x7ea),
            array(0x7f4,   0x7f5),
            array(0x7fa,   0x7fa),
            array(0x800,   0x815),
            array(0x81a,   0x81a),
            array(0x824,   0x824),
            array(0x828,   0x828),
            array(0x830,   0x83e),
            array(0x840,   0x858),
            array(0x85e,   0x85e),
            array(0x200f,  0x200f),
            array(0xfb1d,  0xfb1d),
            array(0xfb1f,  0xfb28),
            array(0xfb2a,  0xfb36),
            array(0xfb38,  0xfb3c),
            array(0xfb3e,  0xfb3e),
            array(0xfb40,  0xfb41),
            array(0xfb43,  0xfb44),
            array(0xfb46,  0xfbc1),
            array(0xfbd3,  0xfd3d),
            array(0xfd50,  0xfd8f),
            array(0xfd92,  0xfdc7),
            array(0xfdf0,  0xfdfc),
            array(0xfe70,  0xfe74),
            array(0xfe76,  0xfefc),
            array(0x10800, 0x10805),
            array(0x10808, 0x10808),
            array(0x1080a, 0x10835),
            array(0x10837, 0x10838),
            array(0x1083c, 0x1083c),
            array(0x1083f, 0x10855),
            array(0x10857, 0x1085f),
            array(0x10900, 0x1091b),
            array(0x10920, 0x10939),
            array(0x1093f, 0x1093f),
            array(0x10a00, 0x10a00),
            array(0x10a10, 0x10a13),
            array(0x10a15, 0x10a17),
            array(0x10a19, 0x10a33),
            array(0x10a40, 0x10a47),
            array(0x10a50, 0x10a58),
            array(0x10a60, 0x10a7f),
            array(0x10b00, 0x10b35),
            array(0x10b40, 0x10b55),
            array(0x10b58, 0x10b72),
            array(0x10b78, 0x10b7f)
        );

        $code = static::toCode($char);

        // Fast path: outside the span covered by the table.
        if ($code < 0x5be || $code > 0x10b7f) {
            return static::LTR;
        }

        foreach ($rtlRanges as $range) {
            // The table is sorted: no later range can contain $code.
            if ($code < $range[0]) {
                break;
            }

            if ($code <= $range[1]) {
                return static::RTL;
            }
        }

        return static::LTR;
    }

    /**
     * Get the number of column positions of a wide-character.
     *
     * This is a PHP implementation of wcwidth() and wcswidth() (defined in IEEE
     * Std 1002.1-2001) for Unicode, by Markus Kuhn. Please, see
     * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
     *
     * The wcwidth(wc) function shall either return 0 (if wc is a null
     * wide-character code), or return the number of column positions to be
     * occupied by the wide-character code wc, or return -1 (if wc does not
     * correspond to a printable wide-character code).
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function getCharWidth($char)
    {
        $char = (string) $char;
        $code = static::toCode($char);

        // Null character.
        if (0x0 === $code) {
            return 0;
        }

        // 8-bit control characters.
        if ($code < 0x20 || ($code >= 0x7f && $code < 0xa0)) {
            return -1;
        }

        // Non-spacing characters (0xad is explicitly excluded).
        if (0xad !== $code &&
            0 !== preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char)) {
            return 0;
        }

        // Nothing below 0x1100 takes two columns.
        if ($code < 0x1100) {
            return 1;
        }

        $isWide =
            $code <= 0x115f ||                                         // Hangul Jamo init. consonants
            0x2329 === $code ||
            0x232a === $code ||
            ($code >= 0x2e80 && $code <= 0xa4cf && 0x303f !== $code) || // CJK…Yi
            ($code >= 0xac00 && $code <= 0xd7a3) ||                     // Hangul Syllables
            ($code >= 0xf900 && $code <= 0xfaff) ||                     // CJK Compatibility Ideographs
            ($code >= 0xfe10 && $code <= 0xfe19) ||                     // Vertical forms
            ($code >= 0xfe30 && $code <= 0xfe6f) ||                     // CJK Compatibility Forms
            ($code >= 0xff00 && $code <= 0xff60) ||                     // Fullwidth Forms
            ($code >= 0xffe0 && $code <= 0xffe6) ||
            ($code >= 0x20000 && $code <= 0x2fffd) ||
            ($code >= 0x30000 && $code <= 0x3fffd);

        return $isWide ? 2 : 1;
    }

    /**
     * Check whether the character is printable or not, i.e. whether its
     * width (see self::getCharWidth) is at least one column.
     *
     * @param   string  $char    Character.
     * @return  bool
     */
    public static function isCharPrintable($char)
    {
        return static::getCharWidth($char) >= 1;
    }

    /**
     * Get a UTF-8 character from its decimal code representation.
*
     * @param   int  $code    Code.
     * @return  string
     */
    public static function fromCode($code)
    {
        // Build the hexadecimal HTML entity of the code and let mbstring
        // decode it.
        return mb_convert_encoding(
            '&#x' . dechex($code) . ';',
            'UTF-8',
            'HTML-ENTITIES'
        );
    }

    /**
     * Get a decimal code representation of a specific character.
     *
     * Only the first UTF-8 sequence of $char is decoded. An empty string
     * yields 0.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function toCode($char)
    {
        $char = (string) $char;

        // ord('') is 0; return it directly instead of reading an offset that
        // does not exist.
        if ('' === $char) {
            return 0;
        }

        $code  = ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) { // 0xxxxxxx
            return $code;
        }

        // The leading byte tells the length of the sequence and carries the
        // highest payload bits.
        if (($code & 0xe0) === 0xc0) { // 110xxxxx
            $bytes = 2;
            $code  = $code & ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) { // 1110xxxx
            $bytes = 3;
            $code  = $code & ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) { // 11110xxx
            $bytes = 4;
            $code  = $code & ~0xf0;
        }

        // Each continuation byte contributes its low six bits.
        for ($i = 2; $i <= $bytes; $i++) { // 10xxxxxx
            $code = ($code << 6) + (ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }

    /**
     * Get a binary representation of a specific character.
     *
     * Every byte of $char is rendered as eight binary digits.
     *
     * @param   string  $char    Character.
     * @return  string|null      Null when $char is empty.
     */
    public static function toBinaryCode($char)
    {
        $char = (string) $char;
        $out  = null;

        for ($i = 0, $max = strlen($char); $i < $max; ++$i) {
            // sprintf(), not vsprintf(): the latter expects an array of
            // values.
            $out .= sprintf('%08b', ord($char[$i]));
        }

        return $out;
    }

    /**
     * Transcode.
     *
     * @param   string  $string    String.
     * @param   string  $from      Original encoding.
     * @param   string  $to        Final encoding.
     * @return  string
     * @throws  \Hoa\Ustring\Exception
     */
    public static function transcode($string, $from, $to = 'UTF-8')
    {
        if (false === static::checkIconv()) {
            throw new Exception(
                '%s needs the iconv extension.',
                2,
                __CLASS__
            );
        }

        return iconv($from, $to, $string);
    }

    /**
     * Check if a string is encoded in UTF-8.
     *
     * An empty pattern with the “u” option is matched against the string;
     * the result of preg_match() is cast to a boolean.
     *
     * @param   string  $string    String.
     * @return  bool
     */
    public static function isUtf8($string)
    {
        return (bool) preg_match('##u', $string);
    }

    /**
     * Copy current object string
     *
     * @return  \Hoa\Ustring
     */
    public function copy()
    {
        return clone $this;
    }

    /**
     * Transform the object as a string.
     *
     * @return  string
     */
    public function __toString()
    {
        return $this->_string;
    }
}

/**
 * Flex entity.
*/
Consistency::flexEntity('Hoa\Ustring\Ustring');

// The class relies on the mbstring extension: fail as soon as the file is
// loaded when it is missing.
if (!Ustring::checkMbString()) {
    throw new Exception(
        '%s needs the mbstring extension.',
        0,
        __NAMESPACE__ . '\Ustring'
    );
}