<?php

/**
 * Hoa
 *
 *
 * @license
 *
 * New BSD License
 *
 * Copyright © 2007-2017, Hoa community. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the Hoa nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

namespace Hoa\Ustring;

use Hoa\Consistency;

/**
 * Class \Hoa\Ustring.
 *
 * This class represents a UTF-8 string.
 * Please, see:
 *     • http://www.ietf.org/rfc/rfc3454.txt;
 *     • http://unicode.org/reports/tr9/;
 *     • http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt.
 *
 * @copyright  Copyright © 2007-2017 Hoa community
 * @license    New BSD License
 */
class Ustring implements \ArrayAccess, \Countable, \IteratorAggregate
{
    /**
     * Left-To-Right.
     *
     * @const int
     */
    const LTR              = 0;

    /**
     * Right-To-Left.
     *
     * @const int
     */
    const RTL              = 1;

    /**
     * ZERO WIDTH NON-BREAKING SPACE (ZWNBSP, aka byte-order mark, BOM).
     *
     * @const int
     */
    const BOM              = 0xfeff;

    /**
     * LEFT-TO-RIGHT MARK.
     *
     * @const int
     */
    const LRM              = 0x200e;

    /**
     * RIGHT-TO-LEFT MARK.
     *
     * @const int
     */
    const RLM              = 0x200f;

    /**
     * LEFT-TO-RIGHT EMBEDDING.
     *
     * @const int
     */
    const LRE              = 0x202a;

    /**
     * RIGHT-TO-LEFT EMBEDDING.
     *
     * @const int
     */
    const RLE              = 0x202b;

    /**
     * POP DIRECTIONAL FORMATTING.
     *
     * @const int
     */
    const PDF              = 0x202c;

    /**
     * LEFT-TO-RIGHT OVERRIDE.
     *
     * @const int
     */
    const LRO              = 0x202d;

    /**
     * RIGHT-TO-LEFT OVERRIDE.
     *
     * @const int
     */
    const RLO              = 0x202e;

    /**
     * Represent the beginning of the string.
     *
     * @const int
     */
    const BEGINNING        = 1;

    /**
     * Represent the end of the string.
     *
     * @const int
     */
    const END              = 2;

    /**
     * Split: only non-empty pieces are returned.
     *
     * @const int
     */
    const WITHOUT_EMPTY    = PREG_SPLIT_NO_EMPTY;

    /**
     * Split: parenthesized expression in the delimiter pattern will be captured
     * and returned.
     *
     * @const int
     */
    const WITH_DELIMITERS  = PREG_SPLIT_DELIM_CAPTURE;

    /**
     * Split: offsets of captures will be returned.
     *
     * @const int
     */
    const WITH_OFFSET      = 260; //   PREG_OFFSET_CAPTURE
                                  // | PREG_SPLIT_OFFSET_CAPTURE

    /**
     * Group results by patterns.
     *
     * @const int
     */
    const GROUP_BY_PATTERN = PREG_PATTERN_ORDER;

    /**
     * Group results by tuple (set of patterns).
     *
     * @const int
     */
    const GROUP_BY_TUPLE   = PREG_SET_ORDER;

    /**
     * Current string.
     * Defaults to the empty string so that an instance built without any
     * argument can still be counted, iterated and cast to string.
     *
     * @var string
     */
    protected $_string          = '';

    /**
     * Direction. Please see self::LTR and self::RTL constants.
     * This is a cache computed lazily by self::getDirection; it is set back to
     * null every time the string is modified.
     *
     * @var int
     */
    protected $_direction       = null;

    /**
     * Collator.
     *
     * @var \Collator
     */
    protected static $_collator = null;


    /**
     * Construct a UTF-8 string.
     *
     * @param   string  $string    String.
     */
    public function __construct($string = null)
    {
        if (null !== $string) {
            $this->append($string);
        }
    }

    /**
     * Check if ext/mbstring is available.
     *
     * @return  bool
     */
    public static function checkMbString()
    {
        return function_exists('mb_substr');
    }

    /**
     * Check if ext/iconv is available.
     *
     * @return  bool
     */
    public static function checkIconv()
    {
        return function_exists('iconv');
    }

    /**
     * Append a substring to the current string, i.e. add to the end.
     *
     * @param   string  $substring    Substring to append.
     * @return  \Hoa\Ustring
     */
    public function append($substring)
    {
        $this->_string    .= $substring;
        // The first character changes when the string was empty.
        $this->_direction  = null;

        return $this;
    }

    /**
     * Prepend a substring to the current string, i.e. add to the start.
     *
     * @param   string  $substring    Substring to prepend.
     * @return  \Hoa\Ustring
     */
    public function prepend($substring)
    {
        $this->_string    = $substring . $this->_string;
        // The direction is derived from the first character, which may have
        // just changed.
        $this->_direction = null;

        return $this;
    }

    /**
     * Pad the current string to a certain length with another piece.
     * The piece is repeated as many times as needed and truncated so that the
     * resulting string counts exactly $length characters. Nothing happens if
     * the string is already long enough or if the piece is empty.
     *
     * @param   int     $length    Length (in characters).
     * @param   string  $piece     Piece.
     * @param   int     $side      Whether we append at the end or the beginning
     *                             of the current string.
     * @return  \Hoa\Ustring
     */
    public function pad($length, $piece, $side = self::END)
    {
        $difference = $length - $this->count();

        if (0 >= $difference) {
            return $this;
        }

        $pieceLength = mb_strlen($piece);

        // An empty piece cannot fill anything (and would lead to a division
        // by zero below).
        if (0 === $pieceLength) {
            return $this;
        }

        // Whole copies of the piece first, then the leading part of the piece
        // to reach the exact length.
        $handle  = str_repeat($piece, (int) floor($difference / $pieceLength));
        $handle .= mb_substr($piece, 0, $difference - mb_strlen($handle));

        return
            static::END === $side
                ? $this->append($handle)
                : $this->prepend($handle);
    }

    /**
     * Make a comparison with a string.
     * Return < 0 if current string is less than $string, > 0 if greater and 0
     * if equal.
     * The collator is used when available, else a byte comparison is made.
     *
     * @param   mixed  $string    String.
     * @return  int
     */
    public function compare($string)
    {
        $string = (string) $string;

        if (null === $collator = static::getCollator()) {
            return strcmp($this->_string, $string);
        }

        return $collator->compare($this->_string, $string);
    }

    /**
     * Get collator.
     * The collator is created once and shared (static). Null is returned if
     * the \Collator class does not exist.
     *
     * @return  \Collator
     */
    public static function getCollator()
    {
        if (false === class_exists('Collator')) {
            return null;
        }

        if (null === static::$_collator) {
            static::$_collator = new \Collator(setlocale(LC_COLLATE, null));
        }

        return static::$_collator;
    }

    /**
     * Ensure that the pattern is safe for Unicode: add the “u” option.
     * The options are what follows the last closing delimiter. Bracket-style
     * delimiters (“(…)”, “[…]”, “{…}” and “<…>”) are supported.
     *
     * @param   string  $pattern    Pattern.
     * @return  string
     */
    public static function safePattern($pattern)
    {
        $opening  = mb_substr($pattern, 0, 1);
        $brackets = array('(' => ')', '[' => ']', '{' => '}', '<' => '>');
        $closing  = isset($brackets[$opening]) ? $brackets[$opening] : $opening;
        $tail     = '' === $closing
                        ? false
                        : mb_strrchr($pattern, $closing, false);
        $options  = false === $tail
                        ? ''
                        : mb_substr($tail, mb_strlen($closing));

        if (false === strpos($options, 'u')) {
            $pattern .= 'u';
        }

        return $pattern;
    }

    /**
     * Perform a regular expression (PCRE) match.
     *
     * @param   string  $pattern    Pattern.
     * @param   array   $matches    Matches.
     * @param   int     $flags      Please, see constants self::WITH_OFFSET,
     *                              self::GROUP_BY_PATTERN and
     *                              self::GROUP_BY_TUPLE.
     * @param   int     $offset     Alternate place from which to start the
     *                              search (in characters).
     * @param   bool    $global     Whether the match is global or not.
     * @return  int
     */
    public function match(
        $pattern,
        &$matches = null,
        $flags    = 0,
        $offset   = 0,
        $global   = false
    ) {
        $pattern = static::safePattern($pattern);

        if (0 === $flags) {
            if (true === $global) {
                $flags = static::GROUP_BY_PATTERN;
            }
        } else {
            // self::WITH_OFFSET also carries the flag dedicated to
            // preg_split(), remove it.
            $flags &= ~PREG_SPLIT_OFFSET_CAPTURE;
        }

        // PCRE functions expect an offset in bytes, not in characters.
        $offset = strlen(mb_substr($this->_string, 0, $offset));

        if (true === $global) {
            return preg_match_all(
                $pattern,
                $this->_string,
                $matches,
                $flags,
                $offset
            );
        }

        return preg_match($pattern, $this->_string, $matches, $flags, $offset);
    }

    /**
     * Perform a regular expression (PCRE) search and replace.
     * If $replacement is callable, preg_replace_callback() is used, else
     * preg_replace() is used. Note that a string holding the name of an
     * existing function is callable.
     *
     * @param   mixed   $pattern        Pattern(s): a string or an array of
     *                                  strings.
     * @param   mixed   $replacement    Replacement(s) (please, see
     *                                  preg_replace() documentation).
     * @param   int     $limit          Maximum of replacements. -1 for unbound.
     * @return  \Hoa\Ustring
     */
    public function replace($pattern, $replacement, $limit = -1)
    {
        if (true === is_array($pattern)) {
            foreach ($pattern as $key => $onePattern) {
                $pattern[$key] = static::safePattern($onePattern);
            }
        } else {
            $pattern = static::safePattern($pattern);
        }

        if (false === is_callable($replacement)) {
            $this->_string = preg_replace(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        } else {
            $this->_string = preg_replace_callback(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        }

        $this->_direction = null;

        return $this;
    }

    /**
     * Split the current string according to a given pattern (PCRE).
     *
     * @param   string  $pattern    Pattern (as a regular expression).
     * @param   int     $limit      Maximum of split. -1 for unbound.
     * @param   int     $flags      Please, see constants self::WITHOUT_EMPTY,
     *                              self::WITH_DELIMITERS, self::WITH_OFFSET.
     * @return  array
     */
    public function split(
        $pattern,
        $limit = -1,
        $flags = self::WITHOUT_EMPTY
    ) {
        return preg_split(
            static::safePattern($pattern),
            $this->_string,
            $limit,
            $flags
        );
    }

    /**
     * Iterator over chars.
     * There is one item per character, new lines included. An empty string
     * gives an empty iterator.
     *
     * @return  \ArrayIterator
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        // The empty pattern matches between every character; the empty
        // leading and trailing pieces are dropped.
        return new \ArrayIterator(
            preg_split('##u', $this->_string, -1, PREG_SPLIT_NO_EMPTY)
        );
    }

    /**
     * Perform a lowercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toLowerCase()
    {
        $this->_string = mb_strtolower($this->_string);

        return $this;
    }

    /**
     * Perform an uppercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toUpperCase()
    {
        $this->_string = mb_strtoupper($this->_string);

        return $this;
    }

    /**
     * Transform a UTF-8 string into an ASCII one.
     * First, try with a transliterator. If not available, will fallback to a
     * normalizer. If not available, will try something homemade.
     * Nothing is done if the string contains no byte greater than 0x7f.
     *
     * @param   bool  $try    Try something if \Normalizer is not present.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function toAscii($try = false)
    {
        if (0 === preg_match('#[\x80-\xff]#', $this->_string)) {
            return $this;
        }

        $string  = $this->_string;
        $transId =
            'Any-Latin; ' .
            '[\p{S}] Name; ' .
            'Latin-ASCII';

        // Whatever the strategy, the string is going to be rewritten.
        $this->_direction = null;

        if (null !== $transliterator = static::getTransliterator($transId)) {
            // Symbols are transliterated to “\N{NAME}”, rewrite them to
            // “(name)”.
            $this->_string = preg_replace_callback(
                '#\\\N\{([A-Z ]+)\}#u',
                function (array $matches) {
                    return '(' . strtolower($matches[1]) . ')';
                },
                $transliterator->transliterate($string)
            );

            return $this;
        }

        if (false === class_exists('Normalizer')) {
            if (false === $try) {
                throw new Exception(
                    '%s needs the class Normalizer to work properly, ' .
                    'or you can force a try by using %1$s(true).',
                    0,
                    __METHOD__
                );
            }

            $string        = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
            $this->_string = preg_replace('#(?:[\'"`^](\w))#u', '\1', $string);

            return $this;
        }

        // Decompose the characters, remove the non-spacing marks (accents
        // etc.) and transcode what remains.
        $string        = \Normalizer::normalize($string, \Normalizer::NFKD);
        $string        = preg_replace('#\p{Mn}+#u', '', $string);
        $this->_string = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');

        return $this;
    }

    /**
     * Transliterate the string into another.
     * See self::getTransliterator for more information.
     *
     * @param   string  $identifier    Identifier.
     * @param   int     $start         Start.
     * @param   int     $end           End. Null means up to the end of the
     *                                 string.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function transliterate($identifier, $start = 0, $end = null)
    {
        if (null === $transliterator = static::getTransliterator($identifier)) {
            throw new Exception(
                '%s needs the class Transliterator to work properly.',
                1,
                __METHOD__
            );
        }

        // \Transliterator::transliterate expects an integer; -1 is its own
        // default and stands for “up to the end of the string”.
        if (null === $end) {
            $end = -1;
        }

        $this->_string    = $transliterator->transliterate($this->_string, $start, $end);
        $this->_direction = null;

        return $this;
    }

    /**
     * Get transliterator.
     * See http://userguide.icu-project.org/transforms/general for $identifier.
     * Null is returned if the \Transliterator class does not exist.
     *
     * @param   string  $identifier    Identifier.
     * @return  \Transliterator
     */
    public static function getTransliterator($identifier)
    {
        if (false === class_exists('Transliterator')) {
            return null;
        }

        return \Transliterator::create($identifier);
    }

    /**
     * Strip characters (default \s) of the current string.
     *
     * @param   string  $regex    Characters to remove (as a regular expression
     *                            fragment, used between “#” delimiters).
     * @param   int     $side     Whether we trim the beginning, the end or both
     *                            sides, of the current string.
     * @return  \Hoa\Ustring
     */
    public function trim($regex = '\s', $side = 3 /* static::BEGINNING | static::END */)
    {
        $regex  = '(?:' . $regex . ')+';
        $handle = '';

        if (0 !== ($side & static::BEGINNING)) {
            $handle .= '(^' . $regex . ')';
        }

        if (0 !== ($side & static::END)) {
            if ('' !== $handle) {
                $handle .= '|';
            }

            $handle .= '(' . $regex . '$)';
        }

        $this->_string    = preg_replace('#' . $handle . '#u', '', $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Compute offset (negative, unbound etc.).
     * The offset wraps around the length of the string: -1 is the last
     * character, the length of the string is the first character etc. With an
     * empty string, the offset is always 0.
     *
     * @param   int        $offset    Offset.
     * @return  int
     */
    protected function computeOffset($offset)
    {
        $length = mb_strlen($this->_string);

        // Nothing to wrap around (and no modulo by zero).
        if (0 === $length) {
            return 0;
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $offset;
    }

    /**
     * Get a specific chars of the current string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @return  string
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return mb_substr($this->_string, $this->computeOffset($offset), 1);
    }

    /**
     * Set a specific character of the current string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @param   string  $value     Value.
     * @return  \Hoa\Ustring
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        $head   = null;
        $offset = $this->computeOffset($offset);

        if (0 < $offset) {
            $head = mb_substr($this->_string, 0, $offset);
        }

        $tail             = mb_substr($this->_string, $offset + 1);
        $this->_string    = $head . $value . $tail;
        $this->_direction = null;

        return $this;
    }

    /**
     * Delete a specific character of the current string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @return  \Hoa\Ustring
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        return $this->offsetSet($offset, null);
    }

    /**
     * Check if a specific offset exists.
     * Always true: this method does not look at the offset.
     *
     * @param   int     $offset    Offset.
     * @return  bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return true;
    }

    /**
     * Reduce the strings.
     *
     * @param   int  $start     Position of first character.
     * @param   int  $length    Maximum number of characters.
     * @return  \Hoa\Ustring
     */
    public function reduce($start, $length = null)
    {
        $this->_string    = mb_substr($this->_string, $start, $length);
        $this->_direction = null;

        return $this;
    }

    /**
     * Count number of characters of the current string.
     *
     * @return  int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return mb_strlen($this->_string);
    }

    /**
     * Get byte (not character) at a specific offset.
     * The offset wraps around the number of bytes, like the offsets of
     * characters do. An empty string gives an empty string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @return  string
     */
    public function getByteAt($offset)
    {
        $length = strlen($this->_string);

        // No byte to return (and no modulo by zero).
        if (0 === $length) {
            return '';
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $this->_string[$offset];
    }

    /**
     * Count number of bytes (not characters) of the current string.
     *
     * @return  int
     */
    public function getBytesLength()
    {
        return strlen($this->_string);
    }

    /**
     * Get the width of the current string.
     * Useful when printing the string in monotype (some character need more
     * than one column to be printed).
     *
     * @return  int
     */
    public function getWidth()
    {
        return mb_strwidth($this->_string);
    }

    /**
     * Get direction of the current string.
     * Please, see the self::LTR and self::RTL constants.
     * It does not yet support embedding directions: only the first character
     * is considered. The result is cached until the string is modified.
     *
     * @return  int
     */
    public function getDirection()
    {
        if (null === $this->_direction) {
            if (null === $this->_string) {
                $this->_direction = static::LTR;
            } else {
                $this->_direction = static::getCharDirection(
                    mb_substr($this->_string, 0, 1)
                );
            }
        }

        return $this->_direction;
    }

    /**
     * Get direction of a specific character.
     * Please, see the self::LTR and self::RTL constants.
     * A character is right-to-left if its code is listed below, else it is
     * left-to-right.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function getCharDirection($char)
    {
        $c = static::toCode($char);

        // Fast path: out of the bounds of all the ranges listed below.
        if (!(0x5be <= $c && 0x10b7f >= $c)) {
            return static::LTR;
        }

        if (0x85e >= $c) {
            if (0x5be === $c ||
                0x5c0 === $c ||
                0x5c3 === $c ||
                0x5c6 === $c ||
                (0x5d0 <= $c && 0x5ea >= $c) ||
                (0x5f0 <= $c && 0x5f4 >= $c) ||
                0x608 === $c ||
                0x60b === $c ||
                0x60d === $c ||
                0x61b === $c ||
                (0x61e <= $c && 0x64a >= $c) ||
                (0x66d <= $c && 0x66f >= $c) ||
                (0x671 <= $c && 0x6d5 >= $c) ||
                (0x6e5 <= $c && 0x6e6 >= $c) ||
                (0x6ee <= $c && 0x6ef >= $c) ||
                (0x6fa <= $c && 0x70d >= $c) ||
                0x710 === $c ||
                (0x712 <= $c && 0x72f >= $c) ||
                (0x74d <= $c && 0x7a5 >= $c) ||
                0x7b1 === $c ||
                (0x7c0 <= $c && 0x7ea >= $c) ||
                (0x7f4 <= $c && 0x7f5 >= $c) ||
                0x7fa === $c ||
                (0x800 <= $c && 0x815 >= $c) ||
                0x81a === $c ||
                0x824 === $c ||
                0x828 === $c ||
                (0x830 <= $c && 0x83e >= $c) ||
                (0x840 <= $c && 0x858 >= $c) ||
                0x85e === $c) {
                return static::RTL;
            }
        } elseif (0x200f === $c) {
            // RIGHT-TO-LEFT MARK.
            return static::RTL;
        } elseif (0xfb1d <= $c) {
            if (0xfb1d === $c ||
                (0xfb1f <= $c && 0xfb28 >= $c) ||
                (0xfb2a <= $c && 0xfb36 >= $c) ||
                (0xfb38 <= $c && 0xfb3c >= $c) ||
                0xfb3e === $c ||
                (0xfb40 <= $c && 0xfb41 >= $c) ||
                (0xfb43 <= $c && 0xfb44 >= $c) ||
                (0xfb46 <= $c && 0xfbc1 >= $c) ||
                (0xfbd3 <= $c && 0xfd3d >= $c) ||
                (0xfd50 <= $c && 0xfd8f >= $c) ||
                (0xfd92 <= $c && 0xfdc7 >= $c) ||
                (0xfdf0 <= $c && 0xfdfc >= $c) ||
                (0xfe70 <= $c && 0xfe74 >= $c) ||
                (0xfe76 <= $c && 0xfefc >= $c) ||
                (0x10800 <= $c && 0x10805 >= $c) ||
                0x10808 === $c ||
                (0x1080a <= $c && 0x10835 >= $c) ||
                (0x10837 <= $c && 0x10838 >= $c) ||
                0x1083c === $c ||
                (0x1083f <= $c && 0x10855 >= $c) ||
                (0x10857 <= $c && 0x1085f >= $c) ||
                (0x10900 <= $c && 0x1091b >= $c) ||
                (0x10920 <= $c && 0x10939 >= $c) ||
                0x1093f === $c ||
                0x10a00 === $c ||
                (0x10a10 <= $c && 0x10a13 >= $c) ||
                (0x10a15 <= $c && 0x10a17 >= $c) ||
                (0x10a19 <= $c && 0x10a33 >= $c) ||
                (0x10a40 <= $c && 0x10a47 >= $c) ||
                (0x10a50 <= $c && 0x10a58 >= $c) ||
                (0x10a60 <= $c && 0x10a7f >= $c) ||
                (0x10b00 <= $c && 0x10b35 >= $c) ||
                (0x10b40 <= $c && 0x10b55 >= $c) ||
                (0x10b58 <= $c && 0x10b72 >= $c) ||
                (0x10b78 <= $c && 0x10b7f >= $c)) {
                return static::RTL;
            }
        }

        return static::LTR;
    }

    /**
     * Get the number of column positions of a wide-character.
     *
     * This is a PHP implementation of wcwidth() and wcswidth() (defined in IEEE
     * Std 1003.1-2001) for Unicode, by Markus Kuhn. Please, see
     * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
     *
     * The wcwidth(wc) function shall either return 0 (if wc is a null
     * wide-character code), or return the number of column positions to be
     * occupied by the wide-character code wc, or return -1 (if wc does not
     * correspond to a printable wide-character code).
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function getCharWidth($char)
    {
        $char = (string) $char;
        $c    = static::toCode($char);

        // Null character.
        if (0x0 === $c) {
            return 0;
        }

        // Test for 8-bit control characters.
        if (0x20 > $c || (0x7f <= $c && $c < 0xa0)) {
            return -1;
        }

        // Non-spacing characters.
        if (0xad !== $c &&
            0    !== preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char)) {
            return 0;
        }

        // If we arrive here, $c is neither a combining nor a C0/C1 control
        // character.
        $isWide =
            0x1100 <= $c &&
                (0x115f >= $c ||                        // Hangul Jamo init. consonants
                 0x2329 === $c || 0x232a === $c ||
                     (0x2e80 <= $c && 0xa4cf >= $c &&
                      0x303f !== $c) ||                 // CJK…Yi
                     (0xac00  <= $c && 0xd7a3 >= $c) || // Hangul Syllables
                     (0xf900  <= $c && 0xfaff >= $c) || // CJK Compatibility Ideographs
                     (0xfe10  <= $c && 0xfe19 >= $c) || // Vertical forms
                     (0xfe30  <= $c && 0xfe6f >= $c) || // CJK Compatibility Forms
                     (0xff00  <= $c && 0xff60 >= $c) || // Fullwidth Forms
                     (0xffe0  <= $c && 0xffe6 >= $c) ||
                     (0x20000 <= $c && 0x2fffd >= $c) ||
                     (0x30000 <= $c && 0x3fffd >= $c));

        return true === $isWide ? 2 : 1;
    }

    /**
     * Check whether the character is printable or not, i.e. whether its width
     * (please, see self::getCharWidth) is at least one column.
     *
     * @param   string  $char    Character.
     * @return  bool
     */
    public static function isCharPrintable($char)
    {
        return 1 <= static::getCharWidth($char);
    }

    /**
     * Get a UTF-8 character from its decimal code representation.
     *
     * @param   int  $code    Code.
     * @return  string
     */
    public static function fromCode($code)
    {
        // NOTE(review): the “HTML-ENTITIES” encoding of ext/mbstring is
        // deprecated as of PHP 8.2; a replacement must be chosen before it is
        // removed.
        return mb_convert_encoding(
            '&#x' . dechex($code) . ';',
            'UTF-8',
            'HTML-ENTITIES'
        );
    }

    /**
     * Get a decimal code representation of a specific character.
     * Only the first character of $char is decoded. An empty string gives 0.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function toCode($char)
    {
        $char = (string) $char;

        if ('' === $char) {
            return 0;
        }

        $code  = ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) { // 0xxxxxxx
            return $code;
        }

        // The leading byte gives the length of the sequence and the high bits
        // of the code.
        if (($code & 0xe0) === 0xc0) { // 110xxxxx
            $bytes = 2;
            $code  = $code & ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) { // 1110xxxx
            $bytes = 3;
            $code  = $code & ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) { // 11110xxx
            $bytes = 4;
            $code  = $code & ~0xf0;
        }

        // Each following byte gives 6 more bits.
        for ($i = 2; $i <= $bytes; $i++) { // 10xxxxxx
            $code = ($code << 6) + (ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }

    /**
     * Get a binary representation of a specific character: 8 binary digits
     * per byte.
     *
     * @param   string  $char    Character.
     * @return  string
     */
    public static function toBinaryCode($char)
    {
        $char = (string) $char;
        $out  = '';

        for ($i = 0, $max = strlen($char); $i < $max; ++$i) {
            $out .= sprintf('%08b', ord($char[$i]));
        }

        return $out;
    }

    /**
     * Transcode.
     *
     * @param   string  $string    String.
     * @param   string  $from      Original encoding.
     * @param   string  $to        Final encoding.
     * @return  string
     * @throws  \Hoa\Ustring\Exception
     */
    public static function transcode($string, $from, $to = 'UTF-8')
    {
        if (false === static::checkIconv()) {
            throw new Exception(
                '%s needs the iconv extension.',
                2,
                __CLASS__
            );
        }

        return iconv($from, $to, $string);
    }

    /**
     * Check if a string is encoded in UTF-8.
     *
     * @param   string  $string    String.
     * @return  bool
     */
    public static function isUtf8($string)
    {
        // With the “u” option, PCRE fails to match if the subject is not a
        // valid UTF-8 string.
        return (bool) preg_match('##u', $string);
    }

    /**
     * Copy current object string.
     *
     * @return \Hoa\Ustring
     */
    public function copy()
    {
        return clone $this;
    }

    /**
     * Transform the object as a string.
     *
     * @return  string
     */
    public function __toString()
    {
        // Some operations (a failing preg_replace() for instance) can leave
        // something that is not a string, while a string must be returned.
        return (string) $this->_string;
    }
}

/**
 * Flex entity: hand the fully-qualified class name over to
 * Consistency::flexEntity.
 */
Consistency::flexEntity('Hoa\Ustring\Ustring');

// Fail early, as soon as this file is loaded, if ext/mbstring is missing.
if (!Ustring::checkMbString()) {
    throw new Exception('%s needs the mbstring extension.', 0, __NAMESPACE__ . '\Ustring');
}