xref: /plugin/combo/ComboStrap/StringUtility.php (revision 4cadd4f8c541149bdda95f080e38a6d4e3a640ca)
137748cd8SNickeau<?php
237748cd8SNickeau
337748cd8SNickeaunamespace ComboStrap;
437748cd8SNickeau
/**
 * Class StringUtility
 * @package ComboStrap
 *
 * Static helpers for string manipulation: truncation, normalization,
 * prefix / suffix tests and word splitting / counting.
 */
class StringUtility
{

    /**
     * Punctuation characters that {@link StringUtility::getAllSeparators()} merges with
     * `Url::RESERVED_WORDS` and `LocalPath::RESERVED_WINDOWS_CHARACTERS`
     * to build the default separators of {@link StringUtility::getWords()}
     */
    public const SEPARATORS_CHARACTERS = [".", "(", ")", ",", "-"];


    /**
     * Generate a text with a max length of $length bytes
     * and add ` ...` at the end if the input was longer.
     *
     * When shortened, the result is made of the first `$length - 5` bytes
     * followed by the 4 bytes suffix. If $length is too small to hold the suffix,
     * the text is simply cut at $length bytes without any suffix.
     *
     * @param string $myString - the text to shorten
     * @param int $length - the maximum length in bytes
     * @return string
     */
    public static function truncateString($myString, $length): string
    {

        if (strlen($myString) <= $length) {
            return $myString;
        }

        $suffix = ' ...';
        $keptLength = ($length - 1) - strlen($suffix);
        if ($keptLength < 0) {
            // A negative length would make substr count from the end of the string
            // and return a text that is longer than the requested maximum
            return self::cutBytes($myString, max(0, (int)$length));
        }
        return self::cutBytes($myString, $keptLength) . $suffix;

    }

    /**
     * Return the first $byteLength bytes of the text.
     * When the mbstring extension is available, the cut is moved back
     * to a UTF-8 character boundary to not return half of a character.
     *
     * @param string $text
     * @param int $byteLength - a positive number of bytes
     * @return string
     */
    private static function cutBytes($text, $byteLength): string
    {
        if (function_exists('mb_strcut')) {
            return (string)mb_strcut((string)$text, 0, (int)$byteLength, 'UTF-8');
        }
        // substr may return false on old PHP version, hence the cast
        return (string)substr((string)$text, 0, (int)$byteLength);
    }

    /**
     * @param $string
     * @return string - the string without any end of line character (carriage return and line feed)
     * Used to compare string without worrying about end of line
     */
    public static function normalized($string)
    {
        return str_replace(["\r", "\n"], "", $string);
    }

    /**
     * @param $needle - the text searched
     * @param $haystack - the text to search in
     * @return bool true if the needle is found in the haystack
     */
    public static function contain($needle, $haystack): bool
    {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Return a string representation of a value
     *
     *   * a string is returned as is
     *   * an array is exported with var_export
     *   * an object is cast to string if it has a `__toString` method, otherwise its class name is returned
     *   * a numeric value is cast with strval
     *   * a boolean is returned as `true` or `false`
     *
     * Any other value is exported with var_export and a warning is logged.
     *
     * @param mixed $value
     * @return string
     */
    public static function toString($value)
    {
        /**
         * No transformation if it's a string
         * var_export below is not idempotent
         * ie \ would become \\
         */
        if (is_string($value)) {
            return $value;
        }

        if (is_array($value)) {
            // The export of an array always starts with `array (`,
            // there is therefore no enclosing quote to delete
            return var_export($value, true);
        }

        if (is_object($value)) {
            if (method_exists($value, "__toString")) {
                return strval($value);
            }
            return get_class($value);
        }

        if (is_numeric($value)) {
            return strval($value);
        }

        if (is_bool($value)) {
            return var_export($value, true);
        }

        $string = var_export($value, true);
        LogUtility::msg("The type of the value ($string) is unknown and could not be properly cast to string", LogUtility::LVL_MSG_WARNING);
        return $string;

    }

    /**
     * Add an EOL if not present at the end of the string
     * @param $doc - the text, modified in place
     */
    public static function addEolCharacterIfNotPresent(&$doc)
    {
        // substr and not an offset access ($doc[strlen($doc) - 1])
        // because an offset access on an empty string triggers a warning
        if (substr($doc, -1) !== DOKU_LF) {
            $doc .= DOKU_LF;
        }
    }

    /**
     * Delete the string from the end
     * This is used generally to delete the previous opening tag of an header or a blockquote
     *
     * Note that the whitespaces are deleted on both sides of $doc, even if the string was not found
     *
     * @param $doc - the text, modified in place
     * @param $string - the text to delete
     */
    public static function rtrim(&$doc, $string)
    {

        /**
         * We trim because in the process, we may get extra {@link DOKU_LF} at the end
         */
        $doc = trim($doc);
        $string = trim($string);
        if ($string === '') {
            // nothing to delete
            return;
        }
        $length = strlen($doc) - strlen($string);
        if (substr($doc, $length) === $string) {
            $doc = substr($doc, 0, $length);
        }

    }

    /**
     * Delete the string from the beginning
     * This is used to delete a tag for instance
     *
     * Note that the whitespaces are deleted on both sides of $doc, even if the string was not found
     *
     * @param $doc - the text, modified in place
     * @param $string - the text to delete
     */
    public static function ltrim(&$doc, $string)
    {

        $doc = trim($doc);
        $string = trim($string);
        if ($string === '') {
            // nothing to delete
            return;
        }
        $length = strlen($string);
        if (substr($doc, 0, $length) === $string) {
            $doc = substr($doc, $length);
        }

    }

    /**
     * The word count does not take into account
     * words with non-words characters such as < =
     * Therefore the node <node> and attribute name=value are not taken in the count
     * @param $text
     * @return int the number of words (0 if the text could not be split, ie invalid UTF-8)
     */
    public static function getWordCount($text): int
    {
        /**
         * Delete the frontmatter
         * (preg_replace returns null on error, the text is then kept as is)
         */
        $withoutFrontmatter = preg_replace("/^---(json)?$.*^---$/Ums", "", $text);
        if ($withoutFrontmatter !== null) {
            $text = $withoutFrontmatter;
        }
        /**
         * New line for node
         */
        $text = str_replace("<", "\n<", $text);
        $text = str_replace(">", ">\n", $text);
        // \s shorthand for whitespace
        // | the table and links are separated with a |
        // / to take into account expression such as and/or
        // /u for unicode support (https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php)
        $wordSeparator = '/[\s|\/]/u';
        $tokens = preg_split($wordSeparator, $text);
        if ($tokens === false) {
            return 0;
        }
        // An array callable because the `self::method` string callable is deprecated since PHP 8.2
        $words = array_filter($tokens, [self::class, 'isWord']);
        return count($words);
    }

    /**
     * Replace every run of whitespace characters (space, tab, end of line, ...)
     * by one space and delete the leading and trailing whitespaces
     *
     * @param $expected
     * @return string
     */
    public static function normalize($expected)
    {
        // One regexp pass collapses a run of any length
        return trim((string)preg_replace("/\s+/", " ", $expected));

    }

    /**
     * @param $text
     * @return bool true if the text is made only of word characters and of the characters `-`, `'`, `[`, `]` and `,`
     */
    public static function isWord($text): bool
    {
        // is_numeric because empty returns true for `0` that is a valid word
        if (empty($text) && !is_numeric($text)) {
            return false;
        }
        /**
         * We also allow `-` minus
         *
         * And because otherwise the words are not counted:
         *   * `'` (used to highlight words)
         *   * `[]` used in links
         *   * `,` used at the end of a sentence
         */
        return preg_match("/^[\w\-'\]\[,]*$/u", $text) === 1;
    }

    /**
     * @param $subject - the text to test
     * @param $pattern - a regular expression without delimiters
     * @return bool true if the pattern matches the subject
     */
    public static function match($subject, $pattern): bool
    {
        /**
         * The slash is the default delimiter.
         * If the pattern contains a slash, another delimiter that is not
         * in the pattern is used, otherwise the regular expression would be invalid
         */
        $delimiter = '/';
        if (strpos($pattern, $delimiter) !== false) {
            foreach (['~', '#', '%', '@', '!'] as $candidate) {
                if (strpos($pattern, $candidate) === false) {
                    $delimiter = $candidate;
                    break;
                }
            }
        }
        return preg_match($delimiter . $pattern . $delimiter, $subject) === 1;
    }

    /**
     * @param $string
     * @param $suffix
     * @return bool true if the string ends with the suffix (always true for an empty suffix)
     */
    public static function endWiths($string, $suffix): bool
    {
        $suffixLength = strlen($suffix);
        if ($suffixLength === 0) {
            return true;
        }
        if ($suffixLength > strlen($string)) {
            return false;
        }
        // Compare only the end of the string
        return substr_compare($string, $suffix, -$suffixLength) === 0;
    }

    /**
     * Split a string on a delimiter and trim each part
     * @param $string
     * @param string $delimiter
     * @return array
     */
    public static function explodeAndTrim($string, $delimiter = ",")
    {
        return array_map('trim', explode($delimiter, $string));
    }

    /**
     * @param $haystack
     * @param $needle
     * @return false|int the position of the last occurrence of the needle or false if not found
     */
    public static function lastIndexOf($haystack, $needle)
    {
        /**
         * strRpos
         * and not strpos
         */
        return strrpos($haystack, $needle);
    }

    /**
     * @param $string
     * @param $prefix
     * @return bool true if the string starts with the prefix (always true for an empty prefix)
     */
    public static function startWiths($string, $prefix): bool
    {
        // strncmp and not strrpos: strrpos returns the position of the last occurrence
        // and would fail when the prefix is also present later in the string (ie `abab` and `ab`)
        return strncmp($string, $prefix, strlen($prefix)) === 0;
    }

    /**
     * @param $string
     * @param null $separatorsCharacters - characters that will separate the words
     * @return array the words (an empty array if there is none)
     */
    public static function getWords($string, $separatorsCharacters = null): array
    {
        // Reserved characters to space
        if ($separatorsCharacters === null) {
            $separatorsCharacters = self::getAllSeparators();
        } elseif (!is_array($separatorsCharacters)) {
            LogUtility::msg("The separators characters are not an array, default characters used");
            $separatorsCharacters = self::getAllSeparators();
        }

        $string = str_replace($separatorsCharacters, " ", $string);

        // Split on any run of whitespace and drop the empty parts
        $words = preg_split("/\s+/", $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return [];
        }
        return $words;
    }

    /**
     * @return array the default separators used by {@link StringUtility::getWords()}
     */
    private static function getAllSeparators(): array
    {
        return array_merge(
            Url::RESERVED_WORDS,
            LocalPath::RESERVED_WINDOWS_CHARACTERS,
            self::SEPARATORS_CHARACTERS
        );
    }

}
277