<?php

namespace ComboStrap;

use ComboStrap\Web\Url;

/**
 * Class StringUtility
 * @package ComboStrap
 * A class with string utilities
 *
 * Unless stated otherwise, lengths and offsets are counted in bytes
 * (the functions rely on `strlen` / `substr`).
 */
class StringUtility
{

    public const SEPARATORS_CHARACTERS = [".", "(", ")", ",", "-"];


    /**
     * Generate a text with a max length of $length
     * and add ` ...` if the text is longer.
     *
     * The cut is byte-wise: a multi-byte character located at the cut
     * position may be split.
     *
     * @param string $myString - the text to shorten
     * @param int $length - the maximal length (in bytes) of the returned text
     * @return string - the text untouched if it fits, otherwise the truncated text
     */
    public static function truncateString($myString, $length): string
    {

        if (strlen($myString) <= $length) {
            return $myString;
        }

        $suffix = ' ...';
        $keptLength = ($length - 1) - strlen($suffix);
        if ($keptLength < 0) {
            // Not enough room for the suffix: a negative length given to substr
            // would only drop the tail of the text and return more than $length bytes.
            // We do a hard cut instead.
            return substr($myString, 0, max(0, $length));
        }
        return substr($myString, 0, $keptLength) . $suffix;

    }

    /**
     * @param $string
     * @return string - the string without any line feed (`\n`)
     * Used to compare strings without worrying about line feeds
     */
    public static function normalized($string)
    {
        return str_replace("\n", "", $string);
    }

    /**
     * @param $needle - the searched text
     * @param $haystack - the text to search in
     * @return bool - true if the needle was found in the haystack
     */
    public static function contain($needle, $haystack)
    {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Return a string representation of a value
     *
     * @param mixed $value
     * @return string
     *   * a string is returned untouched,
     *   * an array and a boolean are returned in their `var_export` form,
     *   * an object is cast via `__toString` if available, otherwise its class name is returned,
     *   * a numeric value is cast with `strval`,
     *   * any other value is returned in its `var_export` form and a warning is logged
     */
    public static function toString($value)
    {
        /**
         * No transformation if it's a string
         * var_export below is not idempotent
         * ie \ would become \\
         */
        if (is_string($value)) {
            return $value;
        }

        if (is_array($value)) {
            // The export of an array always starts with `array`,
            // there are no surrounding quotes to delete
            return var_export($value, true);
        }

        if (is_object($value)) {
            if (method_exists($value, "__toString")) {
                return strval($value);
            } else {
                return get_class($value);
            }
        }

        if (is_numeric($value)) {
            return strval($value);
        }

        if (is_bool($value)) {
            return var_export($value, true);
        }

        $string = var_export($value, true);
        LogUtility::msg("The type of the value ($string) is unknown and could not be properly cast to string", LogUtility::LVL_MSG_WARNING);
        return $string;

    }

    /**
     * Add an EOL ({@link DOKU_LF}) if not present at the end of the string
     * @param string $doc - the text, modified in place
     */
    public static function addEolCharacterIfNotPresent(&$doc)
    {
        // substr and not an offset access ($doc[strlen($doc) - 1])
        // because the offset access is an error on an empty string
        if (substr($doc, -1) !== DOKU_LF) {
            $doc .= DOKU_LF;
        }
    }

    /**
     * Delete the string from the end
     * This is used generally to delete the previous opening tag of an header or a blockquote
     * @param string $doc - the text, modified in place (it's always trimmed)
     * @param string $string - the text to delete at the end of the doc
     */
    public static function rtrim(&$doc, $string)
    {

        /**
         * We trim because in the process, we may get extra {@link DOKU_LF} at the end
         */
        $doc = trim($doc);
        $string = trim($string);
        if ($string === '') {
            // Nothing to delete
            return;
        }
        $length = strlen($doc) - strlen($string);
        if ($length >= 0 && substr($doc, $length) === $string) {
            // cast because substr may return false before php 8
            $doc = (string)substr($doc, 0, $length);
        }

    }

    /**
     * Delete the string from the beginning
     * This is used to delete a tag for instance
     * @param string $doc - the text, modified in place (it's always trimmed)
     * @param string $string - the text to delete at the beginning of the doc
     */
    public static function ltrim(&$doc, $string)
    {

        $doc = trim($doc);
        $string = trim($string);
        if ($string === '') {
            // Nothing to delete
            return;
        }
        $length = strlen($string);
        if (substr($doc, 0, $length) === $string) {
            // cast because before php 8, substr returns false
            // when the whole doc is deleted
            $doc = (string)substr($doc, $length);
        }

    }

    /**
     * The word count does not take into account
     * words with non-words characters such as < =
     * Therefore the node <node> and attribute name=value are not taken in the count
     * @param string $text
     * @return int the number of words
     */
    public static function getWordCount($text)
    {
        /**
         * Delete the frontmatter
         */
        $text = preg_replace('/^---(json)?$.*^---$/Ums', "", $text);
        /**
         * New line for node
         */
        $text = str_replace("<", "\n<", $text);
        $text = str_replace(">", ">\n", $text);
        // \s shorthand for whitespace
        // | the table and links are separated with a |
        // / to take into account expression such as and/or
        // /u for unicode support (https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php)
        $wordSeparator = '/[\s|\/]/u';
        $tokens = preg_split($wordSeparator, $text);
        if ($tokens === false) {
            // The split failed (with the u modifier, the text should be valid UTF-8)
            return 0;
        }
        // An array callable and not the string 'self::isWord' that is deprecated since php 8.2
        $wordsWithoutEmpty = array_filter($tokens, [self::class, 'isWord']);
        return count($wordsWithoutEmpty);
    }

    /**
     * Normalize the whitespaces of a text:
     *   * every run of whitespace (space, tab, end of line, ...) becomes one space
     *   * the leading and trailing whitespaces are deleted
     * @param string $expected
     * @return string
     */
    public static function normalize($expected)
    {
        $expected = preg_replace('/\s+/', " ", $expected);
        return trim($expected);

    }

    /**
     * @param $text
     * @return bool - true if the text is not empty and is composed only
     * of word characters and of the characters `-`, `'`, `[`, `]` and `,`
     */
    public static function isWord($text)
    {
        // Not the `empty` function because "0" is a word
        if (!is_scalar($text) || $text === false) {
            return false;
        }
        $text = (string)$text;
        if ($text === '') {
            return false;
        }
        /**
         * We also allow `-` minus
         *
         * And because otherwise the words are not counted:
         *   * `'` (used to highlight words)
         *   * `[]` used in links
         *   * `,` used at the end of a sentence
         */
        return preg_match("/^[\w\-'\]\[,]+$/u", $text) === 1;
    }

    /**
     * @param string $subject - the text to test
     * @param string $pattern - a regular expression pattern without delimiters
     * (the pattern is enclosed between `/`, a `/` in the pattern should then be escaped)
     * @return bool - true if the pattern matches
     */
    public static function match($subject, $pattern)
    {
        return preg_match("/$pattern/", $subject) === 1;
    }

    /**
     * @param string $string
     * @param string $suffix
     * @return bool - true if the string ends with the suffix (always true for an empty suffix)
     */
    public static function endWiths($string, $suffix)
    {
        $string = (string)$string;
        $suffix = (string)$suffix;
        $suffixLength = strlen($suffix);
        if ($suffixLength === 0) {
            return true;
        }
        return substr($string, -$suffixLength) === $suffix;
    }

    /**
     * @param string $string - the text to split
     * @param string $delimiter - the separator
     * @return array - the trimmed parts
     */
    public static function explodeAndTrim($string, $delimiter = ",")
    {
        return array_map('trim', explode($delimiter, $string));
    }

    /**
     * @param string $haystack
     * @param string $needle
     * @return false|int - the position of the last occurrence of the needle or false if not found
     */
    public static function lastIndexOf($haystack, $needle)
    {
        /**
         * strRpos
         * and not strpos
         */
        return strrpos($haystack, $needle);
    }

    /**
     * @param string $string
     * @param string $prefix
     * @return bool - true if the string starts with the prefix (always true for an empty prefix)
     */
    public static function startWiths($string, $prefix)
    {
        /**
         * Not `strrpos(...) === 0`: strrpos returns the position of the last occurrence
         * and `abab` would then not start with `ab`
         */
        $prefix = (string)$prefix;
        return strncmp((string)$string, $prefix, strlen($prefix)) === 0;
    }

    /**
     * @param string $string
     * @param array|null $separatorsCharacters - characters that will separate the words
     * (null for the default characters)
     * @return array the words (empty if the string does not contain any word)
     */
    public static function getWords($string, $separatorsCharacters = null): array
    {
        // Reserved characters to space
        if (!is_array($separatorsCharacters)) {
            if ($separatorsCharacters !== null) {
                LogUtility::msg("The separators characters are not an array, default characters used");
            }
            $separatorsCharacters = self::getAllSeparators();
        }

        $string = str_replace($separatorsCharacters, " ", $string);
        // Doubles spaces to space
        $string = preg_replace("/\s{2,}/", " ", $string);
        // Trim space
        $string = trim($string);

        if ($string === '') {
            // explode would return an array with one empty word
            return [];
        }

        return explode(" ", $string);
    }

    /**
     * @return array - the default separators: the merge of {@link Url::RESERVED_WORDS},
     * {@link LocalPath::RESERVED_WINDOWS_CHARACTERS} and {@link StringUtility::SEPARATORS_CHARACTERS}
     */
    private static function getAllSeparators(): array
    {
        return array_merge(
            Url::RESERVED_WORDS,
            LocalPath::RESERVED_WINDOWS_CHARACTERS,
            self::SEPARATORS_CHARACTERS
        );
    }

}