<?php

namespace Sabre\VObject;

/**
 * Useful utilities for working with various strings.
 *
 * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class StringUtil {

    /**
     * Returns true or false depending on if a string is valid UTF-8.
     *
     * A string containing any of the bytes 0x00-0x08, 0x0B, 0x0C, 0x0E or
     * 0x0F is rejected outright, even if it is otherwise well-formed UTF-8.
     *
     * NOTE(review): the bytes 0x10-0x1F and 0x7F are not rejected here,
     * although convertToUTF8() strips them. Confirm whether that difference
     * is intentional before tightening this check.
     *
     * @param string $str
     * @return bool
     */
    static public function isUTF8($str) {

        // Control characters
        if (preg_match('%[\x00-\x08\x0B-\x0C\x0E\x0F]%', $str)) {
            return false;
        }

        // An empty pattern matches any subject, so with the "u" modifier
        // this only fails (returns false) when $str is not valid UTF-8.
        return (bool)preg_match('%%u', $str);

    }

    /**
     * This method tries its best to convert the input string to UTF-8.
     *
     * Currently only ISO-8859-1 input and UTF-8 input is supported, but this
     * may be expanded upon if we receive other examples.
     *
     * Input that is already valid UTF-8 is left as-is; anything else is
     * treated as ISO-8859-1 and transcoded. In both cases the control
     * characters 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F are removed from
     * the result.
     *
     * @param string $str
     * @return string
     */
    static public function convertToUTF8($str) {

        // Validate instead of detect: mb_detect_encoding() may report
        // WINDOWS-1252 (or nothing) for non-UTF-8 input, in which case the
        // string used to be returned unconverted. Every byte sequence is
        // valid ISO-8859-1, which is also why a separate WINDOWS-1252 branch
        // could never be reached; ISO-8859-1 is therefore the only fallback.
        if (!mb_check_encoding($str, 'UTF-8')) {
            // Same mapping as utf8_encode(), which is deprecated in PHP 8.2.
            $str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        // Removing any control characters
        return preg_replace('%(?:[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F])%', '', $str);

    }

}