<?php
/**
 * Re-work of the phpHyphenation-library from yellowgreen designbüro
 * Original: JavaScript Hyphenator 10 (Beta) by Matthias Nater
 *
 * @author Markus Birth <markus@birth-online.de>
 * @license Creative Commons Attribution-Share Alike 2.5 Switzerland
 * @link http://yellowgreen.de/hyphenation-in-web/
 */
class phpHyphenation {
    /** @var string Directory that contains the pattern files (shared by all instances) */
    static protected $pathToPatterns = 'patterns/';
    /** @var string Name of the language whose patterns were loaded last */
    protected $language = 'en';
    /** @var array Map of digit-free pattern => pattern including its digits */
    protected $patterns = array();
    /** @var array Map of lower-cased word => the same word with hyphens inserted */
    protected $dictWords = array();
    /** @var string Hyphen to insert; the default is the soft hyphen U+00AD, spelled as UTF-8 bytes so it is visible in the source */
    protected $hyphen = "\xC2\xAD";
    /** @var int Minimum letters to leave on the left side of a word */
    protected $leftMin = 2;
    /** @var int Minimum letters to leave on the right side of a word */
    protected $rightMin = 2;
    /** @var int Minimum word length for hyphenation; also the shortest pattern window */
    protected $charMin = 2;
    /** @var int Longest pattern window that is looked up */
    protected $charMax = 10;
    /** @var array Names of tags whose complete content is passed through untouched */
    protected $ignoreTags = array('code', 'pre', 'script', 'style');

    /**
     * Sets the directory which contains the patterns
     * @param string $path Path to the directory containing the patterns
     * @return bool TRUE on success, FALSE if the specified $path does not exist
     */
    public static function setPatternPath($path) {
        if (!is_dir($path)) {
            return false;
        }
        self::$pathToPatterns = $path;
        return true;
    }

    /**
     * Sets the tags to ignore (default: code, pre, script, style)
     * @param array|string $tags Array containing tags to ignore (a single tag name is accepted, too)
     * @param bool $append Set to true to append the specified $tags to the ignore-list (default: false)
     */
    public function setIgnoreTags($tags, $append=false) {
        $base = $append ? $this->ignoreTags : array();
        $this->ignoreTags = array_merge($base, (array)$tags);
    }

    /**
     * Returns the current ignore-list for tags
     * @return array Array containing tags to ignore
     */
    public function getIgnoreTags() {
        return $this->ignoreTags;
    }

    /**
     * Sets the hyphen to use. Defaults to the soft hyphen character (U+00AD).
     * Entries of the user dictionary are re-written to use the new hyphen.
     * @param string $hyphen The hyphen to use (default: soft hyphen, U+00AD)
     * @return bool TRUE on success, FALSE on error (empty $hyphen).
     */
    public function setHyphen($hyphen="\xC2\xAD") {
        if (strlen($hyphen) == 0) {
            return false;
        }
        // update hyphenation in user dict
        foreach ($this->dictWords as $key => $value) {
            $this->dictWords[$key] = str_replace($this->hyphen, $hyphen, $value);
        }
        $this->hyphen = $hyphen;
        return true;
    }

    /**
     * Sets the hyphenation constraints. Values are converted to integers; negative values are treated as 0.
     * @param int $leftMin Minimum letters to leave on the left side of a word (default: 2)
     * @param int $rightMin Minimum letters to leave on the right side of a word (default: 2)
     * @param int $charMin Minimum letters a word must have to be hyphenated (default: 2)
     * @param int $charMax Maximum letters to search for a hyphenation possibility (default: 10)
     */
    public function setConstraints($leftMin=2, $rightMin=2, $charMin=2, $charMax=10) {
        $this->leftMin  = max(0, (int)$leftMin);
        $this->rightMin = max(0, (int)$rightMin);
        $this->charMin  = max(0, (int)$charMin);
        $this->charMax  = max(0, (int)$charMax);
    }

    /**
     * Creates a new phpHyphenation-object. You might have to use phpHyphenation::setPatternPath() for it to find the patterns before you can instantiate the class.
     *
     * Switches the mbstring internal encoding to UTF-8. A constructor cannot report failure through
     * a return value: if the patterns for $language cannot be loaded, the object is still created
     * with an empty pattern list and loadLanguage() can be called again later.
     *
     * @param string $language Language patterns to use. A file with this name has to exist in self::$pathToPatterns. (default: en)
     * @param string $hyphen Hyphen to use (default: soft hyphen, U+00AD)
     */
    public function __construct($language='en', $hyphen="\xC2\xAD") {
        mb_internal_encoding('utf-8');
        $this->hyphen = $hyphen;
        $this->loadLanguage($language);
    }

    /**
     * Sets a new language for hyphenation.
     * The previously loaded patterns are kept when loading fails.
     * @param string $language Language patterns to use. A file "$language.php" has to exist in $path.
     * @param string $path The path to the patterns. Defaults to self::$pathToPatterns.
     * @return bool TRUE on success, FALSE if the file is missing or does not define a $patterns string.
     */
    public function loadLanguage($language, $path = false) {
        if ($path === false) {
            $path = self::$pathToPatterns;
        }
        $file = $path . '/' . $language . '.php';
        if (!is_file($file)) {
            return false;
        }
        $rawPatterns = $this->readPatternFile($file);
        if (!is_string($rawPatterns)) {
            return false;
        }
        $this->language = $language;
        $this->patterns = $this->convertPatterns($rawPatterns);
        return true;
    }

    /**
     * Includes a pattern file in a scope of its own, so the file cannot overwrite locals of the caller.
     * NOTE(review): the file is executed as PHP code - never build $file from untrusted input.
     * @param string $file Pattern file to include
     * @return mixed The value the file assigned to $patterns, NULL if it did not assign one
     */
    protected function readPatternFile($file) {
        include($file);
        return isset($patterns) ? $patterns : null;
    }

    /**
     * Loads the user-defined hyphenations from a file. (Format: one word per line, hyphenation locations marked by a slash ("/").)
     * @param string $filename Filename of the file containing the user defined words.
     * @param bool $append Set to TRUE to append the new words to the list. (default: false)
     * @return bool TRUE on success, FALSE on error.
     */
    public function loadUserDictFromFile($filename, $append=false) {
        if (empty($filename) || !is_file($filename) || !is_readable($filename)) {
            return false;
        }
        // file() yields FALSE on failure, which loadUserDictFromArray() rejects
        $dictionary = file($filename, FILE_IGNORE_NEW_LINES);
        return $this->loadUserDictFromArray($dictionary, $append);
    }

    /**
     * Adds user-defined hyphenations from an array. (Format: one entry per word, hyphenation locations marked by a slash ("/").)
     * Entries are trimmed and lower-cased; blank entries are skipped.
     * @param array $userdict Array containing user defined words.
     * @param bool $append Set to TRUE to append the new words to the list. (default: false)
     * @return bool TRUE on success, FALSE on error.
     */
    public function loadUserDictFromArray($userdict, $append=false) {
        if (!is_array($userdict)) {
            return false;
        }
        if (!$append) {
            $this->dictWords = array();
        }
        foreach ($userdict as $entry) {
            $entry = mb_strtolower(trim($entry));
            $plain = str_replace('/', '', $entry);
            if ($plain === '') {
                continue; // blank line or nothing but slashes
            }
            $this->dictWords[$plain] = str_replace('/', $this->hyphen, $entry);
        }
        return true;
    }

    /**
     * Converts the patterns from a pattern file into an associative array.
     * @param string $patterns Patterns separated by whitespace
     * @return array Associative array (pattern without digits => pattern with digits)
     */
    protected function convertPatterns($patterns) {
        // Splitting on ASCII whitespace is safe for UTF-8: these bytes never occur inside a multi-byte character.
        $list = preg_split('/[ \t\r\n]+/', $patterns, -1, PREG_SPLIT_NO_EMPTY);
        $converted = array();
        foreach ($list as $pattern) {
            $converted[preg_replace('/[0-9]/', '', $pattern)] = $pattern;
        }
        return $converted;
    }

    /**
     * Splits a string into its characters.
     * @param string $text Text to split
     * @return array List of single characters
     */
    protected function splitChars($text) {
        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }
        // preg_split() fails on malformed UTF-8; fall back to mbstring, which tolerates it
        $chars = array();
        $length = mb_strlen($text);
        for ($i = 0; $i < $length; $i++) {
            $chars[] = mb_substr($text, $i, 1);
        }
        return $chars;
    }

    /**
     * Hyphenates a complete text and ignores HTML tags defined in $this->ignoreTags.
     * Tags themselves are never hyphenated; for ignored tags everything up to the matching
     * closing tag is copied unchanged. Tag names are compared case-insensitively.
     * @param string $text Text to hyphenate
     * @return string Text with $this->hyphen added to the hyphenation locations
     */
    public function doHyphenation($text) {
        $word_boundaries = "<>\t\n\r\0\x0B !\"§$%&/()=?….,;:-–_„”«»‘’'/\\‹›()[]{}*+´`^|©℗®™℠¹²³";
        $isBoundary = array_flip($this->splitChars($word_boundaries));
        $ignored = array_map('mb_strtolower', $this->ignoreTags);

        $result = array();
        $tag = '';          // tag (or ignored element) collected so far
        $skipping = false;  // TRUE while inside an ignored element
        $word = '';

        // The appended space guarantees that the last word is flushed; it is removed again below.
        // The text is split once - calling mb_substr() per index would make the loop quadratic.
        foreach ($this->splitChars($text . ' ') as $char) {
            if ($tag === '' && !isset($isBoundary[$char])) {
                $word .= $char;
                continue;
            }
            if ($word !== '') {
                $result[] = $this->wordHyphenation($word);
                $word = '';
            }
            if ($tag === '' && $char !== '<') {
                // plain boundary character - this includes a ">" that does not close any tag
                $result[] = $char;
                continue;
            }

            $tag .= $char;
            if ($char !== '>') {
                continue;
            }

            // The tag name ends at the first space or at the first ">", whichever comes first.
            $nameEnd = mb_strpos($tag, '>');
            $space = mb_strpos($tag, ' ');
            if ($space !== false && $space < $nameEnd) {
                $nameEnd = $space;
            }
            $tagName = mb_strtolower(mb_substr($tag, 1, $nameEnd - 1));

            if (!$skipping && in_array($tagName, $ignored, true)) {
                // opening tag of an ignored element: keep collecting until it is closed
                $skipping = true;
                continue;
            }
            $closing = '</' . $tagName . '>';
            if (!$skipping || mb_strtolower(mb_substr($tag, -mb_strlen($closing))) === $closing) {
                $result[] = $tag;
                $tag = '';
                $skipping = false;
            }
        }
        if ($tag !== '') {
            $result[] = $tag; // unterminated tag: emit what was collected
        }
        // remove the space that was appended above (a single byte in UTF-8)
        return (string)substr(join('', $result), 0, -1);
    }

    /**
     * Hyphenates a single word, i.e. inserts $this->hyphen at locations for hyphenation.
     *
     * Words shorter than $this->charMin and words that already contain the hyphen are returned
     * unchanged. Words found in the user dictionary take the break positions from there.
     * Otherwise every pattern matching a part of "_word_" contributes its digits to the gaps
     * between letters; per gap the highest digit wins and an odd result permits a break.
     *
     * @param string $word Single word to hyphenate
     * @return string Hyphenated version of the word
     */
    public function wordHyphenation($word) {
        if (mb_strlen($word) < $this->charMin) {
            return $word;
        }
        if ($this->hyphen === '' || mb_strpos($word, $this->hyphen) !== false) {
            return $word;
        }
        $lowerWord = mb_strtolower($word);
        if (isset($this->dictWords[$lowerWord])) {
            return $this->applyDictEntry($word, $this->dictWords[$lowerWord]);
        }

        $text_word = '_' . $word . '_';
        $chars = $this->splitChars($text_word);   // original spelling, used to build the result
        $word_length = count($chars);
        $text_word = mb_strtolower($text_word);    // patterns are matched case-insensitively
        $scores = array();                         // gap index => highest pattern digit seen
        $isDigit = array('0' => true, '1' => true, '2' => true, '3' => true, '4' => true, '5' => true, '6' => true, '7' => true, '8' => true, '9' => true);

        for ($position = 0; $position <= $word_length - $this->charMin; $position++) {
            $maxwins = min($word_length - $position, $this->charMax);

            for ($win = $this->charMin; $win <= $maxwins; $win++) {
                $fragment = mb_substr($text_word, $position, $win);
                if (!isset($this->patterns[$fragment])) {
                    continue;
                }
                // Walk the pattern character by character (not byte by byte), so that patterns
                // with multi-byte letters address the correct gap.
                $digitsSeen = 0;
                foreach ($this->splitChars($this->patterns[$fragment]) as $i => $char) {
                    if (!isset($isDigit[$char])) {
                        continue;
                    }
                    // gap index = number of letters of the word left of the break
                    $gap = $position + $i - $digitsSeen - 1;
                    $value = (int)$char;
                    if (!isset($scores[$gap]) || $scores[$gap] < $value) {
                        $scores[$gap] = $value;
                    }
                    $digitsSeen++;
                }
            }
        }

        $inserted = 0;
        $lastGap = ($word_length - 2) - $this->rightMin;
        for ($i = $this->leftMin; $i <= $lastGap; $i++) {
            if (isset($scores[$i]) && $scores[$i] % 2 != 0) {
                // +1 skips the leading "_", $inserted accounts for hyphens added before
                array_splice($chars, $i + $inserted + 1, 0, $this->hyphen);
                $inserted++;
            }
        }

        // drop the "_" markers again
        return implode('', array_slice($chars, 1, -1));
    }

    /**
     * Applies the break positions of a (lower-cased) user dictionary entry to a word,
     * keeping the upper/lower case of the word as it was passed in.
     * @param string $word Word as it appears in the text
     * @param string $entry Dictionary entry for the word, containing $this->hyphen
     * @return string $word with hyphens inserted; $entry itself if the lengths do not line up
     */
    protected function applyDictEntry($word, $entry) {
        $parts = explode($this->hyphen, $entry);
        $lengths = array_map('mb_strlen', $parts);
        if (array_sum($lengths) !== mb_strlen($word)) {
            return $entry; // lower-casing changed the number of characters; positions are unreliable
        }
        $pieces = array();
        $offset = 0;
        foreach ($lengths as $length) {
            $pieces[] = mb_substr($word, $offset, $length);
            $offset += $length;
        }
        return implode($this->hyphen, $pieces);
    }
}