* @license Creative Commons Attribution-Share Alike 2.5 Switzerland * @link http://yellowgreen.de/hyphenation-in-web/ */ class phpHyphenation { static protected $pathToPatterns = 'patterns/'; protected $language = 'en'; protected $patterns = array(); protected $dictWords = array(); protected $hyphen = '­'; protected $leftMin = 2; protected $rightMin = 2; protected $charMin = 2; protected $charMax = 10; protected $ignoreTags = array('code', 'pre', 'script', 'style'); /** * Sets the directory which contains the patterns * @param string $path Path to the directory containing the patterns * @return bool TRUE on success, FALSE if the specified $path does not exist */ public static function setPatternPath($path) { if (!is_dir($path)) return false; self::$pathToPatterns = $path; return true; } /** * Sets the tags to ignore (default: code, pre, script, style) * @param array $tags Array containing tags to ignore * @param bool $append Set to true to append the specified $tags to the ignore-list (default: false) */ public function setIgnoreTags($tags, $append=false) { if (!$append) $this->ignoreTags = array(); $this->ignoreTags = array_merge($this->ignoreTags, $tags); } /** * Returns the current ignore-list for tags * @return array Array containing tags to ignore */ public function getIgnoreTags() { return $this->ignoreTags; } /** * Sets the hyphen to use. Defaults to soft-hyphen entity. * @param string $hyphen The hypen to use (default: ­) * @return bool TRUE on success, FALSE on error. */ public function setHyphen($hyphen='­') { if (strlen($hyphen) == 0) return false; // update hyphenation in user dict foreach ($this->dictWords as $key=>$value) { $this->dictWords[$key] = str_replace($this->hyphen, $hyphen, $value); } $this->hyphen = $hyphen; return true; } /** * Sets the hyphenation constraints. * @param int $leftMin Minimum letters to leave on the left side of a word (default: 2) * @param int $rightMin Minimum letters to leave on the right side of a word (default: 2) * @param int $charMin Minimum letters a word must have to be hyphenated (default: 2) * @param int $charMax Maximum letters to search for a hyphenation possibility (default: 10) */ public function setConstraints($leftMin=2, $rightMin=2, $charMin=2, $charMax=10) { $this->leftMin = $leftMin; $this->rightMin = $rightMin; $this->charMin = $charMin; $this->charMax = $charMax; } /** * Creates a new phpHyphenation-object. You might have to use phpHyphenation::setPatternPath() for it to find the patterns before you can instantiate the class. * @param string $language Language patterns to use. A file with this name has to exist in self::$pathToPatterns. (default: en) * @param string $hyphen Hyphen to use (default: ­) * @return phpHyphenation */ public function __construct($language='en', $hyphen='­') { mb_internal_encoding('utf-8'); $this->hyphen = $hyphen; if (!$this->loadLanguage($language)) return false; } /** * Sets a new language for hyphenation. * @param string $language Language patterns to use. A file with this name has to exist in $path. * @param string $path The path to the patterns. Defaults to self::$pathToPatterns. * @return bool TRUE on success, FALSE on error. */ public function loadLanguage($language, $path = false) { if ($path === false) $path = self::$pathToPatterns; if (!file_exists($path . '/' . $language . '.php')) return false; include($path . '/' . $language . '.php'); $this->language = $language; $this->patterns = $this->convertPatterns($patterns); return true; } /** * Loads the user-defined hyphenations from a file. (Format: one word per line, hyphenation locations marked by a slash ("/").) * @param string $filename Filename of the file containing the user defined words. * @param bool $append Set to TRUE to append the new words to the list. (default: false) * @return bool TRUE on sucess, FALSE on error. */ public function loadUserDictFromFile($filename, $append=false) { // get userDict if (empty($filename) || !file_exists($filename)) return false; $dictionary = file($filename, FILE_IGNORE_NEW_LINES); return $this->loadUserDictFromArray($dictionary, $append); } /** * Adds user-defined hyphenations from an array. (Format: one entry per word, hyphenation locations marked by a slash ("/").) * @param array $userdict Array containing user defined words. * @param bool $append Set to TRUE to append the new words to the list. (default: false) * @return bool TRUE on success, FALSE on error. */ public function loadUserDictFromArray($userdict, $append=false) { if (!is_array($userdict)) return false; if (!$append) $this->dictWords = array(); foreach ($userdict as $entry) { $entry = mb_strtolower(trim($entry)); $this->dictWords[str_replace('/', '', $entry)] = str_replace('/', $this->hyphen, $entry); } return true; } /** * Loads the patterns from a pattern file into an associative array. * @param string $patterns Patterns separated by a space character (" ") * @return array Associative array with the patterns */ protected function convertPatterns($patterns) { $patterns = mb_split(' ', $patterns); $new_patterns = array(); foreach ($patterns as $pattern) { $new_patterns[preg_replace('/[0-9]/', '', $pattern)] = $pattern; } return $new_patterns; } /** * Hyphenates a complete text and ignores HTML tags defined in $this->ignoreTags. * @param string $text Text to hyphenate * @return string Text with $this->hyphen added to the hyphenation locations */ public function doHyphenation($text) { $result = array(); $tag = ''; $tagName = ''; $tagJump = 0; $word = ''; $word_boundaries = "<>\t\n\r\0\x0B !\"§$%&/()=?….,;:-–_„”«»‘’'/\\‹›()[]{}*+´`^|©℗®™℠¹²³"; $text .= ' '; for ($i=0;$iwordHyphenation($word); $word = ''; } if ($tag != '' || $char == '<') { $tag .= $char; } if ($tag != '' && $char == '>') { #echo 'tag closed: *' . $tag . '#' . PHP_EOL; $tagSep = mb_strpos($tag, ' '); $tagSep2 = mb_strpos($tag, '>'); if ($tagSep === false || $tagSep2 < $tagSep) { $tagSep = $tagSep2; } $tagName = mb_substr($tag, 1, $tagSep-1); #echo 'tagName: ' . $tagName . PHP_EOL; if ($tagJump == 0 && in_array(mb_strtolower($tagName), $this->ignoreTags)) { $tagJump = 1; #echo 'IGNORING TAG: ' . $tagName . PHP_EOL; } elseif ($tagJump == 0 || mb_strtolower(mb_substr($tag, -mb_strlen($tagName)-3)) == '') { #echo 'Tag done: *' . $tag . '#' . PHP_EOL; $result[] = $tag; $tag = ''; $tagJump = 0; } } if ($tag == '' && $char != '<' && $char != '>') { $result[] = $char; } } if ($tag != '') $result[] = $tag; $text = join('', $result); return substr($text, 0, -1); } /** * Hyphenates a single word, i.e. inserts $this->hyphen at locations for hyphenation. * @param string $word Single word to hyphenate * @return string Hyphenated version of the word */ public function wordHyphenation($word) { if(mb_strlen($word) < $this->charMin) return $word; if(mb_strpos($word, $this->hyphen) !== false) return $word; if(isset($this->dictWords[mb_strtolower($word)])) return $this->dictWords[mb_strtolower($word)]; $text_word = '_' . $word . '_'; $word_length = mb_strlen($text_word); $single_character = preg_split('//u', $text_word, -1, PREG_SPLIT_NO_EMPTY); $text_word = mb_strtolower($text_word); $hyphenated_word = array(); $numb3rs = array('0' => true, '1' => true, '2' => true, '3' => true, '4' => true, '5' => true, '6' => true, '7' => true, '8' => true, '9' => true); for ($position=0; $position<=($word_length-$this->charMin); $position++) { $maxwins = min(($word_length-$position), $this->charMax); for ($win=$this->charMin; $win<=$maxwins; $win++) { if (isset($this->patterns[mb_substr($text_word, $position, $win)])) { $pattern = $this->patterns[mb_substr($text_word, $position, $win)]; $digits = 1; $pattern_length = mb_strlen($pattern); for ($i=0; $i<$pattern_length; $i++) { $char = $pattern[$i]; if (isset($numb3rs[$char])) { $zero = ($i==0)?$position-1:$position+$i-$digits; if (!isset($hyphenated_word[$zero]) || $hyphenated_word[$zero]!=$char) $hyphenated_word[$zero] = $char; $digits++; } } } } } $inserted = 0; for ($i=$this->leftMin; $i<=(mb_strlen($word)-$this->rightMin); $i++) { if (isset($hyphenated_word[$i]) && $hyphenated_word[$i]%2!=0) { array_splice($single_character, $i+$inserted+1, 0, $this->hyphen); $inserted++; } } return implode('', array_slice($single_character, 1, -1)); } } ?>