<?php
/*
	phpHyphenator 1.4
	Developed by yellowgreen designbüro
	PHP version of the JavaScript Hyphenator 10 (Beta) by Matthias Nater

	Licensed under Creative Commons Attribution-Share Alike 2.5 Switzerland
	http://creativecommons.org/licenses/by-sa/2.5/ch/deed.en

	Associated pages:
	http://yellowgreen.de/soft-hyphenation-generator/
	http://yellowgreen.de/hyphenation-in-web/
*/

mb_internal_encoding("utf-8");

// FUNCTIONS

/**
 * Convert a space-separated pattern list into a lookup table.
 *
 * Each pattern (e.g. "a1b") is stored under its digit-free form ("ab") so
 * that a fragment of a word can be looked up directly.
 *
 * @param string $patterns Patterns separated by single spaces.
 * @return array Map of digit-free pattern => original pattern.
 */
function convert_patterns($patterns) {
    $new_patterns = array();
    // The separator is a plain ASCII space, so a byte-wise explode() is
    // UTF-8 safe and avoids depending on the optional mbregex mb_split().
    foreach(explode(' ', (string) $patterns) as $value) {
        $new_patterns[preg_replace('/[0-9]/', '', $value)] = $value;
    }
    return $new_patterns;
}

/**
 * Split a UTF-8 string into an array of single characters.
 *
 * @param string $string Text to split.
 * @return array List of characters; an empty array for an empty string.
 */
function mb_split_chars($string) {
    $string = (string) $string;
    if($string === '') return array();

    // One linear pass; the "u" modifier makes PCRE split on code points.
    $chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
    if($chars !== false) return $chars;

    // preg_split() fails on malformed UTF-8: fall back to mb_substr().
    $chars = array();
    $length = mb_strlen($string, 'utf-8');
    for($i = 0; $i < $length; $i++) {
        $chars[] = mb_substr($string, $i, 1, 'utf-8');
    }
    return $chars;
}

// GET DATA

// Set defaults (any value already present in $GLOBALS is kept)
if(!isset($GLOBALS["language"])) $GLOBALS["language"] = "en";
if(!isset($GLOBALS["path_to_patterns"])) $GLOBALS["path_to_patterns"] = "patterns/";
if(!isset($GLOBALS["dictionary"])) $GLOBALS["dictionary"] = "dictionary.txt";
if(!isset($GLOBALS["hyphen"])) $GLOBALS["hyphen"] = "\xC2\xAD"; // U+00AD SOFT HYPHEN, UTF-8 encoded
if(!isset($GLOBALS["leftmin"])) $GLOBALS["leftmin"] = 2;
if(!isset($GLOBALS["rightmin"])) $GLOBALS["rightmin"] = 2;
if(!isset($GLOBALS["charmin"])) $GLOBALS["charmin"] = 2;
if(!isset($GLOBALS["charmax"])) $GLOBALS["charmax"] = 10;
if(!isset($GLOBALS["exclude_tags"])) $GLOBALS["exclude_tags"] = array("code", "pre", "script", "style");

// Get patterns
// NOTE(review): the include path is built from $GLOBALS["language"]; make
// sure that value never comes from untrusted input.
if(file_exists($GLOBALS["path_to_patterns"] . $GLOBALS["language"] . ".php")) {
    // The pattern file is expected to define $patterns in this scope.
    include($GLOBALS["path_to_patterns"] . $GLOBALS["language"] . ".php");
    $GLOBALS["patterns"] = (isset($patterns) && is_string($patterns)) ? convert_patterns($patterns) : array();
} else {
    $GLOBALS["patterns"] = array();
}

// Get dictionary (one word per line, "/" marks the hyphenation points)
$GLOBALS["dictionary"] = file_exists($GLOBALS["dictionary"]) ? file($GLOBALS["dictionary"]) : array();
// file() returns false when the path cannot be read (e.g. a directory).
if(!is_array($GLOBALS["dictionary"])) $GLOBALS["dictionary"] = array();

foreach($GLOBALS["dictionary"] as $entry) {
    // mb_strtolower() instead of strtolower(): entries are UTF-8 text.
    $entry = mb_strtolower(trim($entry));
    if($entry === '') continue;
    $GLOBALS["dictionary words"][str_replace("/", "", $entry)] = str_replace("/", $GLOBALS["hyphen"], $entry);
}

// HYPHENATION

/**
 * Hyphenate a single word.
 *
 * Reads $GLOBALS["hyphen"], ["charmin"], ["charmax"], ["leftmin"],
 * ["rightmin"], ["patterns"] and ["dictionary words"].
 *
 * The word is returned unchanged when it is shorter than charmin or already
 * contains the hyphen string. A word found (as given, case-sensitively) in
 * the dictionary table is returned as stored there. Otherwise the patterns
 * are applied to "_word_" and the hyphen string is inserted at every
 * position whose winning pattern value is odd, keeping at least leftmin
 * characters before and rightmin characters after each break.
 *
 * @param string $word Word without surrounding punctuation.
 * @return string The word with hyphen strings inserted.
 */
function word_hyphenation($word) {
    $hyphen = $GLOBALS["hyphen"];
    $charmin = $GLOBALS["charmin"];
    $charmax = $GLOBALS["charmax"];

    if(mb_strlen($word) < $charmin) return $word;
    // An empty hyphen string cannot change the word (and is not a valid
    // mb_strpos() needle on older PHP versions).
    if($hyphen === "" || mb_strpos($word, $hyphen) !== false) return $word;
    if(isset($GLOBALS["dictionary words"][$word])) return $GLOBALS["dictionary words"][$word];

    $patterns = $GLOBALS["patterns"];
    $text_word = '_' . $word . '_';
    $single_character = mb_split_chars($text_word);
    $lower_character = mb_split_chars(mb_strtolower($text_word));
    $word_length = count($single_character);
    // $hyphenated_word[k] holds the winning value for a break between
    // character k and k + 1 of "_word_", i.e. after k letters of the word.
    $hyphenated_word = array();
    $numb3rs = array('0' => true, '1' => true, '2' => true, '3' => true, '4' => true, '5' => true, '6' => true, '7' => true, '8' => true, '9' => true);

    for($position = 0; $position <= ($word_length - $charmin); $position++) {
        $maxwins = min(($word_length - $position), $charmax);
        $fragment = '';

        // Grow the fragment one character at a time instead of re-slicing.
        for($win = 1; $win <= $maxwins; $win++) {
            if(!isset($lower_character[$position + $win - 1])) break;
            $fragment .= $lower_character[$position + $win - 1];
            if($win < $charmin || !isset($patterns[$fragment])) continue;

            // Walk the pattern by character, not by byte, so that patterns
            // containing multibyte letters keep their digits aligned.
            $letters_seen = 0;
            foreach(mb_split_chars($patterns[$fragment]) as $char) {
                if(isset($numb3rs[$char])) {
                    $zero = $position + $letters_seen - 1;
                    $value = (int) $char;
                    // Competing patterns: the highest value wins.
                    if(!isset($hyphenated_word[$zero]) || $hyphenated_word[$zero] < $value) $hyphenated_word[$zero] = $value;
                } else {
                    $letters_seen++;
                }
            }
        }
    }

    $inserted = 0;
    $last_break = mb_strlen($word) - $GLOBALS["rightmin"];
    for($i = $GLOBALS["leftmin"]; $i <= $last_break; $i++) {
        // Odd values allow a break, even values forbid it.
        if(isset($hyphenated_word[$i]) && $hyphenated_word[$i] % 2 != 0) {
            array_splice($single_character, $i + $inserted + 1, 0, $hyphen);
            $inserted++;
        }
    }

    // Drop the "_" markers added around the word.
    return implode('', array_slice($single_character, 1, -1));
}

/**
 * Hyphenate every word of a text that may contain HTML markup.
 *
 * Tags are copied through untouched. An element whose name is listed in
 * $GLOBALS["exclude_tags"] is copied verbatim, including its content, up to
 * its closing tag. All other characters are copied as they are; words
 * between boundary characters go through word_hyphenation().
 *
 * @param string $text Plain text or HTML fragment (UTF-8).
 * @return string The text with hyphen strings inserted into its words.
 */
function hyphenation($text) {
    static $boundary_set = null;
    if($boundary_set === null) {
        $word_boundaries = "<>\t\n\r\0\x0B !\"§$%&/()=?….,;:-–_„”«»‘’'/\\‹›()[]{}*+´`^|©℗®™℠¹²³";
        // Character => index map for constant-time membership tests.
        $boundary_set = array_flip(mb_split_chars($word_boundaries));
    }

    $exclude_tags = isset($GLOBALS["exclude_tags"]) ? $GLOBALS["exclude_tags"] : array();
    $word = "";
    $tag = "";
    $skip_until = ""; // lower-case name of the excluded element being copied
    $output = array();

    // The trailing space flushes the last word; it is removed again below.
    foreach(mb_split_chars($text . " ") as $char) {
        if($tag === "" && !isset($boundary_set[$char])) {
            $word .= $char;
            continue;
        }

        if($word !== "") { $output[] = word_hyphenation($word); $word = ""; }

        if($tag === "" && $char !== "<") {
            // Ordinary boundary character, including a ">" outside any tag.
            $output[] = $char;
            continue;
        }

        $tag .= $char;
        if($char !== ">") continue;

        if($skip_until === "") {
            $tag_name = preg_match('/^<\s*([^\s>\/]+)/', $tag, $match) ? strtolower($match[1]) : "";
            // A self-closing tag has no content that could be skipped.
            if(substr($tag, -2) !== "/>" && in_array($tag_name, $exclude_tags, true)) {
                $skip_until = $tag_name;
            } else {
                $output[] = $tag;
                $tag = "";
            }
        } else {
            // Inside an excluded element: only its own closing tag ends it,
            // so a ">" in script code or a nested tag does not.
            $closing = strrpos($tag, "</");
            if($closing !== false && strtolower(rtrim(substr($tag, $closing + 2, -1))) === $skip_until) {
                $output[] = $tag;
                $tag = "";
                $skip_until = "";
            }
        }
    }

    // Unterminated tag or excluded element: emit it verbatim, don't drop it.
    if($tag !== "") $output[] = $tag;

    $text = implode('', $output);
    return substr($text, 0, strlen($text) - 1);
}
?>