1<?php
2/*
3			phpHyphenator 1.4
4			Developed by yellowgreen designbüro
5			PHP version of the JavaScript Hyphenator 10 (Beta) by Matthias Nater
6
7			Licensed under Creative Commons Attribution-Share Alike 2.5 Switzerland
8			http://creativecommons.org/licenses/by-sa/2.5/ch/deed.en
9
10			Associated pages:
11			http://yellowgreen.de/soft-hyphenation-generator/
12			http://yellowgreen.de/hyphenation-in-web/
13*/
14
// Every mb_* call in this file that omits its encoding argument
// (mb_strlen, mb_substr, mb_strpos, mb_strtolower) relies on this setting.
mb_internal_encoding('utf-8');
16
17// FUNCTIONS
18
/**
 * Build the pattern lookup table from a space-separated pattern list.
 *
 * Each pattern is stored under a key made of the pattern with every ASCII
 * digit removed, so a lookup by plain text returns the original,
 * digit-annotated pattern. When two patterns reduce to the same key, the one
 * that appears later in the list replaces the earlier one.
 *
 * @param string $patterns Patterns separated by single spaces.
 * @return array Map of digit-free pattern text => original pattern.
 */
function convert_patterns($patterns) {
	$lookup = array();

	foreach (mb_split(' ', $patterns) as $pattern) {
		$letters_only = preg_replace('/[0-9]/', '', $pattern);
		$lookup[$letters_only] = $pattern;
	}

	return $lookup;
}
29
/**
 * Split a UTF-8 string into an array of single characters.
 *
 * @param string $string Text to split.
 * @return array List of one-character strings in their original order;
 *               an empty array for an empty string.
 */
function mb_split_chars($string) {
	$string = (string) $string;

	// Always hand back an array: callers pass the result to array functions.
	if ($string === '') return array();

	// The empty pattern with the "u" modifier matches between code points,
	// which splits the whole string in a single pass.
	$chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
	if ($chars !== false) return $chars;

	// preg_split() rejects malformed UTF-8; fall back to mbstring, which
	// still yields one element per character it recognises.
	$chars = array();
	$length = mb_strlen($string, 'utf-8');
	for ($i = 0; $i < $length; $i++) {
		$chars[] = mb_substr($string, $i, 1, 'utf-8');
	}
	return $chars;
}
40
41// GET DATA
// Set defaults. A value stored in $GLOBALS before this file is loaded wins;
// otherwise the default below is used.
// No temporary variables are introduced in this section because the code may
// run in the global scope, where every variable becomes a global.
if(!isset($GLOBALS["language"])) $GLOBALS["language"] = "en";
if(!isset($GLOBALS["path_to_patterns"])) $GLOBALS["path_to_patterns"] = "patterns/";
if(!isset($GLOBALS["dictionary"])) $GLOBALS["dictionary"] = "dictionary.txt";
if(!isset($GLOBALS["hyphen"])) $GLOBALS["hyphen"] = "&shy;";
if(!isset($GLOBALS["leftmin"])) $GLOBALS["leftmin"] = 2;
if(!isset($GLOBALS["rightmin"])) $GLOBALS["rightmin"] = 2;
if(!isset($GLOBALS["charmin"])) $GLOBALS["charmin"] = 2;
if(!isset($GLOBALS["charmax"])) $GLOBALS["charmax"] = 10;
if(!isset($GLOBALS["exclude_tags"])) $GLOBALS["exclude_tags"] = array("code", "pre", "script", "style");

// Get patterns: the pattern file is a PHP script expected to define $patterns
// as one space-separated string.
// NOTE(review): the include path is built from $GLOBALS["language"] and
// $GLOBALS["path_to_patterns"]; these must never be filled from untrusted
// input, because whatever file they name is executed.
if(file_exists($GLOBALS["path_to_patterns"] . $GLOBALS["language"] . ".php")) {
	include($GLOBALS["path_to_patterns"] . $GLOBALS["language"] . ".php");
	// Only convert when the included file really provided a pattern string.
	$GLOBALS["patterns"] = (isset($patterns) && is_string($patterns)) ? convert_patterns($patterns) : array();
} else {
	$GLOBALS["patterns"] = array();
}

// Get dictionary: the configured file name is replaced by the list of its lines.
if(file_exists($GLOBALS["dictionary"])) {
	$GLOBALS["dictionary"] = file($GLOBALS["dictionary"]);
	// file() returns false when the path cannot be read (e.g. it is a directory).
	if($GLOBALS["dictionary"] === false) $GLOBALS["dictionary"] = array();
} else {
	$GLOBALS["dictionary"] = array();
}

// Each entry marks its break points with "/". The entry is stored lower-cased,
// keyed by the plain word, with the slashes replaced by the hyphen string.
foreach($GLOBALS["dictionary"] as $entry) {
	// mb_strtolower() rather than strtolower(): the entries are UTF-8 text and
	// strtolower() works on single bytes.
	$entry = mb_strtolower(trim($entry), 'UTF-8');
	if($entry === "") continue; // blank line in the dictionary file
	$GLOBALS["dictionary words"][str_replace("/", "", $entry)] = str_replace("/", $GLOBALS["hyphen"], $entry);
}
63
64// HYPHENATION
65
/**
 * Hyphenate a single word.
 *
 * Reads its configuration from $GLOBALS: "hyphen" (string to insert),
 * "charmin"/"charmax" (shortest/longest fragment looked up in the pattern
 * table), "leftmin"/"rightmin" (minimum number of characters kept before the
 * first and after the last break), "patterns" (table produced by
 * convert_patterns()) and "dictionary words" (pre-hyphenated words).
 *
 * The word is returned unchanged when it is shorter than "charmin" or already
 * contains the hyphen string. A dictionary hit is returned as stored.
 *
 * NOTE(review): the dictionary lookup uses $word exactly as passed in, while
 * the loading code in this file lower-cases the dictionary keys, so a word
 * containing upper-case letters never matches - confirm whether that is
 * intended.
 *
 * @param string $word Word without surrounding punctuation.
 * @return string The word with the hyphen string inserted at break points.
 */
function word_hyphenation($word) {
	$hyphen = $GLOBALS["hyphen"];
	$charmin = $GLOBALS["charmin"];
	$charmax = $GLOBALS["charmax"];

	if(mb_strlen($word) < $charmin) return $word;
	// An empty hyphen string cannot be searched for (and inserting it is a no-op).
	if($hyphen !== "" && mb_strpos($word, $hyphen) !== false) return $word;
	if(isset($GLOBALS["dictionary words"][$word])) return $GLOBALS["dictionary words"][$word];

	// Patterns mark word boundaries with "_", so the word is wrapped in them.
	$text_word = '_' . $word . '_';
	$word_length = mb_strlen($text_word);
	// Keep the original-case characters for the output; match in lower case.
	$single_character = mb_split_chars($text_word);
	$text_word = mb_strtolower($text_word);
	// Break value per position; index k means "after the first k characters of $word".
	$hyphenated_word = array();
	$numb3rs = array('0' => true, '1' => true, '2' => true, '3' => true, '4' => true, '5' => true, '6' => true, '7' => true, '8' => true, '9' => true);

	for($position = 0; $position <= ($word_length - $charmin); $position++) {
		$maxwins = min(($word_length - $position), $charmax);

		for($win = $charmin; $win <= $maxwins; $win++) {
			$fragment = mb_substr($text_word, $position, $win);
			if(!isset($GLOBALS["patterns"][$fragment])) continue;

			$pattern = $GLOBALS["patterns"][$fragment];
			$pattern_length = mb_strlen($pattern);
			$digits = 1;

			for($i = 0; $i < $pattern_length; $i++) {
				// Read whole characters: $pattern[$i] would address single bytes
				// and shift every offset after a multi-byte letter.
				$char = mb_substr($pattern, $i, 1);
				if(!isset($numb3rs[$char])) continue;

				// Subtracting the digits seen so far turns the offset inside the
				// annotated pattern into an offset inside the plain text.
				$zero = $position + $i - $digits;
				$value = (int) $char;
				// Where several patterns compete for a position, the highest value wins.
				if(!isset($hyphenated_word[$zero]) || $hyphenated_word[$zero] < $value) $hyphenated_word[$zero] = $value;
				$digits++;
			}
		}
	}

	$inserted = 0;
	$last_break = mb_strlen($word) - $GLOBALS["rightmin"];
	for($i = $GLOBALS["leftmin"]; $i <= $last_break; $i++) {
		// Odd values allow a break, even values forbid it.
		if(isset($hyphenated_word[$i]) && $hyphenated_word[$i] % 2 != 0) {
			// +1 skips the leading "_"; $inserted accounts for hyphens already added.
			array_splice($single_character, $i + $inserted + 1, 0, array($hyphen));
			$inserted++;
		}
	}

	// Drop the two "_" boundary markers again.
	return implode('', array_slice($single_character, 1, -1));
}
110
/**
 * Hyphenate every word of a text and leave markup untouched.
 *
 * The text is read character by character. Runs of characters that are not
 * listed in $word_boundaries are passed to word_hyphenation(); everything from
 * a "<" up to the next ">" is copied verbatim. When the tag that was just read
 * is named in the global $exclude_tags list, copying continues verbatim until
 * the matching closing tag, so the element's content is never hyphenated.
 * Markup that is still open when the text ends is copied verbatim as well.
 *
 * NOTE(review): "&" and ";" are word boundaries, so the name inside an HTML
 * entity such as "&amp;" is handed to word_hyphenation() like any other word.
 *
 * @param string $text Plain text or HTML.
 * @return string The text with hyphens inserted into its words.
 */
function hyphenation($text) {
	global $exclude_tags;
	// in_array() needs an array even when the option is missing or malformed.
	$excluded = is_array($exclude_tags) ? $exclude_tags : array();
	$word_boundaries = "<>\t\n\r\0\x0B !\"§$%&/()=?….,;:-–_„”«»‘’'/\\‹›()[]{}*+´`^|©℗®™℠¹²³";

	$text = (string) $text;
	$word = "";       // characters of the word being collected
	$tag = "";        // raw markup being collected; "" while outside markup
	$skip_name = "";  // lower-case name of the excluded element being skipped; "" if none
	$output = array();

	$length = mb_strlen($text);
	for($i = 0; $i < $length; $i++) {
		$char = mb_substr($text, $i, 1);

		// Ordinary character outside markup: extend the current word.
		if($tag === "" && mb_strpos($word_boundaries, $char) === false) {
			$word .= $char;
			continue;
		}

		// Any boundary character ends the current word.
		if($word !== "") { $output[] = word_hyphenation($word); $word = ""; }

		// Boundary character outside markup (this includes a stray ">").
		if($tag === "" && $char !== "<") {
			$output[] = $char;
			continue;
		}

		$tag .= $char;
		if($char !== ">") continue;

		if($skip_name === "") {
			// A tag is complete. Its name ends at whitespace, "/" or ">".
			$tag_name = preg_match('~^<([^\s/>]+)~', $tag, $match) ? strtolower($match[1]) : "";
			if($tag_name !== "" && in_array($tag_name, $excluded, true)) {
				// Keep collecting into $tag until the element is closed.
				$skip_name = $tag_name;
			} else {
				$output[] = $tag; $tag = "";
			}
		} else {
			// Inside an excluded element a ">" only ends the skipping when it
			// closes "</name>". Byte functions are safe here: "<" and ">" are
			// ASCII and never occur inside a UTF-8 multi-byte sequence.
			$candidate = strtolower(rtrim(substr($tag, strrpos($tag, "<"), -1)));
			if($candidate === "</" . $skip_name) {
				$output[] = $tag; $tag = ""; $skip_name = "";
			}
		}
	}

	// Flush whatever is still pending so no input is lost at the end.
	if($word !== "") $output[] = word_hyphenation($word);
	if($tag !== "") $output[] = $tag;

	return join($output);
}
137?>