<?php

/**
 * Import Utils
 *
 * Helpers used while converting an HTML / MediaWiki-like export into
 * DokuWiki markup: footnotes, tables, images and underlined / italic text.
 *
 * @license    GPL 3 (http://www.gnu.org/licenses/gpl.html)
 * @author     Thibault Dory (thibault.dory@gmail.com)
 * @version    1.0
 */


/**
 * Turn the captured text of a footnote into a DokuWiki footnote "((...))".
 *
 * Takes a regex match array (the shape handed to a preg_replace_callback
 * callback - presumably how it is used; confirm against the caller).
 * New lines are replaced by spaces and <sub>/<sup> wrappers are dropped
 * (their content is kept).
 *
 * @param array $matches Match array; index 1 holds the footnote text.
 * @return string The footnote wrapped in "((" and "))".
 */
function convert_footnote($matches) {
    $content = $matches[1];

    # Remove new lines
    $content = preg_replace("/\n/U", " ", $content);
    # Remove subscripts
    $content = preg_replace("/<sub>(.*)<\/sub>/U", "$1", $content);
    # Remove superscript
    $content = preg_replace("/<sup>(.*)<\/sup>/U", "$1", $content);

    return "((".$content."))";
}

/**
 * Convert the body of a table whose rows are separated by "|-" (cells
 * introduced by "|", header cells by "!") into DokuWiki table rows
 * ("^" for header cells, "|" for normal cells).
 *
 * The order of the regex passes below matters: each one relies on the
 * output of the previous ones.
 *
 * @param string $table Table source, rows separated by "|-".
 * @return string The converted rows, one per line.
 */
function convert_table($table) {
    $new_table = "";
    $lines = explode("|-", $table);
    $count = 1;

    foreach ($lines as $line) {
        # Remove style inside columns
        $line = preg_replace('/\\|.*style=\\".*\\"/U', "", $line);
        # Remove style outside table
        $line = preg_replace("(style=\".*\")", "", $line);
        # Remove div tags
        $line = preg_replace("/<div.*>(.*)<\/div>/U", "$1", $line);
        # Remove double new lines into table line
        $line = preg_replace("/\|(.*)\n\n(.*)\n/", "|$1 $2\n", $line);
        # Remove lists inside table lines
        $line = preg_replace("/\n \* /U", " ", $line);
        $line = preg_replace("/ \* /U", " ", $line);

        # Check if there is a header, if not use the first line as header.
        # strpos() returns 0 when "!" is the very first character, so the
        # result must be compared with false explicitly.
        if ($count == 1 && strpos($line, '!') === false) {
            $line = preg_replace("/\|/", "!", $line)."\n\n";
        }

        # Convert ! to ^ in headings (only when preceded by whitespace, so a
        # "!" glued to a word is left alone)
        $line = trim(preg_replace('/\s+!/', ' ^', $line));
        # Add a ^ to end the heading line: work on the reversed string so the
        # first match is the text following the last ^ of the line
        $line = strrev(preg_replace("/(.*\^)/U", '^ $1', strrev($line), 1));
        # Remove buggy new lines inside table lines
        $line = preg_replace("/\|(.*)\n\n/", "|$1\n", $line);
        # Add NEWLINE separator between table lines
        $line = preg_replace("/\n\n/", "NEWLINE", $line);
        # Remove new lines into table lines
        $line = trim(preg_replace('/\|(.*)\s+/', '|$1', $line));
        # Remove double NEWLINE (just below the table header)
        $line = preg_replace("/NEWLINENEWLINE/", "\n", $line);
        # Remove the other NEWLINE separator and replace it by | followed by a new line
        $line = preg_replace("/NEWLINE/", "|\n", $line);

        # Every row but the first gets a closing cell delimiter
        if ($count != 1) {
            $line = $line." |\n";
        } else {
            $line = $line."\n";
        }

        # Replace == Headings == by ** Headings ** (longest marker first)
        $line = preg_replace("/=====(.*)=====/U", "**$1**", $line);
        $line = preg_replace("/====(.*)====/U", "**$1**", $line);
        $line = preg_replace("/===(.*)===/U", "**$1**", $line);
        $line = preg_replace("/==(.*)==/U", "**$1**", $line);

        $new_table = $new_table.$line;
        $count += 1;
    }

    return $new_table;
}


/**
 * Replace the "{{wiki:}}" placeholders of the wiki text by the images found
 * in the HTML, in document order.
 *
 * Each <IMG SRC=".." .. ALIGN=.. .. WIDTH=.. .. HEIGHT=.. ..> tag (upper-case,
 * attributes in that order) yields "{{image:SRC?WIDTHxHEIGHT}}". Alignment
 * follows the DokuWiki convention: a space on the right of the name means
 * left-aligned, a space on the left means right-aligned, both mean centered.
 * When the <DIV TYPE=HEADER> block itself contains an image, the first image
 * tag of the document is skipped.
 *
 * @param string $myHTMLContent HTML source the wiki text was generated from.
 * @param string $myWikiContent Wiki text containing "{{wiki:}}" placeholders.
 * @return string The wiki text with placeholders replaced.
 */
function get_images_from_html($myHTMLContent, $myWikiContent) {

    # Detect if an image is present in the header; if so, start taking
    # pictures into account one picture farther. The scan must stop at the
    # first </DIV> after the header opening, otherwise any image of the body
    # (followed by any later </DIV>, e.g. a footer) would count as a header image.
    $imageInHeader = preg_match(
        "/<DIV TYPE=HEADER>(?:(?!<\/DIV>).)*?<IMG[^>]*>.*?<\/DIV>/s",
        $myHTMLContent
    );
    $imageOffset = $imageInHeader ? 1 : 0;

    # Get all the image names, alignments, widths and heights. [^>]* keeps
    # every part of the match inside a single tag, and [^\s>]+ stops an
    # attribute value at the end of the tag.
    preg_match_all(
        "/<IMG SRC=\"([^\"]*)\"[^>]*ALIGN=([^\s>]+)[^>]*WIDTH=([^\s>]+)[^>]*HEIGHT=([^\s>]+)[^>]*>/",
        $myHTMLContent,
        $image_tags
    );

    $image_names = array();
    $total = count($image_tags[0]);
    for ($i = $imageOffset; $i < $total; $i++) {
        # Attribute values may be quoted
        $align  = trim($image_tags[2][$i], "\"'");
        $width  = trim($image_tags[3][$i], "\"'");
        $height = trim($image_tags[4][$i], "\"'");

        if ($align == "LEFT") {
            # left-aligned: padding on the right
            $left_pad = "";
            $right_pad = " ";
        } elseif ($align == "RIGHT") {
            # right-aligned: padding on the left
            $left_pad = " ";
            $right_pad = "";
        } elseif ($align == "CENTER") {
            $left_pad = " ";
            $right_pad = " ";
        } else {
            $left_pad = "";
            $right_pad = "";
        }

        $image_names[] = "{{".$left_pad."image:".$image_tags[1][$i]."?".$width."x".$height.$right_pad."}}";
    }

    # Replace the placeholders one by one. Plain string splicing is used so
    # that "$" or "\" in a file name is never read as a regex back-reference.
    $placeholder = "{{wiki:}}";
    $search_from = 0;
    foreach ($image_names as $image_name) {
        $at = strpos($myWikiContent, $placeholder, $search_from);
        if ($at === false) {
            break;
        }
        $myWikiContent = substr_replace($myWikiContent, $image_name, $at, strlen($placeholder));
        $search_from = $at + strlen($image_name);
    }

    # Some image align produce html align divs in the wiki text, replace them by hand
    $myWikiContent = preg_replace('/<div align="right">{{image:(.*)}}<\/div>/U', '{{ image:$1}}', $myWikiContent);
    $myWikiContent = preg_replace('/<div align="left">{{image:(.*)}}<\/div>/U', '{{image:$1 }}', $myWikiContent);
    $myWikiContent = preg_replace('/<div align="center">{{image:(.*)}}<\/div>/U', '{{ image:$1 }}', $myWikiContent);

    return $myWikiContent;
}


/**
 * Collect the underlined (<U>..</U>) or italic (<I>..</I>) sentences of an
 * HTML document.
 *
 * New lines of $html are replaced by spaces before searching. For every
 * tagged sentence the inner HTML tags are stripped, and the result records
 * which occurrence of that clean text (0-based, counted in the HTML) the
 * tagged one is.
 *
 * @param string $html HTML source.
 * @param string $type "underlined" or "italic"; anything else yields array().
 * @return array Map "offset of the clean sentence in the HTML" =>
 *               array("sentence" => clean text, "count" => occurrence index).
 *               When the clean text cannot be located in the HTML (e.g. tags
 *               were stripped from its middle), the offset of the tagged
 *               sentence itself is used as key.
 */
function get_tagged_from_html($html, $type) {
    $results = array();

    if ($type == "underlined") {
        $tag_name = "U";
    } elseif ($type == "italic") {
        $tag_name = "I";
    } else {
        # Unknown type: nothing to look for
        return $results;
    }

    $html = str_replace("\n", " ", $html);
    preg_match_all("/<".$tag_name.">(.*)<\/".$tag_name.">/sU", $html, $tagged_sentences, PREG_OFFSET_CAPTURE);

    foreach ($tagged_sentences[1] as $sentence_info) {
        # Sentence text (new lines were already removed from $html) and the
        # offset at which it begins
        $sentence = $sentence_info[0];
        $start = $sentence_info[1];

        # Remove html tags from sentence
        $clean_sentence = preg_replace("/<.*>/sU", "", $sentence);
        if ($clean_sentence === "") {
            # Nothing visible to tag; an empty pattern would match everywhere
            continue;
        }

        # Every occurrence of the raw sentence and of the clean one
        preg_match_all("/".preg_quote($sentence, "/")."/", $html, $matches, PREG_OFFSET_CAPTURE);
        preg_match_all("/".preg_quote($clean_sentence, "/")."/", $html, $clean_matches, PREG_OFFSET_CAPTURE);

        # Find how many times this same sentence appears before the right one
        $count = count_from_pos($matches, $clean_matches, $start);

        if (isset($clean_matches[0][$count])) {
            $clean_start = $clean_matches[0][$count][1];
        } else {
            # No clean occurrence lines up with this tagged sentence
            $clean_start = $start;
        }

        $results[$clean_start] = array("sentence" => $clean_sentence, "count" => $count);
    }

    return $results;
}


/**
 * Find which occurrence of the clean sentence corresponds to the tagged
 * sentence starting at $pos.
 *
 * A clean occurrence corresponds when its offset equals $pos plus the offset
 * of the clean text inside one of the raw matches.
 *
 * @param array $matches       preg_match_all() result (PREG_OFFSET_CAPTURE) for the raw sentence.
 * @param array $clean_matches preg_match_all() result (PREG_OFFSET_CAPTURE) for the clean sentence.
 * @param int   $pos           Offset of the tagged sentence.
 * @return int Index of the corresponding clean occurrence, or
 *             count($clean_matches[0]) when none corresponds.
 */
function count_from_pos($matches, $clean_matches, $pos) {
    $total = count($clean_matches[0]);

    for ($count = 0; $count < $total; $count++) {
        $clean_match = $clean_matches[0][$count];

        foreach ($matches[0] as $match) {
            if ($clean_match[0] === "") {
                # An empty needle sits at offset 0 of any string
                $inner_offset = 0;
            } else {
                $inner_offset = strpos($match[0], $clean_match[0]);
            }

            if ($inner_offset === false) {
                # The clean text is not part of this raw match: do not let
                # false be silently added as 0
                continue;
            }

            if (($pos + $inner_offset) == $clean_match[1]) {
                return $count;
            }
        }
    }

    return $count;
}


/**
 * Wrap the listed sentences of the wiki text in DokuWiki underline ("__")
 * or italic ("//") markers.
 *
 * For each entry, the occurrence number "count" of "sentence" inside the
 * current wiki text is the one that gets wrapped. Entries whose sentence is
 * empty, or whose occurrence does not exist in the wiki text, are skipped.
 *
 * @param string $myWikiContent Wiki text to modify.
 * @param array  $list          Entries array("sentence" => string, "count" => int),
 *                              as built by get_tagged_from_html(); keys are ignored.
 * @param string $type          "underlined" or "italic"; anything else leaves
 *                              the text untouched.
 * @return string The wiki text with the markers inserted.
 */
function replace_from_list($myWikiContent, $list, $type) {
    if ($type == "underlined") {
        $tag = "__";
    } elseif ($type == "italic") {
        $tag = "//";
    } else {
        return $myWikiContent;
    }

    foreach ($list as $sentence_info) {
        $sentence = $sentence_info["sentence"];
        $count = $sentence_info["count"];

        if ($sentence === "") {
            continue;
        }

        # Find position in wiki content. The delimiter is passed to
        # preg_quote() so that a "/" in the sentence cannot end the pattern.
        preg_match_all("/".preg_quote($sentence, "/")."/", $myWikiContent, $matches, PREG_OFFSET_CAPTURE);
        if (!isset($matches[0][$count])) {
            # The requested occurrence is not in the wiki text: leave it alone
            continue;
        }

        $start = $matches[0][$count][1];
        $myWikiContent = substr_replace($myWikiContent, $tag.$sentence.$tag, $start, strlen($sentence));
    }

    return $myWikiContent;
}


/**
 * Tell whether $start falls inside one of the sentences of $list.
 *
 * Each sentence covers the offsets from its key up to key + length
 * (upper bound included).
 *
 * NOTE(review): $end is accepted but not consulted, although the original
 * comment spoke of "the range $start => $end"; left unchanged because the
 * callers are not visible here - confirm the intended check.
 *
 * @param array $list  Map offset => sentence; a sentence is either a string
 *                     or an array("sentence" => string, ...) entry.
 * @param int   $start Offset to test.
 * @param int   $end   Unused (see note above).
 * @return bool True when $start lies within one of the sentences.
 */
function is_in_list_range($list, $start, $end) {
    foreach ($list as $pos => $sentence) {
        if (is_array($sentence)) {
            $sentence = isset($sentence["sentence"]) ? $sentence["sentence"] : "";
        }

        $current_end = $pos + strlen($sentence);
        if ($start >= $pos && $start <= $current_end) {
            return true;
        }
    }

    return false;
}