1<?php
2
3/**
4 * Import Utils
5 *
6 * @license     GPL 3 (http://www.gnu.org/licenses/gpl.html)
7 * @author      Thibault Dory (thibault.dory@gmail.com)
8 * @version     1.0
9 */
10
11
/**
 * Turn a captured footnote body into a double-parenthesis footnote.
 *
 * The signature (a single match array) suggests this is meant to be used
 * as a regex callback - presumably with preg_replace_callback; confirm
 * against the caller.
 *
 * @param array $matches Match array; index 1 holds the footnote text.
 * @return string The footnote text on one line, with <sub>/<sup> wrappers
 *                removed (their inner text is kept), enclosed in "((" and "))".
 */
function convert_footnote($matches) {

   #Ordered substitutions: each one runs on the result of the previous one
   $cleanups = array(
      "/\n/U"                => " ",   #new lines become spaces
      "/<sub>(.*)<\/sub>/U"  => "$1",  #keep only the subscript text
      "/<sup>(.*)<\/sup>/U"  => "$1",  #keep only the superscript text
   );

   $body = $matches[1];
   foreach ($cleanups as $regex => $substitute) {
      $body = preg_replace($regex, $substitute, $body);
   }

   return "((".$body."))";
}
24
/**
 * Rewrite the rows of a table from "|-" separated markup (cells introduced
 * by "|", header cells by "!") into rows that use "^" for header cells and
 * "|" for regular cells.
 *
 * The input is split on "|-"; every chunk is treated as one table row and
 * goes through the same ordered chain of regex clean-ups (style attributes,
 * div wrappers, embedded lists and stray new lines are removed). The order
 * of the substitutions matters: later steps rely on the "NEWLINE" markers
 * and on the "^" characters produced by earlier ones.
 *
 * If the first row contains no "!" header marker, its "|" separators are
 * turned into "!" so that the first row becomes the header.
 *
 * @param string $table Raw table markup.
 * @return string The converted rows, concatenated.
 */
function convert_table($table) {
   $new_table = "";
   $lines = explode("|-", $table);
   $count = 1;
   foreach ($lines as $line) {

           #Remove style inside columns
           $line = preg_replace('/\\|.*style=\\".*\\"/U', "",$line);
           #Remove style outside table
           $line = preg_replace("(style=\".*\")","",$line);
           #Remove div tags
           $line = preg_replace("/<div.*>(.*)<\/div>/U","$1",$line);
           #Remove double new lines into table line
           $line = preg_replace("/\|(.*)\n\n(.*)\n/","|$1 $2\n",$line);
           #Remove lists inside table lines
           $line = preg_replace("/\n  \* /U"," ",$line);
           $line = preg_replace("/ \* /U"," ",$line);
           #Check if there is a header, if not use the first line as header.
           #strpos() returns 0 (which is falsy) when the "!" is the very first
           #character, so the result has to be compared against false explicitly;
           #otherwise a row that starts with "!" is mistaken for a header-less row.
           if (strpos($line,'!') === false && $count == 1) {
               $line = preg_replace("/\|/","!",$line)."\n\n";
           }

           #Remove buggy div tags
           #$line = preg_replace("/\|*\n>(.*)<\/div>*\n*\|/U","|$1|",$line);

           #Convert ! to ^ in headings
           $line = trim(preg_replace('/\s+!/', ' ^', $line));
           #Add a ^ to end the heading line (done on the reversed string so
           #that only the last ^ of the row is affected)
           $line = strrev(preg_replace("/(.*\^)/U", '^ $1', strrev($line), 1));
           #Remove buggy new lines inside table lines
           $line = preg_replace("/\|(.*)\n\n/","|$1\n",$line);
           #Add NEWLINE separator between table lines
           $line = preg_replace("/\n\n/","NEWLINE",$line);
           ##Remove new lines into table lines
           $line = trim(preg_replace('/\|(.*)\s+/', '|$1', $line));
           #Remove double NEWLINE (just below the table header)
           $line = preg_replace("/NEWLINENEWLINE/","\n",$line);
           #Remove the other NEWLINE separator and replace it by | followed by a new line
           $line = preg_replace("/NEWLINE/","|\n",$line);
           #Every row but the first gets a closing cell separator
           if ($count != 1) {
              $line = $line." |\n";
           } else {
              $line = $line."\n";
           }

           #Replace == Headings == by ** Headings ** (longest marker first)
           $line = preg_replace("/=====(.*)=====/U","**$1**",$line);
           $line = preg_replace("/====(.*)====/U","**$1**",$line);
           $line = preg_replace("/===(.*)===/U","**$1**",$line);
           $line = preg_replace("/==(.*)==/U","**$1**",$line);
           $new_table = $new_table.$line;
           $count += 1;
   }

   return $new_table;
}
81
82
/**
 * Replace the "{{wiki:}}" image placeholders of the wiki text with real
 * image links built from the <IMG> tags found in the HTML.
 *
 * The n-th usable <IMG> tag of $myHTMLContent replaces the n-th "{{wiki:}}"
 * placeholder of $myWikiContent. When an image is detected inside a
 * <DIV TYPE=HEADER> block, the first <IMG> tag is skipped.
 *
 * Alignment follows the DokuWiki convention, which is also the one used by
 * the <div align="..."> rewrites at the end of this function: a space after
 * "{{" aligns the image to the right, a space before "}}" aligns it to the
 * left, both centre it.
 *
 * @param string $myHTMLContent HTML source containing upper-case <IMG ...> tags
 *                              with SRC, ALIGN, WIDTH and HEIGHT attributes
 *                              (in that order).
 * @param string $myWikiContent Wiki text containing "{{wiki:}}" placeholders.
 * @return string The wiki text with placeholders replaced.
 */
function get_images_from_html($myHTMLContent, $myWikiContent){

      #Detect if an image is present in the header, if it is the case, start taking picture into account one picture farther
      $imageInHeader = preg_match("/<DIV TYPE=HEADER>.*<IMG(.*)>.*<\/DIV>/s",  $myHTMLContent);
      $imageOffset = $imageInHeader ? 1 : 0;

      #Get all the images names, width and height
      preg_match_all("/<IMG SRC=\"(.*?)\".*ALIGN=(\S{1,}).*WIDTH=(\S{1,}).*HEIGHT=(\S{1,}).*?>/", $myHTMLContent, $image_tags);

      $placeholder = "{{wiki:}}";
      $image_total = count($image_tags[0]);
      for ($i = $imageOffset; $i < $image_total; $i++) {
        #Space on the left = right aligned, space on the right = left aligned
        switch ($image_tags[2][$i]) {
          case "LEFT":
            $left_pad = "";
            $right_pad = " ";
            break;
          case "RIGHT":
            $left_pad = " ";
            $right_pad = "";
            break;
          case "CENTER":
            $left_pad = " ";
            $right_pad = " ";
            break;
          default:
            $left_pad = "";
            $right_pad = "";
        }
        $image_link = "{{".$left_pad."image:".$image_tags[1][$i]."?".$image_tags[3][$i]."x".$image_tags[4][$i].$right_pad."}}";

        #Replace the first remaining placeholder. This is a literal replacement
        #on purpose: used as a preg_replace() replacement string, a file name
        #containing "$1" or "\1" would be interpreted as a back-reference.
        $at = strpos($myWikiContent, $placeholder);
        if ($at === false) {
          #No placeholder left, the remaining images have nowhere to go
          break;
        }
        $myWikiContent = substr_replace($myWikiContent, $image_link, $at, strlen($placeholder));
      }

      #Some image align produce html align divs in the wiki text, replace them by hand
      $myWikiContent = preg_replace("/<div align=\"right\">{{image:(.*)}}<\/div>/U", "{{ image:$1}}", $myWikiContent);
      $myWikiContent = preg_replace("/<div align=\"left\">{{image:(.*)}}<\/div>/U", "{{image:$1 }}", $myWikiContent);
      $myWikiContent = preg_replace("/<div align=\"center\">{{image:(.*)}}<\/div>/U", "{{ image:$1 }}", $myWikiContent);

      return $myWikiContent;
}
127
128
/**
 * Collect the sentences that are underlined (<U>...</U>) or italic
 * (<I>...</I>) in an HTML document.
 *
 * New lines of $html are replaced by spaces first. For every tagged
 * sentence the function strips the nested HTML tags and uses
 * count_from_pos() to work out which occurrence (0-based) of the stripped
 * text inside the HTML corresponds to the tagged one.
 *
 * @param string $html HTML source; the tags are matched in upper case only.
 * @param string $type Either "underlined" or "italic". Any other value
 *                     yields an empty result.
 * @return array Map of byte offset of the stripped sentence in the HTML =>
 *               array("sentence" => stripped text, "count" => occurrence
 *               index). When the occurrence cannot be located the entry is
 *               stored under the "" key, which is where the unchecked
 *               lookup used to put it.
 */
function get_tagged_from_html($html, $type) {
  $results = array();

  if ($type == "underlined") {
    $tag_pattern = "/<U>(.*)<\/U>/sU";
  } elseif ($type == "italic") {
    $tag_pattern = "/<I>(.*)<\/I>/sU";
  } else {
    #Unknown type: nothing to look for
    return $results;
  }

  $html = str_replace("\n"," ",$html);
  if (!preg_match_all($tag_pattern, $html, $tagged_sentences, PREG_OFFSET_CAPTURE)) {
    #No tagged sentence (or regex failure)
    return $results;
  }

  foreach ($tagged_sentences[1] as $sentence_info) {

     #The sentence cannot contain new lines any more, they were replaced above
     $sentence = $sentence_info[0];
     #Get the offset at which begins the sentence
     $start = $sentence_info[1];

     #Find every occurrence of the raw (still tagged) sentence
     preg_match_all("/".preg_quote($sentence, "/")."/", $html, $matches, PREG_OFFSET_CAPTURE);

     #Remove html tags from sentence and find every occurrence of the result
     $clean_sentence = preg_replace("/<.*>/sU","",$sentence);
     preg_match_all("/".preg_quote($clean_sentence, "/")."/", $html, $clean_matches, PREG_OFFSET_CAPTURE);

     #Find how many times this same sentence appears before the right one
     $count = count_from_pos($matches, $clean_matches, $start);

     #count_from_pos() returns the number of clean matches when nothing fits,
     #which is one past the last valid index
     if (isset($clean_matches[0][$count])) {
       $clean_start = $clean_matches[0][$count][1];
     } else {
       $clean_start = "";
     }

     $results[$clean_start] = array("sentence" => $clean_sentence, "count" => $count);
  }

  return $results;
}
177
178
/**
 * Find which entry of $clean_matches starts where the stripped sentence is
 * expected to start.
 *
 * The expected offset is $pos plus the position of the stripped text inside
 * the raw match text. Both arrays have the layout produced by
 * preg_match_all() with PREG_OFFSET_CAPTURE: index 0 is a list of
 * array(text, offset) pairs. Note that only the text of each raw match is
 * used, not its offset.
 *
 * @param array $matches       Occurrences of the raw (tagged) sentence.
 * @param array $clean_matches Occurrences of the stripped sentence.
 * @param int   $pos           Offset of the tagged sentence.
 * @return int Index of the fitting clean match, or the number of clean
 *             matches when none fits.
 */
function count_from_pos($matches, $clean_matches, $pos){
  $index = 0;
  while ($index < count($clean_matches[0])) {
    $candidate = $clean_matches[0][$index];
    $candidate_text = $candidate[0];
    $candidate_offset = $candidate[1];

    foreach ($matches[0] as $raw) {
      #Offset at which the stripped text would sit inside this raw match
      $expected_offset = $pos + strpos($raw[0], $candidate_text);
      if ($expected_offset == $candidate_offset) {
        return $index;
      }
    }

    $index++;
  }

  #Nothing fitted: $index now equals the number of clean matches
  return $index;
}
196
197
/**
 * Wrap the listed sentences of a wiki text in underline ("__") or italic
 * ("//") markers.
 *
 * For every entry, the occurrence number "count" (0-based) of "sentence"
 * inside the current wiki text is wrapped. Wrapping does not change the
 * number of occurrences of a sentence, so the indexes of later entries
 * stay valid.
 *
 * @param string $myWikiContent Wiki text to modify.
 * @param array  $list          Entries of the form
 *                              array("sentence" => string, "count" => int);
 *                              the array keys are ignored.
 * @param string $type          "underlined" or "italic". Any other value
 *                              leaves the text unchanged.
 * @return string The wiki text with the markers inserted. Entries whose
 *                requested occurrence does not exist are skipped.
 */
function replace_from_list($myWikiContent, $list, $type) {
  if ($type == "underlined") {
    $tag = "__";
  } elseif ($type == "italic") {
    $tag = "//";
  } else {
    #Unknown type: there is no marker to insert
    return $myWikiContent;
  }

  foreach ($list as $sentence_info) {

     $sentence = $sentence_info["sentence"];
     $count = $sentence_info["count"];

     #An empty sentence would match at every position, nothing to wrap
     if ((string)$sentence === "") {
       continue;
     }

     #The delimiter has to be given to preg_quote(), otherwise a sentence
     #containing "/" terminates the pattern early and the match fails
     $pattern = "/".preg_quote($sentence, "/")."/";
     if (!preg_match_all($pattern, $myWikiContent, $matches, PREG_OFFSET_CAPTURE)) {
       continue;
     }

     #Skip sentences whose requested occurrence is not in the wiki text
     if (!isset($matches[0][$count])) {
       continue;
     }

     #Offsets and lengths are both in bytes, so they stay consistent
     $start = $matches[0][$count][1];
     $myWikiContent = substr_replace($myWikiContent, $tag.$sentence.$tag, $start, strlen($sentence));
  }
  return $myWikiContent;
}
230
231
#Tell whether $start lies inside the span of one of the strings of $list.
#$list maps a start offset to a string; the span of an entry goes from its
#offset to its offset plus the string length, both ends included.
#$end is accepted but is not consulted by the check.
function is_in_list_range($list, $start, $end){
  $found = false;
  foreach ($list as $offset => $text) {
    $last = $offset + strlen($text);
    $inside = ($offset <= $start) && ($start <= $last);
    if ($inside) {
      $found = true;
      break;
    }
  }
  return $found;
}
242
243
244?>
245