<?php
/**
 * Render a wiki page to XHTML, pick the first table found after a marker
 * string and export its rows as CSV.
 *
 * The rows are written to $fileext with fputcsv() and, in parallel, built up
 * as a string with strput2csv(), which pads short rows to the width of the
 * heading row. Relies on helpers defined outside this file: wikiFN(),
 * io_readWikiPage(), p_get_instructions(), p_render() and msg().
 *
 * @param string $dokuPageId  id of the page to render
 * @param string $fileext     path of the CSV file to (over)write
 * @param string $startMarker text that precedes the wanted table in the
 *                            rendered output; when it is empty or not found
 *                            the search starts at the top of the page
 * @return string|false the CSV text, or false when the page cannot be
 *                      rendered, no complete table is found, or the output
 *                      file cannot be opened (a message is passed to msg())
 */
function scrapeTable2Csv($dokuPageId, $fileext, $startMarker) {
    $file = wikiFN($dokuPageId);
    $data = io_readWikiPage($file, $dokuPageId, false);
    $info = array();
    $raw  = p_render('xhtml', p_get_instructions($data), $info);
    // Loose comparison on purpose: an empty rendering counts as a failure too.
    if ($raw == false) {
        msg('Failed to read page ' . $dokuPageId);
        return false;
    }

    // Drop line breaks, tabs, NUL/VT bytes and double spaces so that the
    // non-greedy patterns below (where "." does not match newlines) can span
    // a whole row or cell.
    // NOTE(review): entities are decoded before strip_tags() runs, so cell
    // text such as "&lt;b&gt;" will be treated as a tag and removed.
    $noise   = array("\t", "\n", "\r", "\x20\x20", "\0", "\x0B");
    $content = str_replace($noise, '', html_entity_decode($raw));

    // Skip everything in front of the marker, if there is one.
    if ((string) $startMarker !== '') {
        $markerPos = strpos($content, $startMarker);
        if ($markerPos !== false) {
            $content = substr($content, $markerPos);
        }
    }

    // Prefer a table tag with attributes; fall back to a bare <table>.
    $tableStart = strpos($content, '<table ');
    if ($tableStart === false) {
        $tableStart = strpos($content, '<table>');
    }
    $closeTag = '</table>';
    $tableEnd = ($tableStart === false) ? false : strpos($content, $closeTag, $tableStart);
    if ($tableStart === false || $tableEnd === false) {
        msg('Failed to find a table on page ' . $dokuPageId);
        return false;
    }
    $table = substr($content, $tableStart, $tableEnd + strlen($closeTag) - $tableStart);

    preg_match_all("|<tr(.*)</tr>|U", $table, $rows);

    $fp = @fopen($fileext, 'w');
    if ($fp === false) {
        msg('Failed to open write file ' . $fileext);
        return false;
    }

    $csv_data   = '';
    $numCols    = 0;
    $isFirstRow = true;
    foreach ($rows[0] as $row) {
        // A row containing "<th" is a heading row; its pattern accepts both
        // th and td cells. The first match also swallows the leading <tr ...>
        // tag, which strip_tags() removes again below.
        $isHeadingRow = (strpos($row, '<th') !== false);
        $pattern      = $isHeadingRow ? "|<t(.*)</t(.*)>|U" : "|<td(.*)</td>|U";
        $found        = (int) preg_match_all($pattern, $row, $cells);

        // Only a heading in the very first row fixes the column count used
        // for padding; otherwise rows are emitted unpadded.
        if ($isFirstRow) {
            $numCols    = $isHeadingRow ? $found : 0;
            $isFirstRow = false;
        }

        $values = array();
        foreach ($cells[0] as $cell) {
            $values[] = trim(strip_tags($cell));
        }
        if (!$values) {
            continue; // row without cells: nothing to export
        }

        fputcsv($fp, $values);
        $csv_data .= strput2csv($values, $numCols - 1);
    }
    fclose($fp);

    return $csv_data;
}

/**
 * Format one row as a CSV line terminated by "\n".
 *
 * String fields containing the delimiter, the enclosure, a backslash, a
 * space, a tab or a line break are wrapped in the enclosure. Inside such a
 * field an enclosure character is doubled; a backslash directly in front of
 * a quote enclosure is dropped (so \" becomes ""), while with any other
 * enclosure a backslash keeps the following enclosure from being doubled.
 * Non-string fields are appended as-is.
 *
 * @param array  $fields      values of the row
 * @param int    $numheadings index of the last heading column (column count
 *                            minus one); shorter rows are padded with
 *                            delimiters up to it, longer rows are left alone
 * @param string $delimiter   field separator
 * @param string $enclosure   quoting character
 * @return string the CSV line including the trailing newline
 */
function strput2csv($fields, $numheadings, $delimiter = ',', $enclosure = '"') {
    $escape_char  = '\\';
    $lastIndex    = count($fields) - 1;
    $enc_is_quote = in_array($enclosure, array('"', "'"), true);
    $triggers     = array($delimiter, $enclosure, $escape_char, "\n", "\r", "\t", ' ');

    $csvline  = '';
    $position = 0;
    foreach ($fields as $field) {
        $needsEnclosure = false;
        if (is_string($field)) {
            foreach ($triggers as $needle) {
                if (strpos($field, $needle) !== false) {
                    $needsEnclosure = true;
                    break;
                }
            }
        }

        if ($needsEnclosure) {
            $length  = strlen($field);
            $escaped = false;
            $csvline .= $enclosure;
            for ($ch = 0; $ch < $length; $ch++) {
                $char = $field[$ch];
                // Look ahead only while inside the string; a trailing
                // backslash has no successor.
                $next = ($ch + 1 < $length) ? $field[$ch + 1] : '';

                if ($char == $escape_char && $enc_is_quote && $next == $enclosure) {
                    // Drop the backslash; the quote that follows is doubled
                    // on the next iteration.
                    continue;
                }
                if ($char == $escape_char) {
                    $escaped = true;
                } elseif (!$escaped && $char == $enclosure) {
                    $csvline .= $enclosure; // double an unescaped enclosure
                } else {
                    $escaped = false;
                }
                $csvline .= $char;
            }
            $csvline .= $enclosure;
        } else {
            $csvline .= $field;
        }

        if ($position++ != $lastIndex) {
            $csvline .= $delimiter;
        }
    }

    // Pad short rows. $emitted is the number of delimiters written so far;
    // an empty row has written none, so it must not be counted as -1.
    $emitted = max($lastIndex, 0);
    if ($emitted < $numheadings) {
        $csvline .= str_repeat($delimiter, (int) ($numheadings - $emitted));
    }

    return $csvline . "\n";
}