1<?php
/**
 * Render a wiki page to XHTML, extract one table from it and export that
 * table as CSV, both to a file and as a returned string.
 *
 * The page is rendered with p_render('xhtml', ...), HTML entities are decoded
 * and tabs, newlines, carriage returns, NUL, vertical tabs and double spaces
 * are removed. The table used is the first "<table " element found at or
 * after the first occurrence of $startMarker in that cleaned-up markup; when
 * the marker does not occur (or is empty) the search starts at the beginning
 * of the page.
 *
 * Every row that yields at least one cell is written to $fileext with
 * fputcsv() and appended to the returned string via strput2csv(), which pads
 * short rows up to the number of cells found in the table's first row when
 * that row is a heading row.
 *
 * @param string $dokuPageId  Page id handed to wikiFN() / io_readWikiPage().
 * @param string $fileext     Path of the CSV file to create (opened with mode 'w').
 * @param string $startMarker Text to look for in the rendered page before
 *                            searching for the table.
 * @return string|false The CSV text, or false (after reporting through msg())
 *                      when the page cannot be rendered, no complete table is
 *                      found, or the output file cannot be opened.
 */
function scrapeTable2Csv($dokuPageId, $fileext, $startMarker) {
    $file = wikiFN($dokuPageId);
    $data = io_readWikiPage($file, $dokuPageId, false);
    $info = array();
    $raw = p_render('xhtml', p_get_instructions($data), $info);
    if ($raw == false) {
        // The id is passed as an argument, never as part of the format
        // string, so a '%' in it cannot break sprintf().
        msg(sprintf('Failed to read page %s', $dokuPageId));
        return false;
    }

    // Flatten the markup so the row/cell patterns below can match across
    // what used to be line breaks.
    $whitespace = array("\t", "\n", "\r", "\x20\x20", "\0", "\x0B");
    $content = str_replace($whitespace, "", html_entity_decode($raw));

    // Skip everything in front of the marker; fall back to the whole page
    // when the marker is empty or absent.
    if ((string) $startMarker !== '') {
        $markerPos = strpos($content, $startMarker);
        if ($markerPos !== false) {
            $content = substr($content, $markerPos);
        }
    }

    $closingTag = '</table>';
    $tableStart = strpos($content, '<table ');
    $tableEnd = ($tableStart === false) ? false : strpos($content, $closingTag, $tableStart);
    if ($tableStart === false || $tableEnd === false) {
        // Bail out before the output file is created/truncated.
        msg(sprintf('No table found on page %s', $dokuPageId));
        return false;
    }
    $table = substr($content, $tableStart, $tableEnd + strlen($closingTag) - $tableStart);

    preg_match_all("|<tr(.*)</tr>|U", $table, $rows);

    // Warnings are suppressed because the failure is reported through msg().
    $fp = @fopen($fileext, 'w');
    if ($fp === false) {
        msg(sprintf('Failed to open write file %s', $fileext));
        return false;
    }

    $csv_data = '';
    $numHeadings = 0;
    $numCols = 0;
    $row_index = 0;
    foreach ($rows[0] as $row) {
        if (strpos($row, '<th') === false) {
            preg_match_all("|<td(.*)</td>|U", $row, $cells);
        } else {
            // Heading rows may mix <th> and <td>, so match any "<t..." cell.
            $numHeadings = preg_match_all("|<t(.*)</t(.*)>|U", $row, $cells);
        }
        if ($row_index == 0) {
            // The column count is taken from the first row of the table only.
            $numCols = $numHeadings;
        }

        $rowCells = array();
        foreach ($cells[0] as $cell) {
            $rowCells[] = trim(strip_tags($cell));
        }

        // Rows without any cell are skipped entirely.
        if (count($rowCells) > 0) {
            // Separator, enclosure and escape are spelled out (these are
            // fputcsv's historical defaults) so the call does not rely on
            // the default escape argument.
            fputcsv($fp, $rowCells, ',', '"', '\\');
            $csv_data .= strput2csv($rowCells, $numCols - 1);
        }
        ++$row_index;
    }
    fclose($fp);

    return $csv_data;
}
54
/**
 * Format one row of values as a single CSV line and return it as a string.
 *
 * A string field is wrapped in $enclosure when it contains the delimiter, the
 * enclosure, a backslash, a newline, a carriage return, a tab or a space.
 * Inside a wrapped field:
 *  - an enclosure character is doubled;
 *  - when the enclosure is a single or double quote, a backslash directly in
 *    front of an enclosure is dropped (so \" becomes "");
 *  - for any other enclosure, an enclosure preceded by a backslash is kept
 *    as is and not doubled.
 * Non-string fields are appended using their string conversion, unquoted.
 *
 * Rows shorter than the expected width are padded with trailing delimiters so
 * that the line carries $numheadings delimiters in total.
 *
 * $fields used to declare a default value although the required $numheadings
 * follows it; that default could never be used and has been removed because
 * PHP 8 deprecates optional-before-required parameters.
 *
 * @param array  $fields      Values of the row, in column order.
 * @param int    $numheadings Zero-based index of the last expected column
 *                            (i.e. column count minus one); rows that are
 *                            already that wide or wider are not padded.
 * @param string $delimiter   Field separator.
 * @param string $enclosure   Character used to wrap fields that need quoting.
 * @return string The CSV line, terminated by "\n".
 */
function strput2csv($fields, $numheadings, $delimiter = ',', $enclosure = '"') {
    $escape_char = '\\';
    $enc_is_quote = in_array($enclosure, array('"', "'"));
    $last_index = count($fields) - 1;

    $parts = array();
    foreach ($fields as $field) {
        $needs_quoting = is_string($field) && (
            strpos($field, $delimiter) !== false ||
            strpos($field, $enclosure) !== false ||
            strpos($field, $escape_char) !== false ||
            strpos($field, "\n") !== false ||
            strpos($field, "\r") !== false ||
            strpos($field, "\t") !== false ||
            strpos($field, ' ') !== false
        );

        if (!$needs_quoting) {
            $parts[] = $field;
            continue;
        }

        $field_len = strlen($field);
        $escaped = false;
        $quoted = $enclosure;
        for ($ch = 0; $ch < $field_len; $ch++) {
            $char = $field[$ch];
            // Look ahead only while still inside the string; a trailing
            // backslash has no following character to inspect.
            $next_is_enclosure = ($ch + 1 < $field_len) && $field[$ch + 1] === $enclosure;

            if ($char === $escape_char && $next_is_enclosure && $enc_is_quote) {
                // Drop the backslash; the quote that follows gets doubled.
                continue;
            }

            if ($char === $escape_char) {
                $escaped = true;
            } elseif (!$escaped && $char === $enclosure) {
                $quoted .= $enclosure;
            } else {
                $escaped = false;
            }
            $quoted .= $char;
        }
        $parts[] = $quoted . $enclosure;
    }

    $csvline = implode($delimiter, $parts);

    // Pad up to $numheadings delimiters in total. An empty row has emitted
    // no field yet, so it is counted like a row holding one (empty) field.
    $missing = (int) ($numheadings - max($last_index, 0));
    if ($missing > 0) {
        $csvline .= str_repeat($delimiter, $missing);
    }

    return $csvline . "\n";
}
106