<?php
/**
 * Automatically converts tab-delimited tables into dokuwiki tables.
 *
 * @license		GPLv3 (http://www.gnu.org/licenses/gpl.html)
 * @link		http://www.dokuwiki.org/plugin:TabTables
 * @author		Mike "Pomax" Kamermans <pomax@nihongoresources.com>
 */

// Refuse to run when this file is requested directly rather than loaded by dokuwiki.
if (!defined('DOKU_INC')) {
	die();
}

// Fall back to the standard plugin directory when nobody defined it yet.
if (!defined('DOKU_PLUGIN')) {
	define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}

// Pull in the action plugin base class file.
require_once DOKU_PLUGIN . 'action.php';

/**
 * Action plugin that rewrites tab-delimited table data into dokuwiki table syntax
 * before the wiki text is parsed, and afterwards corrects the section edit offsets
 * so that they refer to positions in the text as the user wrote it.
 */
class action_plugin_tabtables extends DokuWiki_Action_Plugin {

	/**
	 * Set this flag to true for timing information. Note that when this is turned on,
	 * the plugin may generate output before dokuwiki has sent its own header information,
	 * which may cause problems for other plugins or even base configuration functionality.
	 */
	public $echo_timing = false;

	/**
	 * Offsets-at-character-position, recorded as tuples {pos,offset}, where pos is the
	 * position in the MODIFIED data, not the position in the original data, and offset is
	 * the CUMULATIVE offset at that position, not the offset relative to the previous tuple.
	 * One tuple is recorded per line, at the start of that line.
	 */
	public $offsets = array();

	/**
	 * During the run, contains the original wiki data as an array of lines.
	 */
	public $original;

	/**
	 * During the run, contains the modified wiki data as an array of lines.
	 */
	public $wikified;

	/**
	 * Required function, used by dokuwiki on the plugins configuration page.
	 *
	 * @return array plugin metadata (author, email, date, name, desc, url)
	 */
	public function getInfo() {
		return array(
			'author' => 'Mike "Pomax" Kamermans',
			'email'  => 'pomax@nihongoresources.com',
			'date'   => '2010-10-04',
			'name'   => 'TabTables',
			'desc'   => 'Turns tab delimited table data into dokuwiki tables',
			'url'	=> 'http://www.dokuwiki.org/plugin:tabtables');
	}

	/**
	 * Hooks the plugin into the parser: once before the wiki text is preprocessed
	 * (to rewrite tables) and once when the handler is done (to fix section offsets).
	 *
	 * NOTE(review): the by-reference parameter is kept so the signature stays as it was;
	 * presumably newer dokuwiki releases declare this method differently - confirm
	 * against the dokuwiki version this plugin is deployed on.
	 *
	 * @param object $controller the event controller to register the hooks with
	 */
	public function register(&$controller) {
		$controller->register_hook('PARSER_WIKITEXT_PREPROCESS', 'BEFORE', $this, '_tablify');
		$controller->register_hook('PARSER_HANDLER_DONE','BEFORE', $this, '_fixsecedit');
	}

	/**
	 * Tablify - runs through the base text line by line, and replaces tab delimited
	 * table data with proper dokuwiki table syntax. Lines inside code, file and nowiki
	 * blocks are left alone. The number of lines is never changed, only their content,
	 * which is what allows the per-line offset bookkeeping at the end of this function.
	 *
	 * @param object $event event whose ->data holds the raw wiki text; rewritten in place
	 * @param mixed  $param unused
	 */
	public function _tablify(&$event, $param)
	{
		$start = $this->microtime_float();
		if($this->echo_timing)  { echo "\n<!-- tabtables plugin output -->\n"; }

		// forget the offsets of any text processed earlier by this plugin instance,
		// otherwise _convert() would mix up positions from different texts
		$this->offsets = array();

		$this->original = explode("\n",$event->data);
		$this->wikified = $this->original;
		$line_count = count($this->original);

		// tabling administration
		$table_position = -1;
		$in_block = false;
		$_table_data = array();

		// iterate through the wiki data, line by line
		$code_blocked = false;
		$file_blocked = false;
		$nowiki_blocked = false;
		for($l=0; $l<$line_count; $l++) {
			$line = $this->original[$l];

			// blocking?
			if(strpos($line,"<code")!==false)		{ $code_blocked = true;		}
			if(strpos($line,"<file")!==false)		{ $file_blocked = true;		}
			if(strpos($line,"<nowiki")!==false)		{ $nowiki_blocked = true;	}
			// block clearing? (checked after blocking, so a block opened and
			// closed on the same line does not stay blocked)
			if(strpos($line,"</code>")!==false)		{ $code_blocked = false;	}
			if(strpos($line,"</file>")!==false)		{ $file_blocked = false;	}
			if(strpos($line,"</nowiki>")!==false)	{ $nowiki_blocked = false;	}
			// if blocked, immediately continue on to the next line
			if($code_blocked || $file_blocked || $nowiki_blocked) { continue; }

			// aggregate tabling lines (a tabling line either contains tabs,
			// or is an empty line inside a table block)
			if(strpos($line,"\t")!==false || ($in_block && $line==="")) {
				// set up table block if not aggregating yet
				if(!$in_block) {
					$in_block = true;
					$table_position = $l;
					$_table_data = array(); }

				// an empty line directly after the first row may be a header separator,
				// so it is aggregated; an empty line after two or more aggregated lines
				// ends the table block
				if($line==="" && count($_table_data)>1) {
					$in_block = $this->_finalise($_table_data, $table_position); }

				// if we didn't just finalise, aggregate the data
				if($in_block) { $_table_data[] = $line; }}

			// last option: this was not a tabling line, but we have a filled table block that needs processing
			elseif($in_block) { $in_block = $this->_finalise($_table_data, $table_position); }
		}

		// In case the table was the last thing on the page, we still have a table block to process
		if($in_block) { $this->_finalise($_table_data, $table_position); }

		if($this->echo_timing) { echo "<!-- initial parse took ".($this->microtime_float()-$start)."μs -->\n"; }
		$start = $this->microtime_float();

		// then, some administration so that we can perform section start/end
		// marker correction after parsing is done (next event)
		$char_pos = 0;
		$text_offset = 0;
		for($l=0; $l<$line_count; $l++) {
			// record offsets at the start of this line
			$this->offsets[] = array('pos'=>$char_pos,'offset'=>$text_offset);
			// pos/offset for next line will be:
			$char_pos += strlen($this->wikified[$l]) + 1;	// +1 for the missing newline
			$text_offset += strlen($this->wikified[$l]) - strlen($this->original[$l]); }

		if($this->echo_timing) { echo "<!-- second parse took ".($this->microtime_float()-$start)."μs -->\n"; }

		// and we're done...
		$event->data = implode("\n",$this->wikified);

		// but just for good measure, release the original/wikified line buffers
		$this->original = null;
		$this->wikified = null;
	}

	/**
	 * Gets the table data rewritten, then writes the rewritten lines into the modified
	 * container, starting at the line where the table block began.
	 *
	 * @param array $_table_data    the aggregated raw lines of one table block
	 * @param int   $table_position index of the block's first line in the wiki data
	 * @return bool always false, so that the iteration knows we're no longer in
	 *              table data aggregation mode
	 */
	public function _finalise(&$_table_data, $table_position)
	{
		$rewritten = $this->_replace($_table_data);
		foreach($rewritten as $r => $row) { $this->wikified[$table_position + $r] = $row; }
		return false;
	}

	/**
	 * This function does the actual syntax replacement for one table block.
	 *
	 * - every tab becomes a cell delimiter, and every row is wrapped in delimiters
	 * - a trailing empty line is dropped from the result
	 * - if the second line is empty (and not the last), the first line holds column
	 *   headers; the separator line is removed and a "" line appended instead, so the
	 *   number of lines stays the same
	 * - if the first line starts with a tab, the first cell of every row is a row header
	 *
	 * @param array $original the raw lines of the table block
	 * @return array the rewritten lines, indexed from 0; never longer than $original
	 */
	public function _replace($original)
	{
		$original = array_values($original);
		if(count($original)==0) { return array(); }
		$first_row = $original[0];

		// preprocess check: will this use row headers? ie, is the first table "cell" an empty tab?
		// (strict comparison: strpos gives false, not 0, when there is no tab at all)
		$has_row_headers = (strpos($first_row,"\t")===0);

		// how many cells are we actually dealing with?
		$cells = count(explode("\t",$first_row));
		$empty_line = "| " . str_repeat("|",$cells);

		// replace the tabulation with wiki table syntax
		$new_table_block = array();
		foreach($original as $row) {
			$new_table_block[] = $this->_format_row($row, $cells, "|"); }

		// is the last line for this table empty? if so, drop it so that it doesn't become an empty table line.
		if(end($new_table_block)===$empty_line) { array_pop($new_table_block); }

		// nothing left: the block was a single line without cell content. As before, such
		// a line is blanked when it starts with a tab, and left untouched otherwise.
		if(count($new_table_block)==0) {
			return $has_row_headers ? array("") : array(); }

		// is the second line for this table empty? and not the last line?
		// If so, the first line contains headers rather than table data
		$has_column_headers = (count($new_table_block)>2 && $new_table_block[1]===$empty_line);
		if($has_column_headers) {
			// clear the header/content separator line, keeping the line count intact
			array_splice($new_table_block, 1, 1);
			$new_table_block[] = "";
		}

		// header styling for the first line. It is rebuilt from the raw row rather than
		// swapping every "|" for "^", so pipes inside cell content (such as in links)
		// are left alone.
		if($has_column_headers || $has_row_headers) {
			$new_table_block[0] = $this->_format_row($first_row, $cells, "^"); }

		// does the table have row headers? if so, we need both row and column header styling
		if($has_row_headers) {
			$new_table_block[0] = preg_replace("/^\^ /","| ",$new_table_block[0]);
			$row_count = count($new_table_block);
			for($r=1; $r<$row_count; $r++) {
				$new_table_block[$r] = preg_replace("/^\| /","^ ",$new_table_block[$r]); }}

		// done
		return $new_table_block;
	}

	/**
	 * Turns one raw tab-delimited row into one line of wiki table syntax.
	 *
	 * @param string $row       the raw line
	 * @param int    $cells     number of cells, used for rows without any content
	 * @param string $delimiter cell delimiter: "|" for data cells, "^" for header cells
	 * @return string the row in wiki table syntax
	 */
	private function _format_row($row, $cells, $delimiter)
	{
		// a row without content becomes a line of empty cells
		if(trim($row)==="") { return $delimiter . " " . str_repeat($delimiter,$cells); }
		return $delimiter . " " . str_replace("\t"," ".$delimiter." ",$row) . " " . $delimiter;
	}

	/**
	 * Modifying the raw data has as side effect that the sectioning is based on the
	 * modified data, not the original. This means that after processing, we need to
	 * adjust the section start/end markers so that they point to start/end positions
	 * in the original data, not the modified data.
	 *
	 * This function is based on the correction functions in the linebreak plugin,
	 * by Christopher Smith (see http://www.dokuwiki.org/plugin:linebreak)
	 *
	 * @param object $event event whose ->data->calls holds the instruction list
	 * @param mixed  $param unused
	 */
	public function _fixsecedit(&$event, $param)
	{
		$start = $this->microtime_float();
		$calls = &$event->data->calls;
		$count = count($calls);

		if($this->echo_timing) { echo "<!-- offset correction: running through ".$count." instructions -->\n"; }

		// iterate through the instruction list and set the file offset values
		// back to the values they would be if no tabling syntax had been added by this plugin
		for($i=0; $i<$count; $i++) {
			if($calls[$i][0]==='section_edit') {
				$calls[$i][1][0] = $this->_convert($calls[$i][1][0]);
				$calls[$i][1][1] = $this->_convert($calls[$i][1][1]);
				$calls[$i][2] = $this->_convert($calls[$i][2]); }}

		if($this->echo_timing) { echo "<!-- offset correction took ".($this->microtime_float()-$start)."μs -->\n\n"; }
	}

	/**
	 * Convert modified raw wiki offset value ($pos) back to the unmodified value.
	 *
	 * @param int $pos character position in the modified wiki data
	 * @return int the corresponding position in the original wiki data
	 */
	public function _convert($pos)
	{
		// find the offset that applies to this character position: the one recorded
		// for the last line that starts at or before $pos
		$offset = 0;
		foreach($this->offsets as $tuple) {
			if($pos < $tuple['pos']) { break; }
			$offset = $tuple['offset']; }

		// return offset-corrected position
		return $pos - $offset;
	}

	/**
	 * Debugging helper function - gives us the microsecond
	 * timestamp (in actual microseconds, not seconds).
	 *
	 * @return float the current unix timestamp, in microseconds
	 */
	public function microtime_float()
	{
		return 1000000 * microtime(true);
	}
}
?>