plugin/deflist/syntax.php

<?php
if (! class_exists('syntax_plugin_deflist')) {
	// Stand-alone bootstrap: these fallbacks only take effect when neither
	// constant has been defined by the code that loads this file.
	if (! defined('DOKU_PLUGIN')) {
		if (! defined('DOKU_INC')) {
			// DOKU_PLUGIN below is built as DOKU_INC . 'lib/plugins/', i.e.
			// this file is expected at <DOKU_INC>/lib/plugins/<name>/, which
			// puts the installation root THREE directory levels up. Climbing
			// only two levels would yield <DOKU_INC>/lib/ and make the
			// require_once() below look in lib/lib/plugins/.
			define('DOKU_INC',
				realpath(dirname(__FILE__) . '/../../../') . '/');
		} // if
		define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
	} // if
	// include parent class
	require_once(DOKU_PLUGIN . 'syntax.php');
	// Name of this plugin's syntax mode as used with the lexer.
	define('PLUGIN_DEFLIST', 'plugin_deflist');

/**
 * <tt>syntax_plugin_deflist.php </tt>- A PHP4 class that implements
 * a <tt>DokuWiki</tt> plugin for <tt>definition list</tt> elements.
 *
 * <p>
 * Definition list pattern:<br>
 * <tt>?? Term :: Term definition !!</tt>
 * </p>
 * <pre>
 *	Copyright (C) 2005, 2007 DFG/M.Watermann, D-10247 Berlin, FRG
 *			All rights reserved
 *		EMail : &lt;support@mwat.de&gt;
 * </pre>
 * <p>
 * <em>Credits:</em> This plugin was inspired by ideas of
 * <a href="http://wiki.splitbrain.org/plugin:definition_list"
 * target="_blank">Stephane Chamberland</a> and <a target="_blank"
 * href="http://wiki.splitbrain.org/plugin:definitions">Pavel
 * Vitis</a> both of whom wrote a similar plugin that <em>almost</em>
 * worked.
 * </p>
 * <div class="disclaimer">
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either
 * <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
 * License, or (at your option) any later version.<br>
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * </div>
 * @author <a href="mailto:support@mwat.de">Matthias Watermann</a>
 * @version <tt>$Id: syntax_plugin_deflist.php,v 1.14 2007/08/15 12:36:20 matthias Exp $</tt>
 * @since created 05-Aug-2005
 */
class syntax_plugin_deflist extends DokuWiki_Syntax_Plugin {

	/**
	 * @privatesection
	 */
	//@{

	/**
	 * Convert the specified <tt>$aID</tt> to a valid XHTML
	 * fragment identifier.
	 *
	 * <p>
	 * <a href="http://www.w3.org/TR/xhtml1/#guidelines" target="_blank">
	 * XHTML 1</a> (section C.8, Fragment Identifiers) gives the regex
	 * <tt>[A-Za-z][A-Za-z0-9:_.-]*</tt> for valid identifiers. Here
	 * it's slightly reduced to <tt>[A-Za-z][A-Za-z0-9_]+</tt> i.e.
	 * all non alphanumeric characters are replaced by underscores.
	 * </p>
	 * @param $aID String The raw ID string.
	 * @return String
	 * @private
	 * @since created 24-Aug-2005
	 * @see render()
	 */
	function _makeID(&$aID) {
		// The patterns are applied one after the other, each working on
		// the result of its predecessor; replacements pair up by position.
		$patterns = array(
			'|[^A-Za-z0-9_]|',	// every character outside the allowed set
			'|_{2,}|',			// runs of two or more underscores
			'|^[^A-Za-z]+|',	// anything before the first letter
			'|_+$|');			// underscores at the very end
		$replacements = array(
			'_',	// invalid character => underscore
			'_',	// underscore run => single underscore
			'',		// leading junk is dropped
			'');	// trailing underscores are dropped
		// utf8_deaccent() is provided by DokuWiki; presumably it maps
		// accented letters to plain ones -- confirm against its docs.
		$plain = utf8_deaccent($aID, 0);
		$anchor = preg_replace($patterns, $replacements, $plain);
		// DokuWiki (unlike the W3C) only works with lowercase internal
		// anchor names, hence the final case folding.
		return strtolower($anchor);
	} // _makeID()

	//@}
	/**
	 * @publicsection
	 */
	//@{

	/**
	 * Tell the parser whether the plugin accepts syntax mode
	 * <tt>$aMode</tt> within its own markup.
	 *
	 * <p>
	 * This method mostly returns <tt>TRUE</tt> since all other types
	 * are allowed within a definition list's <tt>DD</tt> sections.
	 * Only another definition list is denied since <em>nested DLs are
	 * currently not supported</em>.
	 * </p>
	 * @param $aMode String The requested syntaxmode.
	 * @return Boolean <tt>TRUE</tt> unless <tt>$aMode</tt>
	 * is <tt>plugin_deflist</tt> (which would result in a
	 * <tt>FALSE</tt> method result).
	 * @public
	 * @see getAllowedTypes()
	 */
	function accepts($aMode) {
		// The plugin's own mode is the only one refused (no nested lists).
		if (PLUGIN_DEFLIST == $aMode) {
			return FALSE;
		} // if
		return TRUE;
	} // accepts()

	/**
	 * Connect lookup pattern to lexer.
	 *
	 * @param $aMode String The desired rendermode.
	 * @public
	 * @see render()
	 */
	function connectTo($aMode) {
		// Never hook the list into its own mode.
		if (PLUGIN_DEFLIST != $aMode) {
			// All patterns end in a look-ahead assertion so that the DD
			// sections themselves stay UNMATCHED; only unmatched text is
			// subject to further substitution.
			// One variant per indentation style: 2+ blanks, or tab(s).
			$entryPatterns = array(
				'\n\x20{2,}\s*\x3F\x3F(?s).+?(?=::(?s).*!!\n\n)',
				'\n\t+\s*\x3F\x3F(?s).+?(?=::(?s).*!!\n\n)');
			foreach ($entryPatterns as $pattern) {
				$this->Lexer->addEntryPattern($pattern,
					$aMode, PLUGIN_DEFLIST);
			} // foreach
			// Patterns for the DTs following the first one in a list.
			$termPatterns = array(
				'\n\x20{2,}\s*\x3F\x3F(?s).+?\s*(?=::(?s).*?!!)',
				'\n\t+\s*\x3F\x3F(?s).+?\s*(?=::(?s).*?!!)');
			foreach ($termPatterns as $pattern) {
				$this->Lexer->addPattern($pattern, PLUGIN_DEFLIST);
			} // foreach
		} // if
	} // connectTo()

	/**
	 * Get an associative array with plugin info.
	 *
	 * <p>
	 * The returned array holds the following fields:
	 * <dl>
	 * <dt>author</dt><dd>Author of the plugin</dd>
	 * <dt>email</dt><dd>Email address to contact the author</dd>
	 * <dt>date</dt><dd>Last modified date of the plugin in
	 * <tt>YYYY-MM-DD</tt> format</dd>
	 * <dt>name</dt><dd>Name of the plugin</dd>
	 * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
	 * <dt>url</dt><dd>Website with more information on the plugin
	 * (eg. syntax description)</dd>
	 * </dl>
	 * @return Array Information about this plugin class.
	 * @public
	 * @static
	 */
	function getInfo() {
		// Assemble the descriptor field by field (key order matters to
		// callers that iterate over it).
		$info = array();
		$info['author'] = 'Matthias Watermann';
		$info['email'] = 'support@mwat.de';
		$info['date'] = '2007-08-15';
		$info['name'] = 'Definition List Syntax Plugin';
		$info['desc'] =
			'(X)HTML style Definition Lists [ ?? Term :: Definition !! ]';
		$info['url'] = 'http://wiki.splitbrain.org/plugin:deflist';
		return $info;
	} // getInfo()

	/**
	 * Define how this plugin is handled regarding paragraphs.
	 *
	 * <p>
	 * This method is important for correct XHTML nesting. It returns
	 * one of the following values:
	 * </p>
	 * <dl>
	 * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
	 * <dt>block</dt><dd>Open paragraphs need to be closed before
	 * plugin output.</dd>
	 * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
	 * </dl>
	 * @return String <tt>'normal'</tt> instead of the (correct) 'block'
	 * since otherwise the current DokuWiki parser would put all
	 * substitutions within a DD section in separate paragraphs.
	 * @public
	 * @static
	 */
	function getPType() {
		// Deliberately not 'block': see the method's doc comment.
		$paragraphType = 'normal';
		return $paragraphType;
	} // getPType()

	/**
	 * Where to sort in?
	 *
	 * @return Integer <tt>18</tt>, an arbitrary value smaller than that of
	 * <tt>Doku_Parser_Mode_preformated</tt> (20).
	 * @public
	 * @static
	 */
	function getSort() {
		// Sort position of this mode among the parser modes.
		$sortOrder = 18;
		return $sortOrder;
	} // getSort()

	/**
	 * Get the type of syntax this plugin defines.
	 *
	 * @return String <tt>'container'</tt>.
	 * @public
	 * @static
	 */
	function getType() {
		// Syntax category reported to the parser.
		$syntaxType = 'container';
		return $syntaxType;
	} // getType()

	/**
	 * Handler to prepare matched data for the rendering process.
	 *
	 * <p>
	 * The <tt>$aState</tt> parameter gives the type of pattern
	 * which triggered the call to this method:
	 * </p>
	 * <dl>
	 * <dt>DOKU_LEXER_ENTER</dt>
	 * <dd>a pattern set by <tt>addEntryPattern()</tt>.</dd>
	 * <dt>DOKU_LEXER_MATCHED</dt>
	 * <dd>a pattern set by <tt>addPattern()</tt> (here: DT data).</dd>
	 * <dt>DOKU_LEXER_EXIT</dt>
	 * <dd> a pattern set by <tt>addExitPattern()</tt>.</dd>
	 * <dt>DOKU_LEXER_UNMATCHED</dt>
	 * <dd>ordinary text encountered within the plugin's syntax mode
	 * which doesn't match any pattern (here: DD data).</dd>
	 * </dl>
	 * @param $aMatch String The text matched by the patterns.
	 * @param $aState Integer The lexer state for the match.
	 * @param $aPos Integer The character position of the matched text.
	 * @param $aHandler Object Reference to the Doku_Handler object.
	 * @return Array Index <tt>[0]</tt> holds the current
	 * <tt>$aState</tt>, index <tt>[1]</tt> the match (as a list of
	 * entries) prepared for the <tt>render()</tt> method.
	 * @public
	 * @see render()
	 * @static
	 */
	function handle($aMatch, $aState, $aPos, &$aHandler) {
		// Backslash-escaped delimiters and their plain counterparts;
		// kept in static storage so they are built only once.
		static $ESCAPED = NULL;
		static $PLAIN = NULL;
		if (! is_array($ESCAPED)) {
			$ESCAPED = array('\?', '\!', '\:');
			$PLAIN = array('?', '!', ':');
		} // if

		if ((DOKU_LEXER_ENTER == $aState)
		|| (DOKU_LEXER_MATCHED == $aState)) {
			// DT data: the split yields alternating pieces, first the
			// captured indentation plus '??' marker, then the term text.
			$pieces = preg_split('|\n+(\s*\?\?)\s*|', $aMatch,
				-1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
			$terms = array();
			$total = count($pieces);
			for ($k = 1; $total > $k; $k += 2) {
				// Nesting level: two blanks count like one tab, and the
				// two '?' characters are subtracted from the length.
				$level = strlen(
					str_replace('  ', "\t", $pieces[$k - 1])) - 2;
				$terms[] = array($level,
					str_replace($ESCAPED, $PLAIN, trim($pieces[$k])));
			} // for
			return array($aState, $terms);
		} // if

		if (DOKU_LEXER_UNMATCHED == $aState) {
			// DD data: probe each piece, most specific pattern first.
			// The value tells render() which kind of DD part it got:
			//	0 = complete DD without substitution(s),
			//	-1 = DD part before substitution(s),
			//	1 = DD part behind substitution(s).
			$probes = array(
				'|::\s*(.*?)\s*!!|s' => 0,
				'|::\s*(.*)|s' => -1,
				'|(.*?)\s*!!|s' => 1);
			$parts = preg_split('|\s*(::\s*.*?!!)|s', $aMatch,
				-1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
			$definitions = array();
			$total = count($parts);
			for ($k = 0; $total > $k; ++$k) {
				// Default: text between two substitutions (kind TRUE).
				$text = $parts[$k];
				$kind = TRUE;
				foreach ($probes as $regex => $candidate) {
					$found = array();
					if (preg_match($regex, $parts[$k], $found)) {
						$text = $found[1];
						$kind = $candidate;
						break;
					} // if
				} // foreach
				// Each entry is a one-element map: text => kind.
				$definitions[] = array(
					str_replace($ESCAPED, $PLAIN, $text) => $kind);
			} // for
			return array($aState, $definitions);
		} // if

		// DOKU_LEXER_EXIT (end of list) and anything else carry no data.
		return array($aState);
	} // handle()

	/**
	 * Add exit pattern to lexer.
	 *
	 * <p>
	 * A <tt>!!</tt> followed by two consecutive linefeeds marks the end
	 * of the list.
	 * </p>
	 * @note Access <em>public</em>
	 */
	function postConnect() {
		// Matches the linefeed that directly follows a closing '!!' and
		// is itself followed by another linefeed (both only asserted).
		$exitPattern = '(?<=!!)\n(?=\n)';
		$this->Lexer->addExitPattern($exitPattern, PLUGIN_DEFLIST);
	} // postConnect()

	/**
	 * Handle the actual output creation.
	 *
	 * <p>
	 * The method tests the given <tt>$aFormat</tt> returning
	 * <tt>FALSE</tt> if it's not supported. <tt>$aRenderer</tt>
	 * contains a reference to the renderer object which is currently
	 * handling the rendering. The contents of <tt>$aData</tt> is the
	 * return value of the <tt>handle()</tt> method.
	 * </p>
	 * @param $aFormat String The output format to be rendered.
	 * @param $aRenderer Object A reference to the renderer object.
	 * @param $aData Array The data created by the <tt>handle()</tt>
	 * method.
	 * @return Boolean <tt>TRUE</tt> if rendered successfully, or
	 * <tt>FALSE</tt> otherwise.
	 * @public
	 * @see handle()
	 * @static
	 */
	function render($aFormat, &$aRenderer, &$aData) {
		// Only XHTML output is produced by this plugin.
		if ('xhtml' != $aFormat) {
			return FALSE;
		} // if
		// The statics carry the list state from one call to the next:
		// a single list is rendered by a sequence of render() calls.
		static $LEVEL = 1;		// current nesting level
		static $INDD = array();	// marks whether there's an open DD
		static $CHARS;	static $ENTS;	// HTML special chars
		if (! is_array($CHARS)) {
			$CHARS = array('&','<', '>');
		} // if
		if (! is_array($ENTS)) {
			$ENTS = array('&#38;', '&#60;', '&#62;');
		} // if
		// XXX: All those <p> and </p> tags handled here are just a kind
		// of workaround for problems with the current DokuWiki renderer.
		// Basically they are __wrong__ here but, alas, without them
		// invalid HTML would be generated :-(
		// If and when DokuWiki becomes more stateful the superfluous
		// tags should be removed.
		switch ($aData[0]) {
			case DOKU_LEXER_ENTER:
				// since we have to use PType 'normal' we must close
				// the current paragraph
				$hits = array();
				if (preg_match('|\s*<p>\s*$|i', $aRenderer->doc, $hits)) {
					// the paragraph is still empty: drop its opening tag
					$aRenderer->doc = substr($aRenderer->doc,
						0, -strlen($hits[0])) . '<dl>';
				} else {
					$aRenderer->doc .= '</p><dl>';
				} // if
				// fall through to render initial DTs
			case DOKU_LEXER_MATCHED:
				// $aData[1] is a list of (level, term text) pairs
				foreach ($aData[1] as $dt) {
					$diff = $dt[0] - $LEVEL;
					if (0 < $diff) {
						// going UP __one__ level
						++$LEVEL;
						$hits = array();
						if (preg_match('|\s*<dd>\s*<p>\s*$|i',
							$aRenderer->doc, $hits)) {
							// reuse the just opened (still empty) DD
							$aRenderer->doc = substr($aRenderer->doc,
								0, -strlen($hits[0])) . '<dd><dl>';
						} else {
							// close a still open DD before nesting
							$aRenderer->doc .= (preg_match(
								'|\s*</d[dt]>\s*$|i', $aRenderer->doc))
									? '<dd><dl>'
									: '</dd><dd><dl>';
						} // if
					} else if (0 > $diff) {
						do {	// going back some levels
							--$LEVEL;
							$aRenderer->doc .= (isset($INDD[$LEVEL]))
								? '</dl>'
								: '</dl></dd>';
						} while (0 > ++$diff);
					// ELSE: no level change
					} // if
					$hits = array();
					if (preg_match('|\s*<p>\s*$|i', $aRenderer->doc, $hits)) {
						// remove unneeded P
						$aRenderer->doc = substr($aRenderer->doc,
							0, -strlen($hits[0]));
					} // if
					$id = $this->_makeID($dt[1]);
					// see http://www.w3.org/TR/xhtml1/#h-4.10
					$aRenderer->doc .= '<dt><a id="' . $id . '" name="'
						. $id . '">' . str_replace($CHARS, $ENTS, $dt[1])
						. '</a></dt>';
				} // foreach
				return TRUE;
			case DOKU_LEXER_UNMATCHED:
				$c = count($aData[1]);
				for ($i = 0; $c > $i; ++$i) {
					// Each entry is a one-element map (text => kind).
					// reset()/key() replace the former each() call, which
					// no longer exists as of PHP 8 and which also left the
					// array pointer behind the element, so that rendering
					// the same data a second time produced nothing.
					$mark = reset($aData[1][$i]);
					$dd = key($aData[1][$i]);
					$dd = str_replace($CHARS, $ENTS, $dd);
					if (TRUE === $mark) {
						// part between substitutions
						if (isset($INDD[$LEVEL])) {
							if (strlen($dd)) {
								$aRenderer->doc .= $dd;
							} // if
						} else {
							$aRenderer->doc .= (strlen($dd))
								? '</dl><p>' . $dd
								: '</dl>';
							$INDD[--$LEVEL] = TRUE;
						} // if
					} else if (0 == $mark) {
						// complete definition w/o substitutions
						if (strlen($dd)) {
							$aRenderer->doc .= '<dd><p>' . $dd . '</p></dd>';
						} // if
					} else if (0 > $mark) {
						// DD part before substitutions
						$aRenderer->doc .= (strlen($dd))
							? '<dd><p>' . $dd
							: '<dd><p>';
						$INDD[$LEVEL] = TRUE;
					} else if (0 < $mark) {
						// DD part behind substitutions
						if (isset($INDD[$LEVEL])) {
							if (strlen($dd)) {
								$aRenderer->doc .= $dd . '</p></dd>';
							} else {
								$hits = array();
								if (preg_match('|\s*<p>\s*$|i',
									$aRenderer->doc, $hits)) {
									// drop the dangling empty paragraph
									$aRenderer->doc = substr(
										$aRenderer->doc, 0,
										-strlen($hits[0])) . '</dd>';
								} else {
									$aRenderer->doc .= '</p></dd>';
								} // if
							} // if
							unset($INDD[$LEVEL]);
						} // if
						// ELSE: doesn't ever happen with non-empty $dd
					} // if
				} // for
				return TRUE;
			case DOKU_LEXER_EXIT:
				// Close all possibly open lists:
				while (0 < --$LEVEL) {
					$aRenderer->doc .= '</dl></dd>';
				} // while
				// Since we have to use PType 'normal' we must open
				// a new paragraph for the following text
				$aRenderer->doc = preg_replace('|\s*<p>\s*</p>\s*|', '',
					$aRenderer->doc) . '</dl><p>';
				// reset the state for the next list on the page
				$INDD = array();
				$LEVEL = 1;
			default:
				return TRUE;
		} // switch
	} // render()

	//@}
} // class syntax_plugin_deflist
} // if
?>