* @version 0.2
* @date 2023-05-28
*/
/**
* This class is based originally on the PHP PEAR package
* Structures_BibTeX, (c) 1997-2005 The PHP Group, Elmar Pitschke
* For more information about the original PEAR package, please visit
* http://pear.php.net/package/Structures_BibTex
*
* Some additional modifications to the original PHP PEAR package have
* been made by Raphael Reitzig in 2010 for his bib2tpl program.
* For more information about the bib2tpl program, please visit
* http://lmazy.verrech.net/bib2tpl/
*
* During the transition from the original PHP PEAR package to this class, which
* forms part of the DokuWiki plugin bibtex, several unnecessary functions, such as
* the output to HTML and RTF, have been removed, as well as the dependency on PEAR.
*
* Other functions, such as the handling of BibTeX's @STRING patterns and basic
* parsing of LaTeX code common in BibTeX entries (e.g. \emph{}), have been added.
*
* This class is no longer PHP 4 compatible, as was the original PEAR package.
*/
class bibtexparser_plugin_bibtex4dw
{
/**
 * Handle to SQLite db
 *
 * NOTE(review): declared static, but the methods of this class read it as
 * $this->sqlite - confirm which form the callers actually set.
 */
public static $sqlite = array();
/**
 * Array with the BibTex Data
 *
 * @access public
 * @var array
 */
public $data = array();
/**
 * String with the BibTex content
 *
 * @access public
 * @var string
 */
public $content;
/**
 * Array with the BibTex Strings
 *
 * @access private
 * @var array
 */
private $_strings = array();
/**
 * Array with the BibTex entries
 *
 * @access public
 * @var array
 */
public $entries = array();
/**
 * Array with possible Delimiters for the entries
 *
 * @access private
 * @var array
 */
private $_delimiters;
/**
 * Array with replacements for LaTeX commands in fields of entries
 *
 * The patterns are searched for only in LaTeX math mode ($...$)
 *
 * As the output is in HTML, the best is to use the named representatives
 * of the respective signs.
 *
 * The replacements are applied in array order, so a command that is a prefix
 * of another one (\cdot / \cdots) has to be listed AFTER the longer command.
 *
 * @access private
 * @var array
 */
private $_latexMathmodeReplacements = array(
    '\to' => '→',
    '\bullet' => '•',
    '\circ' => '°',
    '\varepsilon' => 'ε',
    '\vartheta' => 'ϑ',
    '\varpi' => 'ϖ',
    '\varrho' => 'ρ',
    '\varsigma' => 'ς',
    '\varphi' => 'φ',
    '\cdots' => '···',
    '\cdot' => '·',
    '\rm ' => ''
);
/**
 * Array with Greek letters to replace the LaTeX commands in fields of entries
 *
 * The greek letters are searched for only in LaTeX math mode ($...$)
 *
 * They will be checked both for lower and upper letters, as these differ only
 * in the first character of their respective name.
 *
 * Note: The LaTeX mathmode replacements (see above) will be done first, thus
 * it is possible to use that to deal with special greek characters as
 * \varepsilon.
 *
 * @access private
 * @var array
 */
private $_greekLetters = array(
    'alpha','beta','gamma','delta','epsilon',
    'zeta','eta','theta','iota','kappa',
    'lambda','mu','nu','xi','omicron',
    'pi','rho','sigma','tau','upsilon',
    'phi','chi','psi','omega',
);
/**
 * Array to store warnings
 *
 * @access public
 * @var array
 */
public $warnings = array();
/**
 * Run-time configuration options
 *
 * @access private
 * @var array
 */
private $_options;
/**
 * Array with the "allowed" entry types
 *
 * @access public
 * @var array
 */
public $allowedEntryTypes;
/**
 * Author Format Strings
 *
 * @access public
 * @var string
 */
public $authorstring;
/**
 * Delimiter placed between the formatted names of several authors/editors
 *
 * Declared explicitly because the constructor assigns it; creating it
 * dynamically is deprecated as of PHP 8.2.
 *
 * @access public
 * @var string
 */
public $authordelimiter = '; ';
/**
 * Position marker, reset to 0 whenever new content is loaded
 *
 * Kept public because it used to be created dynamically (and therefore
 * was publicly visible) by loadFile()/loadString().
 *
 * @access public
 * @var int
 */
public $_pos = 0;
/**
 * Previous position marker, reset to 0 whenever new content is loaded
 *
 * @access public
 * @var int
 */
public $_oldpos = 0;
/**
 * List of SQL statements to be inserted at once
 *
 * @access private
 * @var array
 */
private $_sqlStatements = array();
/**
 * Constructor
 *
 * Sets up the delimiter table, the default run-time options, the list of
 * allowed entry types and the author formatting, then applies $options on
 * top of the defaults.
 *
 * @access public
 * @param array $options Option name => value pairs; every pair is handed to
 *                       setOption(), which reports unknown option names
 * @return void
 */
public function __construct($options = array())
{
    // Opening delimiter => matching closing delimiter of a field value
    $this->_delimiters = array(
        '"' => '"',
        '{' => '}',
    );
    $this->data = array();
    $this->content = '';
    $this->warnings = array();
    $this->_options = array(
        'replaceLatex' => true,
        'stripDelimiter' => true,
        'validate' => true,
        'unwrap' => false,
        'wordWrapWidth' => false,
        'wordWrapBreak' => "\n",
        'wordWrapCut' => 0,
        'removeCurlyBraces' => true,
        'extractAuthors' => true,
    );
    // The defaults must exist before this loop: setOption() only accepts known keys
    foreach ($options as $option => $value) {
        $this->setOption($option, $value);
    }
    $this->allowedEntryTypes = array(
        'article',
        'book',
        'booklet',
        'conference',
        'inbook',
        'incollection',
        'inproceedings',
        'manual',
        'mastersthesis',
        'misc',
        'phdthesis',
        'proceedings',
        'techreport',
        'unpublished'
    );
    $this->authorstring = 'VON LAST, JR, FIRST';
    $this->authordelimiter = '; ';
}
/**
 * Changes one run-time configuration option
 *
 * Only names that already exist in the option table are accepted; for any
 * other name a DokuWiki message is issued and nothing is changed.
 *
 * @access public
 * @param string $option option name
 * @param mixed $value value for the option
 * @return bool true if the option was set, false if the name is unknown
 */
public function setOption($option, $value)
{
    // Guard clause: reject names that are not part of the option table
    if (!array_key_exists($option, $this->_options)) {
        msg("Unknown option $option", 2);
        return false;
    }
    $this->_options[$option] = $value;
    return true;
}
/**
 * Reads a given BibTex File
 *
 * On success the file content replaces $this->content and the position
 * markers are reset. On failure a DokuWiki message is issued.
 *
 * @access public
 * @param string $filename Name of the file
 * @return bool true on success, false if the file is missing or unreadable
 */
public function loadFile($filename)
{
    if (!file_exists($filename)) {
        msg("Could not find file $filename", 2);
        return false;
    }
    // Warnings are suppressed on purpose: the failure is reported through msg()
    $content = @file_get_contents($filename);
    if ($content === false) {
        // Keep $content a string; it is falsy just like the former "false" value
        $this->content = '';
        msg("Could not open file $filename", 2);
        return false;
    }
    $this->content = $content;
    $this->_pos = 0;
    $this->_oldpos = 0;
    return true;
}
/**
 * Takes the BibTeX source directly from a string
 *
 * Stores the string as the content to parse and resets both position markers.
 *
 * @access public
 * @param string $bib String containing bibtex
 * @return boolean always true (mirrors the return value of loadFile)
 */
public function loadString($bib)
{
    // Start from a clean position for the new content
    $this->_pos = 0;
    $this->_oldpos = 0;
    $this->content = $bib;
    return true;
}
/**
 * Parse bibliography stored in content and clear the content if the parsing is successful.
 *
 * The content is scanned character by character. Every complete entry is
 * handed over unparsed: to _prepareSqlStatement() if $sqlite is set (the
 * collected statements are executed at the end), to _storeEntryInClass()
 * otherwise. Warnings and $this->data are reset at the start.
 *
 * @access public
 * @param bool $sqlite Whether to write the entries to the SQLite DB instead of this object
 * @return boolean true if the braces of the content were balanced, false otherwise
 */
public function parseBibliography($sqlite = false)
{
    //The amount of opening braces is compared to the amount of closing braces
    //Braces inside comments are ignored
    $this->warnings = array();
    $this->data = array();
    $valid = true;
    $open = 0;           // brace depth inside the current entry
    $entry = false;      // true while the scanner is inside an entry
    $char = '';
    $lastchar = '';
    $buffer = '';
    $inField = false;    // true while inside a brace-delimited field value
    $openInField = 0;    // brace depth counted from the opening brace of the value
    $lastNonWsChar = '';
    $length = strlen($this->content);
    for ($i = 0; $i < $length; $i++) {
        $char = substr($this->content, $i, 1);
        if ((0 != $open) && ('@' == $char) && (!$inField)) {
            if (!$this->_checkAt($buffer)) {
                $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer);
                //To correct the data we need to insert a closing brace
                $char = '}';
                $i--;
            }
        }
        if ((0 == $open) && ('@' == $char)) { //The beginning of an entry
            $entry = true;
        } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is opening
            $open++;
            if (!$inField && ($lastNonWsChar == '=')) {
                // The brace directly following "=" opens the field value; count
                // it, so that the matching closing brace brings the depth to 0
                $inField = true;
                $openInField = 1;
            } elseif ($inField) {
                $openInField++;
            }
        } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is closing
            $open--;
            if ($inField) {
                $openInField--;
                if ($openInField <= 0) {
                    $inField = false;
                    $openInField = 0;
                }
            }
            if ($open < 0) { //More are closed than opened
                $valid = false;
            }
            if (0 == $open) { //End of entry
                $entry = false;
                $inField = false;
                $openInField = 0;
                // TODO: Some check for duplicate keys and issuing a warning if so?
                if ($sqlite) {
                    $this->_prepareSqlStatement($buffer);
                } else {
                    $this->_storeEntryInClass($buffer);
                }
                $buffer = '';
            }
        }
        if ($entry) { //Inside entry
            $buffer .= $char;
        }
        $lastchar = $char;
        // Double quotes are required here: '\t' in single quotes is a two
        // character string and would never equal a single character
        if ($char !== ' ' && $char !== "\t" && $char !== "\n" && $char !== "\r") {
            $lastNonWsChar = $char;
        }
    }
    //If open is one it may be possible that the last ending brace is missing
    // TODO: Handle situation with using SQLite DB
    if (1 == $open) {
        $entrydata = $this->_parseEntry($buffer);
        if (!$entrydata) {
            $valid = false;
        } else {
            $this->data[] = $entrydata;
            $buffer = '';
            $open = 0;
        }
    }
    if ($sqlite) {
        $this->_executeSqlStatements();
    }
    //At this point the open should be zero
    if (0 != $open) {
        $valid = false;
    }
    //Are there multiple entries with the same cite?
    // TODO: Meanwhile, as in both cases (SQLite and manual) bibtex keys are used as index,
    //       this situation shall no longer exist. Checking for duplicate keys needs be done above.
    if ($this->_options['validate']) {
        $seen = array();
        $notuniques = array();
        foreach ($this->data as $parsed) {
            // Entries without a cite key (@string, @preamble) cannot be duplicates
            if (!isset($parsed['cite'])) {
                continue;
            }
            if (isset($seen[$parsed['cite']])) {
                $notuniques[] = $parsed['cite'];
            } else {
                $seen[$parsed['cite']] = true;
            }
        }
        if (count($notuniques) > 0) { //Some values have not been unique!
            $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',', $notuniques));
        }
    }
    if ($valid) {
        $this->content = '';
        return true;
    }
    return false;
}
/**
 * Parses what is stored in content and clears the content if the parsing is successful.
 *
 * The content is split into entries by counting braces. Every complete entry
 * is either written to the SQLite DB (if $sqlite is set) or parsed with
 * _parseEntry() and appended to $this->data. Warnings and $this->data are
 * reset at the start.
 *
 * @access public
 * @param bool $sqlite Whether to write the entries to the SQLite DB instead of $this->data
 * @return boolean true if the braces of the content were balanced, false otherwise
 */
public function parse($sqlite = false)
{
    //The amount of opening braces is compared to the amount of closing braces
    //Braces inside comments are ignored
    $this->warnings = array();
    $this->data = array();
    $valid = true;
    $open = 0;        // brace depth inside the current entry
    $entry = false;   // true while the scanner is inside an entry
    $char = '';
    $lastchar = '';
    $buffer = '';
    $length = strlen($this->content);
    for ($i = 0; $i < $length; $i++) {
        $char = substr($this->content, $i, 1);
        if ((0 != $open) && ('@' == $char)) {
            if (!$this->_checkAt($buffer)) {
                $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer);
                //To correct the data we need to insert a closing brace
                $char = '}';
                $i--;
            }
        }
        if ((0 == $open) && ('@' == $char)) { //The beginning of an entry
            $entry = true;
        } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is opening
            $open++;
        } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is closing
            $open--;
            if ($open < 0) { //More are closed than opened
                $valid = false;
            }
            if (0 == $open) { //End of entry
                $entry = false;
                if ($sqlite) {
                    $this->_addEntryToSQLiteDB($buffer);
                } else {
                    $entrydata = $this->_parseEntry($buffer);
                    if ($entrydata) {
                        $this->data[] = $entrydata;
                    }
                }
                $buffer = '';
            }
        }
        if ($entry) { //Inside entry
            $buffer .= $char;
        }
        $lastchar = $char;
    }
    //If open is one it may be possible that the last ending brace is missing
    // TODO: Handle situation with using SQLite DB
    if (1 == $open) {
        $entrydata = $this->_parseEntry($buffer);
        if (!$entrydata) {
            $valid = false;
        } else {
            $this->data[] = $entrydata;
            $buffer = '';
            $open = 0;
        }
    }
    //At this point the open should be zero
    if (0 != $open) {
        $valid = false;
    }
    //Are there multiple entries with the same cite?
    if ($this->_options['validate']) {
        $seen = array();
        $notuniques = array();
        foreach ($this->data as $parsed) {
            // @string and @preamble entries carry no cite key and must not be
            // reported as duplicates of each other
            if (!isset($parsed['cite'])) {
                continue;
            }
            if (isset($seen[$parsed['cite']])) {
                $notuniques[] = $parsed['cite'];
            } else {
                $seen[$parsed['cite']] = true;
            }
        }
        if (count($notuniques) > 0) { //Some values have not been unique!
            $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',', $notuniques));
        }
    }
    if ($valid) {
        $this->content = '';
        return true;
    }
    return false;
}
/**
 * Split entry in key and actual contents, call stringCallback for @string entries and bibItemCallback for all other entries.
 *
 * For @string entries the text before the first "=" on the first line is the
 * key and the text after it, without its surrounding delimiters, the value.
 * For all other entries the closing brace is re-attached and the cite key
 * (the text between "{" and the first comma, surrounding whitespace removed)
 * is extracted.
 *
 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry
 * @param callable $stringCallback Will be called with two arguments (key, value) for @string entries
 * @param callable $bibItemCallback Will be called with two arguments (key, full entry as string) for all non-@string entries
 * @throws InvalidArgumentException if neither a @string definition nor a cite key can be found
 */
private function _storeBibTeXEntry($entry, $stringCallback, $bibItemCallback)
{
    if ('@string' == strtolower(substr($entry, 0, 7))) {
        $matches = array();
        if (preg_match('/^@\w+\{(.+)/', $entry, $matches)) {
            $parts = explode('=', $matches[1], 2);
            $name = trim($parts[0]);
            // A definition without "=" yields an empty value instead of an undefined offset
            $rawValue = isset($parts[1]) ? trim($parts[1]) : '';
            // Cut the first and last character, i.e. the delimiters of the value
            $value = substr($rawValue, 1, -1);
            $stringCallback($name, $value);
            return;
        }
    } else {
        $entry = $entry.'}';
        // Look for key: stop at the FIRST comma. A greedy match would swallow
        // whole fields if the entry starts on a single line.
        $matches = array();
        if (preg_match('/^@\w+\{\s*([^,\r\n]+?)\s*,/', $entry, $matches)) {
            $bibItemCallback($matches[1], $entry);
            return;
        }
    }
    throw new InvalidArgumentException('Could not parse entry "'.$entry.'"');
}
/**
 * Keep the given entry in the members of this object
 *
 * @string definitions end up in $this->_strings, every other entry is stored
 * unparsed in $this->entries under its cite key.
 *
 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry
 */
private function _storeEntryInClass($entry)
{
    $this->_storeBibTeXEntry(
        $entry,
        function ($name, $definition) {
            $this->_strings[$name] = $definition;
        },
        function ($citeKey, $rawEntry) {
            $this->entries[$citeKey] = $rawEntry;
        }
    );
}
/**
 * Add/update entry in SQLite DB (immediately)
 *
 * @string definitions are written to the table "strings", all other entries
 * to the table "bibtex"; existing rows are replaced.
 *
 * NOTE(review): the class declares $sqlite as a static property, while this
 * method reads $this->sqlite - confirm how callers provide the handle.
 *
 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry
 */
private function _addEntryToSQLiteDB($entry)
{
    $this->_storeBibTeXEntry(
        $entry,
        function ($name, $definition) {
            return $this->sqlite->query("INSERT OR REPLACE INTO strings (string, entry) VALUES (?,?)", $name, $definition);
        },
        function ($citeKey, $rawEntry) {
            return $this->sqlite->query("INSERT OR REPLACE INTO bibtex (key, entry) VALUES (?,?)", $citeKey, $rawEntry);
        }
    );
}
/**
 * Queue an SQL statement that inserts/updates $entry in the DB.
 *
 * Nothing is written here; the statement and its parameters are appended to
 * $this->_sqlStatements and run later by _executeSqlStatements().
 *
 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry
 */
private function _prepareSqlStatement($entry)
{
    $this->_storeBibTeXEntry(
        $entry,
        function ($name, $definition) {
            $this->_sqlStatements[] = array(
                "INSERT OR REPLACE INTO strings (string, entry) VALUES (?,?)",
                array($name, $definition),
            );
        },
        function ($citeKey, $rawEntry) {
            $this->_sqlStatements[] = array(
                "INSERT OR REPLACE INTO bibtex (key, entry) VALUES (?,?)",
                array($citeKey, $rawEntry),
            );
        }
    );
}
/**
 * Execute all statements in $this->_sqlStatements in a single transaction.
 *
 * A single transaction is MUCH faster than executing statements sequentially.
 * The queue is emptied only after a successful commit; if the transaction
 * cannot be started or committed, the queued statements are kept.
 *
 * @throws PDOException re-thrown after the transaction has been rolled back
 */
private function _executeSqlStatements()
{
    if (count($this->_sqlStatements) === 0) {
        return; // nothing queued, no need to open a transaction
    }
    $pdo = $this->sqlite->getAdapter()->getPdo();
    try {
        if (!$pdo->beginTransaction()) {
            msg('Sqlite error when starting transaction.', -1);
            return;
        }
        // The queue holds very few distinct SQL strings, so every one of them
        // is prepared only once and re-executed with different parameters
        $prepared = array();
        foreach ($this->_sqlStatements as $statement) {
            list($sql, $params) = $statement;
            if (!isset($prepared[$sql])) {
                $prepared[$sql] = $pdo->prepare($sql);
            }
            $prepared[$sql]->execute($params);
        }
        if (!$pdo->commit()) {
            msg('Sqlite error during commit.', -1);
            if ($pdo->inTransaction()) {
                $pdo->rollBack();
            }
            return;
        }
    } catch (PDOException $ex) {
        // rollBack() itself throws if no transaction is active (e.g. when
        // beginTransaction() failed), which would hide the original error
        if ($pdo->inTransaction()) {
            $pdo->rollBack();
        }
        throw $ex; // TODO handle this case, e.g., by falling back to single queries?
    }
    $this->_sqlStatements = array();
}
/**
 * Extracting the data of one bibtex entry
 *
 * The parse function splits the content into its entries.
 * Then every entry is parsed by this function.
 * It parses the entry backwards.
 * First the last '=' is searched and the value extracted from that.
 * A copy is made of the entry if warnings should be generated. This takes quite
 * some memory but it is needed to get good warnings. If no warnings are generated
 * then you don't have to worry about memory.
 * Then the last ',' is searched and the field extracted from that.
 * Again the entry is shortened.
 * Finally, after all field=>value pairs, the cite and type are extracted and the
 * authors are split.
 *
 * For @string entries only the string definition is stored in this object and
 * for @preamble entries only a warning is generated (if validation is
 * enabled); in both cases the returned array contains just the key 'bibtex'.
 *
 * @access private
 * @param string $entry The entry, ending BEFORE its closing brace
 * @return array The representation of the entry
 */
private function _parseEntry($entry)
{
    $entrycopy = '';
    if ($this->_options['validate']) {
        $entrycopy = $entry; //We need a copy for printing the warnings
    }
    $ret = array('bibtex' => $entry.'}');
    if ('@string' == strtolower(substr($entry, 0, 7))) {
        $matches = array();
        preg_match('/^@\w+\{(.+)/', $entry, $matches);
        if (count($matches) > 0) {
            $m = explode('=', $matches[1], 2);
            $this->_strings[trim($m[0])] = substr(trim(isset($m[1]) ? $m[1] : ''), 1, -1);
        }
    } elseif ('@preamble' == strtolower(substr($entry, 0, 9))) {
        //Preamble not yet supported!
        if ($this->_options['validate']) {
            $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
        }
    } else {
        // Look for key
        $matches = array();
        preg_match('/^@\w+\{([\w\d]+),/', $entry, $matches);
        if (count($matches) > 0) {
            $ret['entrykey'] = $matches[1];
        }
        //Parsing all fields
        while (strrpos($entry, '=') !== false) {
            $position = strrpos($entry, '=');
            //Checking that the equal sign is not quoted or is not inside a equation (For example in an abstract)
            $proceed = true;
            if (substr($entry, $position-1, 1) == '\\') {
                $proceed = false;
            }
            if ($proceed) {
                $proceed = $this->_checkEqualSign($entry, $position);
            }
            while (!$proceed) {
                $substring = substr($entry, 0, $position);
                $position = strrpos($substring, '=');
                if ($position === false) {
                    // No further '=' to the left: there is no field left to
                    // extract. Without this exit the search would never end.
                    break 2;
                }
                $proceed = true;
                if (substr($entry, $position-1, 1) == '\\') {
                    $proceed = false;
                }
                if ($proceed) {
                    $proceed = $this->_checkEqualSign($entry, $position);
                }
            }
            $value = trim(substr($entry, $position+1));
            $entry = substr($entry, 0, $position);
            if (',' == substr($value, strlen($value)-1, 1)) {
                $value = substr($value, 0, -1);
            }
            if ($this->_options['validate']) {
                $this->_validateValue($value, $entrycopy);
            }
            // Handle string replacements
            // IMPORTANT: Must precede stripDelimiter call
            if (!in_array(substr($value, 0, 1), array_keys($this->_delimiters))) {
                if (!empty($this->sqlite)) {
                    $stringReplacement = $this->sqlite->res2arr($this->sqlite->query("SELECT entry FROM strings WHERE string = ?", $value));
                    if (!empty($stringReplacement)) {
                        $value = $stringReplacement[0]['entry'];
                    }
                } elseif (array_key_exists($value, $this->_strings)) {
                    $value = $this->_strings[$value];
                }
            }
            if ($this->_options['replaceLatex']) {
                $value = $this->_replaceLatex($value);
            }
            if ($this->_options['stripDelimiter']) {
                $value = $this->_stripDelimiter($value);
            }
            if ($this->_options['unwrap']) {
                $value = $this->_unwrap($value);
            }
            if ($this->_options['removeCurlyBraces']) {
                $value = $this->_removeCurlyBraces($value);
            }
            // The field name is whatever stands between the previous comma and the '='
            $position = strrpos($entry, ',');
            $field = strtolower(trim(substr($entry, $position+1)));
            $ret[$field] = $value;
            $entry = substr($entry, 0, $position);
        }
        //Parsing cite and entry type
        $arr = explode('{', $entry);
        $ret['cite'] = isset($arr[1]) ? trim($arr[1]) : '';
        $ret['entrytype'] = strtolower(trim($arr[0]));
        if ('@' === substr($ret['entrytype'], 0, 1)) {
            $ret['entrytype'] = substr($ret['entrytype'], 1);
        }
        if ($this->_options['validate']) {
            if (!$this->_checkAllowedEntryType($ret['entrytype'])) {
                $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
            }
        }
        //Handling the authors
        if (array_key_exists('author', $ret) && $this->_options['extractAuthors']) {
            // Array with all the authors in $ret['authors']
            $ret['authors'] = $this->_extractAuthors($ret['author']);
            // AuthorYear for sorting purposes in $ret['authoryear']
            if (empty($ret['year'])) {
                // Fall back to the first four digits of the date field
                if (!empty($ret['date']) && preg_match('|(\d\d\d\d).*|U', $ret['date'], $matches)) {
                    $ret['year'] = $matches[1];
                } else {
                    $ret['year'] = '[n.d.]';
                }
            }
            $ret['authoryear'] = $ret['authors'][0]['last'] . $ret['year'];
            // Nicely formatted authors list in $ret['author']
            $tmparray = array();
            foreach ($ret['authors'] as $authorentry) {
                $tmparray[] = $this->_formatAuthor($authorentry);
            }
            $ret['author'] = implode($this->authordelimiter, $tmparray);
        }
        //Handling the editors
        if (array_key_exists('editor', $ret) && $this->_options['extractAuthors']) {
            // Array with all the editors in $ret['editors']
            $ret['editors'] = $this->_extractAuthors($ret['editor']);
            // Nicely formatted editors list in $ret['editor']
            $tmparray = array();
            foreach ($ret['editors'] as $editorentry) {
                $tmparray[] = $this->_formatAuthor($editorentry);
            }
            $ret['editor'] = implode($this->authordelimiter, $tmparray);
        }
    }
    return $ret;
}
/**
 * Parsing for a subset of LaTeX code that can be found more often in BibTeX entries
 *
 * Text commands (accents, ligatures, special letters, dashes, quotation marks)
 * are replaced in the whole value. Sub-/superscripts, the math mode
 * replacements and the Greek letters are replaced only inside math mode
 * ($...$); if the value contains math, all "$" are removed afterwards.
 *
 * The value still carries its delimiters when this method is called, so no
 * replacement may produce or consume a delimiter character.
 *
 * TODO: Extend this as necessary
 *
 * @access private
 * @param string $entry The field value, including its delimiters
 * @return string The value with the LaTeX code replaced
 */
private function _replaceLatex($entry) {
    // \emph{...} -> ...
    $entry = preg_replace('/\\\\emph\{([^\}]+)\}/', '$1', $entry);
    // \textbf{...} -> ...
    $entry = preg_replace('/\\\\textbf\{([^\}]+)\}/', '$1', $entry);
    // quotation marks; the entity is used, as a literal " would later be
    // mistaken for a field delimiter
    $entry = str_replace(array('``', "''"), '&quot;', $entry);
    // \& -> &
    $entry = str_replace('\\&', '&', $entry);
    // \% -> percent sign
    $entry = str_replace('\\%', '&#37;', $entry);
    // "\ " -> " "
    $entry = str_replace('\\ ', ' ', $entry);
    // --- -> —
    $entry = str_replace('---', '—', $entry);
    // -- -> -
    $entry = str_replace('--', '-', $entry);
    // \url{...} -> ...
    $entry = preg_replace('/\\\\url\{([^\}]+)\}/', '$1', $entry);
    // Handle umlauts
    $entry = preg_replace('/\\\\"\{([aeiouyAEIOU])\}/', '&\\1uml;', $entry);
    $entry = preg_replace('/\\\\"([aeiouyAEIOU])/', '&\\1uml;', $entry);
    $entry = str_replace('\\ss', 'ß', $entry);
    // "s (german shorthand) only counts after a letter or an entity; otherwise
    // the opening delimiter of a value such as "some title" would be eaten
    $entry = preg_replace('/(?<=[A-Za-z;\x80-\xff])"s/', 'ß', $entry);
    // Handle accents
    // Handle acute
    $entry = str_replace("\\'c", 'ć', $entry);
    $entry = preg_replace("/\\\\'(.?)/", '&\\1acute;', $entry);
    // Handle grave
    $entry = preg_replace('/\\\\`(.?)/', '&\\1grave;', $entry);
    // Handle circumflex
    $entry = preg_replace('/\\\\\^(.?)/', '&\\1circ;', $entry);
    // Handle hatschek
    $entry = str_replace('\v{z}', 'ž', $entry);
    $entry = str_replace('\v{c}', 'č', $entry);
    // Handle cedille (letter followed by the combining cedilla U+0327)
    $entry = preg_replace('/\\\\c\{(.?)\}/', "\\1\u{0327}", $entry);
    // Handle tilde
    $entry = preg_replace('/\\\\~(.?)/', '&\\1tilde;', $entry);
    // ae and oe ligatures
    $entry = preg_replace('/\\\\([aoAO]{1}[eE]{1})/', '&\\1lig;', $entry);
    // The one-letter commands below must not be followed by another letter,
    // otherwise \iota, \lambda, \Lambda, \omega, ... would be destroyed
    // Handle i without dot
    $entry = preg_replace('/\\\\i(?![A-Za-z])/', 'ı', $entry);
    // Handle u with bar
    $entry = str_replace('\\={u}', 'ū', $entry);
    // Handle \l and \L
    $entry = preg_replace('/\\\\l(?![A-Za-z])/', 'ł', $entry);
    $entry = preg_replace('/\\\\L(?![A-Za-z])/', 'Ł', $entry);
    // \o and \O
    $entry = preg_replace('/\\\\([oO])(?![A-Za-z])/', '&\\1slash;', $entry);
    // \aa and \AA
    $entry = preg_replace('/\\\\([aA]{1})([aA]{1})/', '&\\1ring;', $entry);
    // Replace remaining "~" with " "
    $entry = str_replace('~', ' ', $entry);
    // Handle math ($...$): only the text between the delimiters is converted
    $mathCount = 0;
    $entry = preg_replace_callback(
        '/\$([^$]+)\$/',
        function ($match) {
            return $this->_replaceLatexMath($match[1]);
        },
        $entry,
        -1,
        $mathCount
    );
    if ($mathCount > 0) {
        // Finally, remove the remaining (unpaired) LaTeX mathmode $ delimiters
        $entry = str_replace('$', '', $entry);
    }
    return $entry;
}
/**
 * Replaces LaTeX code inside one math mode expression
 *
 * @access private
 * @param string $math Content of a math expression, without the surrounding "$"
 * @return string The content with sub-/superscript markers removed and math
 *                commands and Greek letters replaced
 */
private function _replaceLatexMath($math)
{
    // Fix superscript and subscript: drop the marker, keep the argument
    $math = preg_replace(
        array(
            '/\^\{([^\}]+)\}/',
            '/_\{([^\}]+)\}/',
            '/\^(\\\\\w+)/',
            '/_(\\\\\w+)/',
            '/\^([^\\\\])/',
            '/_([^\\\\])/',
        ),
        '\\1',
        $math
    );
    // LaTeX math commands, e.g. "\to", plus lowercase and uppercase Greek letters
    $map = $this->_latexMathmodeReplacements;
    foreach ($this->_greekLetters as $letter) {
        $map['\\' . ucfirst($letter)] = '&' . ucfirst($letter) . ';';
        $map['\\' . $letter] = '&' . $letter . ';';
    }
    // strtr() prefers the longest command at each position (\cdots over \cdot)
    // and never re-scans text it has already replaced
    return strtr($math, $map);
}
/**
 * Decides whether the '=' at $position separates a field name from its value
 *
 * An '=' may also show up inside a value (for example in an abstract).
 * The text from the end of the entry back to the '=' is examined:
 * - if the unescaped braces in that range are not balanced, the '=' lies
 *   inside braces and false is returned;
 * - if the entry ends with a double quote (a trailing comma is ignored), the
 *   range must contain two unescaped double quotes - the closing and the
 *   opening delimiter - otherwise the '=' lies inside the quoted value.
 *
 * @access private
 * @param string $entry The text of the whole remaining entry
 * @param int $position the current used place of the '='
 * @return bool true if the '=' is correct, false if it belongs to an equation
 */
private function _checkEqualSign($entry, $position)
{
    $length = strlen($entry);
    // Pass 1: net count of unescaped braces between the end and the '='
    $balance = 0;
    $idx = $length - 1;
    while ($idx >= $position) {
        $current = substr($entry, $idx, 1);
        $escaped = ('\\' == substr($entry, $idx - 1, 1));
        if (!$escaped) {
            if ('{' == $current) {
                $balance++;
            }
            if ('}' == $current) {
                $balance--;
            }
        }
        $idx--;
    }
    if (0 != $balance) {
        return false;
    }
    // Pass 2 is needed only for values delimited by double quotes: there the
    // braces may be balanced although the '=' is part of the value
    $trimmed = trim($entry);
    $tail = $trimmed[strlen($trimmed) - 1];
    if (',' == $tail) {
        $tail = $trimmed[strlen($trimmed) - 2];
    }
    if ('"' != $tail) {
        return true;
    }
    // Searching backwards, both the closing and the opening quote have to
    // appear before the '=' is reached
    $quotes = 0;
    for ($idx = $length; $idx >= $position; $idx--) {
        if (('"' == substr($entry, $idx, 1)) && ('\\' != substr($entry, $idx - 1, 1))) {
            $quotes++;
        }
        if (2 == $quotes) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether an entry type is part of $this->allowedEntryTypes
 *
 * @access private
 * @param string $entry The entry type to check
 * @return bool true if allowed, false otherwise
 */
private function _checkAllowedEntryType($entry)
{
    $found = array_search($entry, $this->allowedEntryTypes);
    return false !== $found;
}
/**
 * Checking whether an at is outside an entry
 *
 * Sometimes an entry misses an entry brace. Then the at of the next entry seems to be
 * inside an entry. This is checked here: the text following the last unescaped '='
 * of $entry is taken as the current value, and its delimiters are counted. If a
 * delimiter is still open, the at lies inside that value.
 *
 * @access private
 * @param string $entry The text of the entry until the at
 * @return bool true if the at is correct (inside a value), false if the at is likely to begin the next entry.
 */
private function _checkAt($entry)
{
    //Getting the value (at is only allowed in values)
    $position = strrpos($entry, '=');
    // Walk to the left as long as the '=' found is escaped (\=)
    while (($position !== false) && ($position > 0) && (substr($entry, $position - 1, 1) == '\\')) {
        $position = strrpos(substr($entry, 0, $position), '=');
    }
    if ($position === false) {
        return false; // no value at all, so the at cannot be part of one
    }
    $value = trim(substr($entry, $position + 1));
    $open = 0;
    $quoteOpen = array(); // state of delimiters that open and close with the same character
    $lastchar = '';
    $length = strlen($value);
    for ($i = 0; $i < $length; $i++) {
        $char = $value[$i];
        if ('\\' != $lastchar) {
            if (isset($this->_delimiters[$char])) {
                if ($this->_delimiters[$char] === $char) {
                    // e.g. ": the first one opens, the next one closes
                    if (!empty($quoteOpen[$char])) {
                        $open--;
                        $quoteOpen[$char] = false;
                    } else {
                        $open++;
                        $quoteOpen[$char] = true;
                    }
                } else {
                    $open++;
                }
            } elseif (in_array($char, $this->_delimiters, true)) {
                $open--;
            }
        }
        $lastchar = $char;
    }
    //if open is greater than zero we are inside a value
    return $open > 0;
}
/**
 * Removes the delimiters surrounding a value
 *
 * As long as the value starts with an opening delimiter and ends with the
 * closing delimiter belonging to it, both characters are cut off.
 *
 * NOTE(review): only the first and the last character are compared, so
 * "{A} and {B}" loses its outer braces as well - confirm whether that is wanted.
 *
 * @access private
 * @param string $entry The entry where the Delimiter should be stripped from
 * @return string Stripped entry
 */
private function _stripDelimiter($entry)
{
    $openers = array_keys($this->_delimiters);
    while (true) {
        $head = substr($entry, 0, 1);
        if (!in_array($head, $openers)) {
            break; // no opening delimiter in front
        }
        if (substr($entry, -1, 1) != $this->_delimiters[$head]) {
            break; // the end does not close the delimiter in front
        }
        $entry = substr($entry, 1, -1);
    }
    return $entry;
}
/**
 * Puts an entry on a single line
 *
 * Every run of whitespace (including line breaks) becomes one blank and
 * whitespace at both ends is removed.
 *
 * @access private
 * @param string $entry The entry to unwrap
 * @return string unwrapped entry
 */
private function _unwrap($entry)
{
    return trim(preg_replace('/\s+/', ' ', $entry));
}
/**
 * Wraps an entry according to the word wrap options
 *
 * Width, line break and cut mode are taken from the options 'wordWrapWidth',
 * 'wordWrapBreak' and 'wordWrapCut'. Empty strings and values that are not
 * strings are returned untouched.
 *
 * @access private
 * @param string $entry The entry to wrap
 * @return string wrapped entry
 */
private function _wordwrap($entry)
{
    if (!is_string($entry) || ('' == $entry)) {
        return $entry;
    }
    $width = $this->_options['wordWrapWidth'];
    $break = $this->_options['wordWrapBreak'];
    $cut = $this->_options['wordWrapCut'];
    return wordwrap($entry, $width, $break, $cut);
}
/**
* Extracting the authors
*
* The author list is unwrapped and split at " and " (" AND " is accepted as
* well). Every name is then split into its parts, depending on the number of
* commas it contains:
* - no comma:  "First von Last"
* - one comma: "von Last, First"
* - two commas: "von Last, Jr, First"
* Whether a word belongs to the von part is decided by its case, see
* _determineCase(): a word in lower case starts or continues the von part.
*
* @access private
* @param string $entry The entry with the authors
* @return array the extracted authors; one array with the keys 'first', 'von',
*               'last' and 'jr' (all trimmed) per author
*/
private function _extractAuthors($entry) {
$entry = $this->_unwrap($entry);
// Replace AND with and in author list - added 2010-12-12, till@till-bisup.de
$entry = str_replace(' AND ',' and ',$entry);
$authorarray = array();
$authorarray = explode(' and ', $entry);
// Every element of $authorarray is replaced in place by its parsed form
for ($i = 0; $i < sizeof($authorarray); $i++) {
$author = trim($authorarray[$i]);
/*The first version of how an author could be written (First von Last)
has no commas in it*/
$first = '';
$von = '';
$last = '';
$jr = '';
if (strpos($author, ',') === false) {
$tmparray = array();
$tmparray = explode(' ', $author);
$size = sizeof($tmparray);
if (1 == $size) { //There is only a last
$last = $tmparray[0];
} elseif (2 == $size) { //There is a first and a last
$first = $tmparray[0];
$last = $tmparray[1];
} else {
// Three or more words: walk through all words but the final one and
// move from the first part to the von part to the last part
$invon = false;
$inlast = false;
for ($j=0; $j<($size-1); $j++) {
if ($inlast) {
$last .= ' '.$tmparray[$j];
} elseif ($invon) {
$case = $this->_determineCase($tmparray[$j]);
if ((0 == $case) || (-1 == $case)) { //Change from von to last
//You only change when there is no more lower case there
$islast = true;
for ($k=($j+1); $k<($size-1); $k++) {
$futurecase = $this->_determineCase($tmparray[$k]);
// NOTE(review): this tests $case, which is 0 or -1 at this point, so the
// branch is never taken; presumably $futurecase was meant - confirm
if ($case == PHP_INT_MAX) {
// Error case. IGNORE?
} elseif (0 == $futurecase) {
$islast = false;
}
}
if ($islast) {
$inlast = true;
if (-1 == $case) { //Caseless belongs to the last
$last .= ' '.$tmparray[$j];
} else {
$von .= ' '.$tmparray[$j];
}
} else {
$von .= ' '.$tmparray[$j];
}
} else {
$von .= ' '.$tmparray[$j];
}
} else {
$case = $this->_determineCase($tmparray[$j]);
if (0 == $case) { //Change from first to von
$invon = true;
$von .= ' '.$tmparray[$j];
} else {
$first .= ' '.$tmparray[$j];
}
}
}
//The last entry is always the last!
$last .= ' '.$tmparray[$size-1];
}
} else { //Version 2 and 3
$tmparray = array();
$tmparray = explode(',', $author);
//The first entry must contain von and last
$vonlastarray = array();
$vonlastarray = explode(' ', $tmparray[0]);
$size = sizeof($vonlastarray);
if (1==$size) { //Only one entry->got to be the last
$last = $vonlastarray[0];
} else {
$inlast = false;
for ($j=0; $j<($size-1); $j++) {
if ($inlast) {
$last .= ' '.$vonlastarray[$j];
} else {
if (0 != ($this->_determineCase($vonlastarray[$j]))) { //Change from von to last
// Only change if none of the remaining words (except the final one) is lower case
$islast = true;
for ($k=($j+1); $k<($size-1); $k++) {
// The result of this first call is not used
$this->_determineCase($vonlastarray[$k]);
$case = $this->_determineCase($vonlastarray[$k]);
if (0 == $case) {
$islast = false;
}
}
if ($islast) {
$inlast = true;
$last .= ' '.$vonlastarray[$j];
} else {
$von .= ' '.$vonlastarray[$j];
}
} else {
$von .= ' '.$vonlastarray[$j];
}
}
}
// The final word of the first part always belongs to the last name
$last .= ' '.$vonlastarray[$size-1];
}
//Now we check if it is version three (three entries in the array (two commas)
if (3==sizeof($tmparray)) {
$jr = $tmparray[1];
}
//Everything in the last entry is first
$first = $tmparray[sizeof($tmparray)-1];
}
$authorarray[$i] = array('first'=>trim($first), 'von'=>trim($von), 'last'=>trim($last), 'jr'=>trim($jr));
}
return $authorarray;
}
/**
 * Case Determination according to the needs of BibTex
 *
 * To parse the Author(s) correctly a determination is needed
 * to get the Case of a word. The case is that of the first ASCII letter
 * that is not enclosed in braces. There are three possible values:
 * - Upper Case (return value 1)
 * - Lower Case (return value 0)
 * - Caseless (return value -1), i.e. no such letter was found
 *
 * @access private
 * @param string $word
 * @return int The Case or PHP_INT_MAX if $word is not a string or is empty after trimming
 */
private function _determineCase($word) {
    // The type has to be checked BEFORE trim(), which fails for non-strings
    if (!is_string($word)) {
        return PHP_INT_MAX;
    }
    $trimmedword = trim($word);
    $length = strlen($trimmedword);
    if (0 == $length) {
        return PHP_INT_MAX;
    }
    $openbrace = 0;
    for ($i = 0; $i < $length; $i++) {
        $ord = ord($trimmedword[$i]);
        if ($ord == 123) { //Open brace
            $openbrace++;
        } elseif ($ord == 125) { //Closing brace
            $openbrace--;
        } elseif (0 == $openbrace) {
            if (($ord >= 65) && ($ord <= 90)) { //First letter outside braces is uppercase
                return 1;
            }
            if (($ord >= 97) && ($ord <= 122)) { //First letter outside braces is lowercase
                return 0;
            }
        }
    }
    return -1;
}
/**
 * Validation of a value
 *
 * There may be several problems with the value of a field.
 * These problems exist but do not break the parsing.
 * If a problem is detected a warning is appended to the array warnings.
 * The checks are:
 * - an "@" inside a value enclosed by braces
 * - an escaped double quote (\") inside a value enclosed by double quotes
 * - an unbalanced number of unescaped braces
 *
 * @access private
 * @param string $entry The entry aka one line which should be validated
 * @param string $wholeentry The whole BibTex Entry which the one line is part of
 * @return void
 */
private function _validateValue($entry, $wholeentry)
{
    //There is no @ allowed if the entry is enclosed by braces
    if (preg_match('/^{.*@.*}$/', $entry)) {
        $this->_generateWarning('WARNING_AT_IN_BRACES', $entry, $wholeentry);
    }
    //No escaped " allowed if the entry is enclosed by double quotes
    //(the four backslashes make the pattern match a literal backslash in front of the quote)
    if (preg_match('/^".*\\\\".*"$/', $entry)) {
        $this->_generateWarning('WARNING_ESCAPED_DOUBLE_QUOTE_INSIDE_DOUBLE_QUOTES', $entry, $wholeentry);
    }
    //Amount of Braces is not correct
    $open = 0;
    $lastchar = '';
    $length = strlen($entry);
    for ($i = 0; $i < $length; $i++) {
        $char = substr($entry, $i, 1);
        if ('\\' != $lastchar) { // escaped braces do not count
            if ('{' == $char) {
                $open++;
            } elseif ('}' == $char) {
                $open--;
            }
        }
        $lastchar = $char;
    }
    if (0 != $open) {
        $this->_generateWarning('WARNING_UNBALANCED_AMOUNT_OF_BRACES', $entry, $wholeentry);
    }
}
/**
 * Remove curly braces from entry
 *
 * Delimiters surrounding the value are taken off first, then all curly
 * braces are removed from the rest, and finally the delimiters are put back
 * around the value in their original order.
 *
 * @access private
 * @param string $value The value in which curly braces to be removed
 * @return string Value with removed curly braces
 */
private function _removeCurlyBraces($value)
{
    //First we save the delimiters
    $beginningdels = array_keys($this->_delimiters);
    $firstchar = substr($value, 0, 1);
    $lastchar = substr($value, -1, 1);
    $begin = '';
    $end = '';
    while (in_array($firstchar, $beginningdels)) { //The first character is an opening delimiter
        if ($lastchar == $this->_delimiters[$firstchar]) { //Matches to closing Delimiter
            $begin .= $firstchar;
            // Closing delimiters are collected from the outside in, so each
            // new one has to go IN FRONT of those already saved
            $end = $lastchar.$end;
            $value = substr($value, 1, -1);
        } else {
            break;
        }
        $firstchar = substr($value, 0, 1);
        $lastchar = substr($value, -1, 1);
    }
    //Now we get rid of the curly braces
    $value = preg_replace('/[\{\}]/', '', $value);
    //Reattach delimiters
    return $begin.$value.$end;
}
/**
 * Appends a warning to $this->warnings
 *
 * Every warning is an array with the keys 'warning', 'entry' and 'wholeentry'.
 *
 * @access private
 * @param string $type The type of the warning
 * @param string $entry The line of the entry where the warning occurred
 * @param string $wholeentry OPTIONAL The whole entry where the warning occurred
 */
private function _generateWarning($type, $entry, $wholeentry='')
{
    $this->warnings[] = array(
        'warning' => $type,
        'entry' => $entry,
        'wholeentry' => $wholeentry,
    );
}
/**
 * Discards all warnings collected so far
 *
 * @access public
 * @return void
 */
public function clearWarnings()
{
    $this->warnings = [];
}
/**
 * Tells whether at least one warning has been collected
 *
 * @access public
 * @return bool true if there is a warning, false otherwise
 */
public function hasWarning()
{
    return count($this->warnings) > 0;
}
/**
 * Returns the author formatted
 *
 * The author is formatted as set in $this->authorstring: the placeholders
 * VON, LAST, JR and FIRST are replaced by the respective (trimmed) parts of
 * the name. Missing parts count as empty. If there is no jr part, the text
 * ", JR" is removed from the format.
 *
 * @access private
 * @param array $array Author array with the optional keys 'von', 'last', 'jr' and 'first'
 * @return string the formatted author string
 */
private function _formatAuthor($array)
{
    $parts = array();
    foreach (array('von', 'last', 'jr', 'first') as $key) {
        $parts[$key] = array_key_exists($key, $array) ? trim($array[$key]) : '';
    }
    $replacements = array(
        'VON' => $parts['von'],
        'LAST' => $parts['last'],
        'FIRST' => $parts['first'],
    );
    // Assuming that "jr" is always separated by a comma
    if (!empty($parts['jr'])) {
        $replacements['JR'] = $parts['jr'];
    } else {
        $replacements[', JR'] = '';
    }
    // strtr() replaces all placeholders in one pass, so a placeholder word that
    // happens to be part of a name (e.g. "FIRSTBROOK") is not replaced again
    return trim(strtr($this->authorstring, $replacements));
}
}
?>