<?php
/**
 * DokuWiki Plugin bibtex4dw (BibTeX Parser Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author Till Biskup <till@till-biskup.de>
 * @version 0.2
 * @date 2023-05-28
 */

/**
 * This class is based originally on the PHP PEAR package
 * Structures_BibTeX, (c) 1997-2005 The PHP Group, Elmar Pitschke
 * For more information about the original PEAR package, please visit
 * http://pear.php.net/package/Structures_BibTex
 *
 * Some additional modifications to the original PHP PEAR package have
 * been made by Raphael Reitzig in 2010 for his bib2tpl program.
 * For more information about the bib2tpl program, please visit
 * http://lmazy.verrech.net/bib2tpl/
 *
 * During the transition from the original PHP PEAR package to this class, which
 * forms part of the DokuWiki plugin bibtex, several unnecessary functions, such as
 * the output to HTML and RTF, have been removed, as well as the dependency on PEAR.
 *
 * Other functions, such as the handling of BibTeX's @STRING patterns and a basic
 * parsing of LaTeX code common in BibTeX entries (e.g. \emph{}), have been added.
 *
 * This class is no longer PHP 4 compatible, as was the original PEAR package.
30 */ 31 32class bibtexparser_plugin_bibtex4dw 33{ 34 /** 35 * Handle to SQLite db 36 */ 37 public static $sqlite = array(); 38 /** 39 * Array with the BibTex Data 40 * 41 * @access public 42 * @var array 43 */ 44 public $data = array(); 45 /** 46 * String with the BibTex content 47 * 48 * @access public 49 * @var string 50 */ 51 public $content; 52 /** 53 * Array with the BibTex Strings 54 * 55 * @access private 56 * @var array 57 */ 58 private $_strings = array(); 59 /** 60 * Array with the BibTex entries 61 * 62 * @access public 63 * @var array 64 */ 65 public $entries = array(); 66 /** 67 * Array with possible Delimiters for the entries 68 * 69 * @access private 70 * @var array 71 */ 72 private $_delimiters; 73 /** 74 * Array with replacements for LaTeX commands in fields of entries 75 * 76 * The patterns are searched for only in LaTeX math mode ($...$) 77 * 78 * As the output is in HTML, the best is to use the named representatives 79 * of the respective signs. 80 * 81 * @access private 82 * @var array 83 */ 84 private $_latexMathmodeReplacements = array( 85 '\to' => '→', 86 '\bullet' => '•', 87 '\circ' => '°', 88 '\varepsilon' => 'ε', 89 '\vartheta' => 'ϑ', 90 '\varpi' => 'ϖ', 91 '\varrho' => 'ρ', 92 '\varsigma' => 'ς', 93 '\varphi' => 'φ', 94 '\cdot' => '·', 95 '\cdots' => '···', 96 '\rm ' => '' 97 ); 98 /** 99 * Array with Greek letters to replace the LaTeX commands in fields of entries 100 * 101 * The greek letters are searched for only in LaTeX math mode ($...$) 102 * 103 * They will be checked both for lower and upper letters, as these differ only 104 * in the first character of their respective name. 105 * 106 * Note: The LaTeX mathmode replacements (see above) will be done first, thus 107 * it is possible to use that to deal with special greek characters as 108 * \varepsilon. 
109 * 110 * @access private 111 * @var array 112 */ 113 private $_greekLetters = array( 114 'alpha','beta','gamma','delta','epsilon', 115 'zeta','eta','theta','iota','kappa', 116 'lambda','mu','nu','xi','omicron', 117 'pi','rho','sigma','tau','upsilon', 118 'phi','chi','psi','omega', 119 ); 120 /** 121 * Array to store warnings 122 * 123 * @access public 124 * @var array 125 */ 126 public $warnings = array(); 127 /** 128 * Run-time configuration options 129 * 130 * @access private 131 * @var array 132 */ 133 private $_options; 134 /** 135 * Array with the "allowed" entry types 136 * 137 * @access public 138 * @var array 139 */ 140 public $allowedEntryTypes; 141 /** 142 * Author Format Strings 143 * 144 * @access public 145 * @var string 146 */ 147 public $authorstring; 148 149 /** 150 * List of SQL statements to be inserted at once 151 * 152 * @access private 153 * @var array 154 */ 155 private $_sqlStatements = array(); 156 157 /** 158 * Constructor 159 * 160 * @access public 161 * @return void 162 */ 163 function __construct($options = array()) 164 { 165 $this->_delimiters = array('"'=>'"', 166 '{'=>'}'); 167 $this->data = array(); 168 $this->content = ''; 169 //$this->_stripDelimiter = $stripDel; 170 //$this->_validate = $val; 171 $this->warnings = array(); 172 $this->_options = array( 173 'replaceLatex' => true, 174 'stripDelimiter' => true, 175 'validate' => true, 176 'unwrap' => false, 177 'wordWrapWidth' => false, 178 'wordWrapBreak' => "\n", 179 'wordWrapCut' => 0, 180 'removeCurlyBraces' => true, 181 'extractAuthors' => true, 182 ); 183 foreach ($options as $option => $value) { 184 $test = $this->setOption($option, $value); 185 } 186 $this->allowedEntryTypes = array( 187 'article', 188 'book', 189 'booklet', 190 'conference', 191 'inbook', 192 'incollection', 193 'inproceedings', 194 'manual', 195 'mastersthesis', 196 'misc', 197 'phdthesis', 198 'proceedings', 199 'techreport', 200 'unpublished' 201 ); 202 $this->authorstring = 'VON LAST, JR, FIRST'; 203 
$this->authordelimiter = '; '; 204 } 205 206 /** 207 * Sets run-time configuration options 208 * 209 * @access public 210 * @param string $option option name 211 * @param mixed $value value for the option 212 * @return mixed true on success (DW msg on failure) 213 */ 214 public function setOption($option, $value) 215 { 216 $ret = true; 217 if (array_key_exists($option, $this->_options)) { 218 $this->_options[$option] = $value; 219 } else { 220 msg("Unknown option $option", 2); 221 $ret = false; 222 } 223 return $ret; 224 } 225 226 /** 227 * Reads a given BibTex File 228 * 229 * @access public 230 * @param string $filename Name of the file 231 * @return mixed true on success (DW msg on failure) 232 */ 233 public function loadFile($filename) 234 { 235 if (file_exists($filename)) { 236 if (($this->content = @file_get_contents($filename)) === false) { 237 msg("Could not open file $filename", 2); 238 } else { 239 $this->_pos = 0; 240 $this->_oldpos = 0; 241 return true; 242 } 243 } else { 244 msg("Could not find file $filename", 2); 245 } 246 } 247 248 /** 249 * Reads bibtex from a string variable 250 * 251 * @access public 252 * @param string $bib String containing bibtex 253 * @return boolean true 254 */ 255 public function loadString($bib) 256 { 257 $this->content = $bib; 258 $this->_pos = 0; 259 $this->_oldpos = 0; 260 return true; // For compatibility with loadFile 261 } 262 263 /** 264 * Parse bibliography stored in content and clear the content if the parsing is successful. 
265 * 266 * @access public 267 * @return boolean true on success and PEAR_Error if there was a problem 268 */ 269 public function parseBibliography($sqlite = false) 270 { 271 //The amount of opening braces is compared to the amount of closing braces 272 //Braces inside comments are ignored 273 $this->warnings = array(); 274 $this->data = array(); 275 $valid = true; 276 $open = 0; 277 $entry = false; 278 $char = ''; 279 $lastchar = ''; 280 $buffer = ''; 281 $inField = false; 282 $openInField = 0; 283 $lastNonWsChar = ''; 284 for ($i = 0; $i < strlen($this->content); $i++) { 285 $char = substr($this->content, $i, 1); 286 if ((0 != $open) && ('@' == $char) && (!$inField)) { 287 if (!$this->_checkAt($buffer)) { 288 $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer); 289 //To correct the data we need to insert a closing brace 290 $char = '}'; 291 $i--; 292 } 293 } 294 if ((0 == $open) && ('@' == $char)) { //The beginning of an entry 295 $entry = true; 296 } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is opening 297 $open++; 298 if (!$inField && ($lastNonWsChar == '=')) { 299 $inField = true; 300 } elseif ($inField) { 301 $openInField++; 302 } 303 } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is closing 304 $open--; 305 if ($inField) { 306 $openInField--; 307 if ($openInField == 0) { 308 $inField = false; 309 } 310 } 311 if ($open < 0) { //More are closed than opened 312 $valid = false; 313 } 314 if (0 == $open) { //End of entry 315 $entry = false; 316 // TODO: Some check for duplicate keys and issuing a warning if so? 
317 if ($sqlite) { 318 $this->_prepareSqlStatement($buffer); 319 } else { 320 $this->_storeEntryInClass($buffer); 321 } 322 $buffer = ''; 323 } 324 } 325 if ($entry) { //Inside entry 326 $buffer .= $char; 327 } 328 $lastchar = $char; 329 if ($char != ' ' && $char != '\t' && $char != '\n' && $char != '\r') { 330 $lastNonWsChar = $char; 331 } 332 } 333 //If open is one it may be possible that the last ending brace is missing 334 // TODO: Handle situation with using SQLite DB 335 if (1 == $open) { 336 $entrydata = $this->_parseEntry($buffer); 337 if (!$entrydata) { 338 $valid = false; 339 } else { 340 $this->data[] = $entrydata; 341 $buffer = ''; 342 $open = 0; 343 } 344 } 345 if ($sqlite) { 346 $this->_executeSqlStatements(); 347 } 348 //At this point the open should be zero 349 if (0 != $open) { 350 $valid = false; 351 } 352 //Are there multiple entries with the same cite? 353 // TODO: Meanwhile, as in both cases (SQLite and manual) bibtex keys are used as index, 354 // this situation shall no longer exist. Checking for duplicate keys needs be done above. 355 if ($this->_options['validate']) { 356 $cites = array(); 357 foreach ($this->data as $entry) { 358 $cites[] = $entry['cite']; 359 } 360 $unique = array_unique($cites); 361 if (sizeof($cites) != sizeof($unique)) { //Some values have not been unique! 362 $notuniques = array(); 363 for ($i = 0; $i < sizeof($cites); $i++) { 364 if ('' == $unique[$i]) { 365 $notuniques[] = $cites[$i]; 366 } 367 } 368 $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',',$notuniques)); 369 } 370 } 371 if ($valid) { 372 $this->content = ''; 373 return true; 374 } else { 375 return false; 376 } 377 } 378 379 /** 380 * Parses what is stored in content and clears the content if the parsing is successful. 
381 * 382 * @access public 383 * @return boolean true on success and PEAR_Error if there was a problem 384 */ 385 public function parse($sqlite = false) 386 { 387 //The amount of opening braces is compared to the amount of closing braces 388 //Braces inside comments are ignored 389 $this->warnings = array(); 390 $this->data = array(); 391 $valid = true; 392 $open = 0; 393 $entry = false; 394 $char = ''; 395 $lastchar = ''; 396 $buffer = ''; 397 for ($i = 0; $i < strlen($this->content); $i++) { 398 $char = substr($this->content, $i, 1); 399 if ((0 != $open) && ('@' == $char)) { 400 if (!$this->_checkAt($buffer)) { 401 $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer); 402 //To correct the data we need to insert a closing brace 403 $char = '}'; 404 $i--; 405 } 406 } 407 if ((0 == $open) && ('@' == $char)) { //The beginning of an entry 408 $entry = true; 409 } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is opening 410 $open++; 411 } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is closing 412 $open--; 413 if ($open < 0) { //More are closed than opened 414 $valid = false; 415 } 416 if (0 == $open) { //End of entry 417 $entry = false; 418 if ($sqlite) { 419 $this->_addEntryToSQLiteDB($buffer); 420 } else { 421 $entrydata = $this->_parseEntry($buffer); 422 if ($entrydata) { 423 $this->data[] = $entrydata; 424 } 425 } 426 $buffer = ''; 427 } 428 } 429 if ($entry) { //Inside entry 430 $buffer .= $char; 431 } 432 $lastchar = $char; 433 } 434 //If open is one it may be possible that the last ending brace is missing 435 // TODO: Handle situation with using SQLite DB 436 if (1 == $open) { 437 $entrydata = $this->_parseEntry($buffer); 438 if (!$entrydata) { 439 $valid = false; 440 } else { 441 $this->data[] = $entrydata; 442 $buffer = ''; 443 $open = 0; 444 } 445 } 446 //At this point the open should be zero 447 if (0 != $open) { 448 $valid = false; 449 } 
450 //Are there multiple entries with the same cite? 451 if ($this->_options['validate']) { 452 $cites = array(); 453 foreach ($this->data as $entry) { 454 $cites[] = $entry['cite']; 455 } 456 $unique = array_unique($cites); 457 if (sizeof($cites) != sizeof($unique)) { //Some values have not been unique! 458 $notuniques = array(); 459 for ($i = 0; $i < sizeof($cites); $i++) { 460 if ('' == $unique[$i]) { 461 $notuniques[] = $cites[$i]; 462 } 463 } 464 $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',',$notuniques)); 465 } 466 } 467 if ($valid) { 468 $this->content = ''; 469 return true; 470 } else { 471 return false; 472 } 473 } 474 475 /** 476 * Split entry in key and actual contents, call stringCallback for @string entries and bibItemCallback for all other entries. 477 * 478 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry 479 * @param callable $stringCallback Will be called with two arguments (key, value) for @string entries 480 * @param callable $bibItemCallback Will be called with two arguments (key, full entry as string) for all non-@string entries 481 */ 482 private function _storeBibTeXEntry($entry, $stringCallback, $bibItemCallback) 483 { 484 if ('@string' == strtolower(substr($entry, 0, 7))) { 485 $matches = array(); 486 preg_match('/^@\w+\{(.+)/', $entry, $matches); 487 if (count($matches) > 0) { 488 $m = explode('=', $matches[1], 2); 489 $string = trim($m[0]); 490 $entry = substr(trim($m[1]), 1, -1); 491 call_user_func($stringCallback, $string, $entry); 492 return; 493 } 494 } else { 495 $entry = $entry.'}'; 496 // Look for key 497 $matches = array(); 498 preg_match('/^@(\w+)\{(.+),/', $entry, $matches); 499 if (count($matches) > 0) { 500 $entryType = $matches[1]; 501 $key = $matches[2]; 502 call_user_func($bibItemCallback, $key, $entry); 503 return; 504 } 505 } 506 throw new InvalidArgumentException('Could not parse entry "'.$entry.'"'); 507 } 508 509 /** 510 * Store given entry in this 
object's members 511 * 512 * @param string $entry BibTeX entry, starting with @ and ending BEFORE the closing brace of the entry 513 */ 514 private function _storeEntryInClass($entry) 515 { 516 $stringCallback = fn($key, $value) => $this->_strings[$key] = $value; 517 $bibItemCallback = fn($key, $value) => $this->entries[$key] = $value; 518 $this->_storeBibTeXEntry($entry, $stringCallback, $bibItemCallback); 519 } 520 521 /** 522 * Add/update entry in SQLite DB (immediately) 523 */ 524 private function _addEntryToSQLiteDB($entry) 525 { 526 $stringCallback = fn($key, $value) => $this->sqlite->query("INSERT OR REPLACE INTO strings (string, entry) VALUES (?,?)", $key, $value); 527 $bibItemCallback = fn($key, $value) => $this->sqlite->query("INSERT OR REPLACE INTO bibtex (key, entry) VALUES (?,?)", $key, $value); 528 $this->_storeBibTeXEntry($entry, $stringCallback, $bibItemCallback); 529 } 530 531 /** 532 * Prepare an SQL statement to insert/update $entry in the DB. 533 */ 534 private function _prepareSqlStatement($entry) 535 { 536 $stringCallback = fn($key, $value) => $this->_sqlStatements[] = array("INSERT OR REPLACE INTO strings (string, entry) VALUES (?,?)", array($key, $value)); 537 $bibItemCallback = fn($key, $value) => $this->_sqlStatements[] = array("INSERT OR REPLACE INTO bibtex (key, entry) VALUES (?,?)", array($key, $value)); 538 $this->_storeBibTeXEntry($entry, $stringCallback, $bibItemCallback); 539 } 540 541 /** 542 * Execute all statements in $this->_sqlStatments in a single transaction. 543 * 544 * A single transaction is MUCH faster than executing statements sequentially. 
545 */ 546 private function _executeSqlStatements() 547 { 548 $pdo = $this->sqlite->getAdapter()->getPdo(); 549 try { 550 if(!$pdo->beginTransaction()) { 551 msg('Sqlite error when starting transaction.', -1); 552 return; 553 } 554 foreach ($this->_sqlStatements as $statement) { 555 list($sql, $params) = $statement; 556 $pdo_stmt = $pdo->prepare($sql); 557 $pdo_stmt->execute($params); 558 } 559 if(!$pdo->commit()) { 560 msg('Sqlite error during commit.', -1); 561 return; 562 } 563 } catch (PDOException $ex) { 564 $pdo->rollBack(); 565 throw $ex; // TODO handle this case, e.g., by falling back to single queries? 566 } 567 $this->_sqlStatements = array(); 568 } 569 570 /** 571 * Extracting the data of one bibtex entry 572 * 573 * The parse function splits the content into its entries. 574 * Then every entry is parsed by this function. 575 * It parses the entry backwards. 576 * First the last '=' is searched and the value extracted from that. 577 * A copy is made of the entry if warnings should be generated. This takes quite 578 * some memory but it is needed to get good warnings. If no warnings are generated 579 * then you don't have to worry about memory. 580 * Then the last ',' is searched and the field extracted from that. 581 * Again the entry is shortened. 582 * Finally after all field=>value pairs the cite and type is extraced and the 583 * authors are splitted. 584 * If there is a problem false is returned. 
     *
     * NOTE(review): every return path visible in this method returns the $ret
     * array (which always contains the 'bibtex' key); no path returns false.
     *
     * @access private
     * @param string $entry The entry, starting with @ and without its final closing brace
     * @return array The representation of the entry or false if there is a problem
     */
    private function _parseEntry($entry)
    {
        $entrycopy = '';
        if ($this->_options['validate']) {
            $entrycopy = $entry; //We need a copy for printing the warnings
        }
        // The raw entry (closing brace re-added) is always part of the result
        $ret = array('bibtex' => $entry.'}');
        if ('@string' == strtolower(substr($entry, 0, 7))) {
            // @string definition: remember name => value (first and last
            // character of the value, i.e. its delimiters, are cut off)
            $matches = array();
            preg_match('/^@\w+\{(.+)/' ,$entry, $matches);
            if ( count($matches) > 0 )
            {
                $m = explode('=',$matches[1],2);
                $this->_strings[trim($m[0])] = substr(trim($m[1]),1,-1);
            }
        } elseif ('@preamble' == strtolower(substr($entry, 0, 9))) {
            //Preamble not yet supported!
            if ($this->_options['validate']) {
                $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
            }
        } else {
            // Look for key (only set when the key consists of word characters)
            $matches = array();
            preg_match('/^@\w+\{([\w\d]+),/' ,$entry, $matches);
            if ( count($matches) > 0 )
            {
                $ret['entrykey'] = $matches[1];
            }

            //Parsing all fields, from the last field of the entry to the first;
            //$entry is shortened after every field
            while (strrpos($entry,'=') !== false) {
                $position = strrpos($entry, '=');
                //Checking that the equal sign is not quoted or is not inside a equation (For example in an abstract)
                $proceed = true;
                if (substr($entry, $position-1, 1) == '\\') {
                    $proceed = false;
                }
                if ($proceed) {
                    $proceed = $this->_checkEqualSign($entry, $position);
                }
                // Step back to the previous '=' until one is accepted.
                // NOTE(review): if no earlier '=' exists, strrpos() returns
                // false and this loop has no exit for that case - confirm
                // whether such input can reach this point.
                while (!$proceed) {
                    $substring = substr($entry, 0, $position);
                    $position = strrpos($substring,'=');
                    $proceed = true;
                    if (substr($entry, $position-1, 1) == '\\') {
                        $proceed = false;
                    }
                    if ($proceed) {
                        $proceed = $this->_checkEqualSign($entry, $position);
                    }
                }

                // Everything after the '=' is the value, everything before
                // it is what remains to be parsed
                $value = trim(substr($entry, $position+1));
                $entry = substr($entry, 0, $position);

                // Drop the comma separating this field from the next one
                if (',' == substr($value, strlen($value)-1, 1)) {
                    $value = substr($value, 0, -1);
                }
                if ($this->_options['validate']) {
                    $this->_validateValue($value, $entrycopy);
                }

                // Handle string replacements
                // IMPORTANT: Must precede stripDelimiter call
                // A value that does not start with a delimiter is looked up
                // as @string name: in the DB when a handle is set, otherwise
                // in the strings collected by this object
                if (!in_array(substr($value,0,1),array_keys($this->_delimiters))) {
                    if (!empty($this->sqlite)) {
                        $stringReplacement = $this->sqlite->res2arr($this->sqlite->query("SELECT entry FROM strings WHERE string = ?",$value));
                        if (!empty($stringReplacement)) {
                            $value = $stringReplacement[0]['entry'];
                        }
                    } elseif (array_key_exists($value,$this->_strings)) {
                        $value = $this->_strings[$value];
                    }
                }

                if ($this->_options['replaceLatex']) {
                    $value = $this->_replaceLatex($value);
                }

                if ($this->_options['stripDelimiter']) {
                    $value = $this->_stripDelimiter($value);
                }
                if ($this->_options['unwrap']) {
                    $value = $this->_unwrap($value);
                }
                if ($this->_options['removeCurlyBraces']) {
                    $value = $this->_removeCurlyBraces($value);
                }

                // The field name is the text between the last remaining
                // comma and the '=' that was just cut off
                $position = strrpos($entry, ',');
                $field = strtolower(trim(substr($entry, $position+1)));
                $ret[$field] = $value;
                $entry = substr($entry, 0, $position);
            }
            //Parsing cite and entry type (what is left is "@type{cite")
            $arr = explode('{', $entry);
            $ret['cite'] = trim($arr[1]);
            $ret['entrytype'] = strtolower(trim($arr[0]));
            if ('@' == $ret['entrytype'][0]) {
                $ret['entrytype'] = substr($ret['entrytype'], 1);
            }
            if ($this->_options['validate']) {
                if (!$this->_checkAllowedEntryType($ret['entrytype'])) {
                    $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
                }
            }
            //Handling the authors
            if (in_array('author', array_keys($ret)) && $this->_options['extractAuthors']) {
                // Array with all the authors in $ret['authors']
                $ret['authors'] = $this->_extractAuthors($ret['author']);
                // AuthorYear for sorting purposes in $ret['authoryear']
                // Without a year, the first four digits of 'date' are used,
                // and '[n.d.]' if there is no usable date either
                if (empty($ret['year'])) {
                    if (!empty($ret['date']) && preg_match('|(\d\d\d\d).*|U', $ret['date'], $matches)) {
                        $ret['year'] = $matches[1];
                    } else {
                        $ret['year'] = '[n.d.]';
                    }
                }
                $ret['authoryear'] = $ret['authors'][0]['last'] . $ret['year'];
                // Nicely formatted authors list in $ret['author']
                $tmparray = array();
                foreach ($ret['authors'] as $authorentry) {
                    $tmparray[] = $this->_formatAuthor($authorentry);
                }
                $ret['author'] = implode($this->authordelimiter, $tmparray);
            }
            //Handling the editors
            if (in_array('editor', array_keys($ret)) && $this->_options['extractAuthors']) {
                // Array with all the editors in $ret['editors']
                $ret['editors'] = $this->_extractAuthors($ret['editor']);
                // Nicely formatted editors list in $ret['editor']
                $tmparray = array();
                foreach ($ret['editors'] as $editorentry) {
                    $tmparray[] = $this->_formatAuthor($editorentry);
                }
                $ret['editor'] = implode($this->authordelimiter, $tmparray);
            }
        }
        return $ret;
    }

    /**
     * Parsing for a subset of LaTeX code that can be found more often in BibTeX entries
     *
     * The replacements are applied one after another to the whole value, so
     * their order matters (e.g. "---" is handled before "--").
     *
     * NOTE(review): several replacement literals below look as if HTML
     * entities had been decoded in this copy of the file (the two quotation
     * mark replacements are not even valid PHP as shown). Presumably the
     * originals are entities such as &quot; or &amp; - compare with the
     * repository before changing or relying on any of these literals.
     *
     * TODO: Extend this as necessary
     *
     * @param string $entry The field value to convert
     * @return string The value with the LaTeX constructs replaced
     */
    private function _replaceLatex($entry) {
        // \emph{...} -> <em>...</em>
        $entry = preg_replace('/\\\emph\{([^\}]+)\}/', '<em>$1</em>', $entry);
        // \textbf{...} -> <strong>...</strong>
        $entry = preg_replace('/\\\textbf\{([^\}]+)\}/', '<strong>$1</strong>', $entry);
        // quotation marks
        $entry = str_replace("``",""",$entry);
        $entry = str_replace("''",""",$entry);
        // \& -> &
        $entry = str_replace("\&","&",$entry);
        // \% -> %;
        $entry = str_replace("\%","%;",$entry);
        // "\ " -> " ";
        $entry = str_replace("\ "," ",$entry);
        // --- -> —
        $entry = str_replace("---","—",$entry);
        // -- -> -
        $entry = str_replace("--","-",$entry);
        // \url{...} -> link with the URL as both target and text
        $entry = preg_replace("/\\\url\{([^\}]+)\}/",'<a href="\\1">\\1</a>',$entry);
        // Handle umlauts (with and without braces around the letter)
        $entry = preg_replace('/\\\"\{([aeiouyAEIOU])\}/',"&\\1uml;",$entry);
        $entry = preg_replace('/\\\"([aeiouyAEIOU])/',"&\\1uml;",$entry);
        $entry = str_replace("\ss","ß",$entry);
        $entry = str_replace('"s',"ß",$entry);
        // Handle accents
        // Handle acute
        $entry = str_replace("\'c","ć",$entry);
        $entry = preg_replace("/\\\'(.?)/","&\\1acute;",$entry);
        // Handle grave
        $entry = preg_replace("/\\\`(.?)/","&\\1grave;",$entry);
        // Handle circumflex
        $entry = preg_replace("/\\\(\^)(.?)/","&\\2circ;",$entry);
        // Handle hatschek
        $entry = str_replace('\v{z}',"ž",$entry);
        $entry = str_replace('\v{c}',"č",$entry);
        // Handle cedille
        $entry = preg_replace("/\\\c\{(.?)\}/","\\1̧",$entry);
        // Handle tilde
        $entry = preg_replace("/\\\~(.?)/","&\\1tilde;",$entry);
        // ae and oe ligatures
        $entry = preg_replace('/\\\([aoAO]{1}[eE]{1})/',"&\\1lig;",$entry);
        // Handle i without dot
        $entry = str_replace("\i","ı",$entry);
        // Handle u with bar
        $entry = str_replace("\={u}","ū",$entry);
        // Handle \l and \L
        $entry = str_replace("\l","ł",$entry);
        $entry = str_replace("\L","Ł",$entry);

        // \o and \O
        $entry = preg_replace('/\\\([oO]{1})/',"&\\1slash;",$entry);
        // \aa and \AA
        $entry = preg_replace('/\\\([aA]{1})([aA]{1})/',"&\\1ring;",$entry);
        // Replace remaining "~" with " "
        $entry = str_replace("~"," ",$entry);
        // Handle math ($...$)
        // Only the presence of a $...$ section is tested here: preg_match()
        // yields the full match plus one capture group, the loop below runs
        // once per element, $match itself is not used and all replacements
        // are applied to the whole $entry.
        preg_match('/\$([^$]+)\$/' ,$entry, $matches);
        if ( count($matches) > 0 ) {
            foreach ($matches as $match) {
                // Fix superscript and subscript
                $entry = preg_replace("/\^\{([^\}]+)\}/","<sup>\\1</sup>",$entry);
                $entry = preg_replace("/_\{([^\}]+)\}/","<sub>\\1</sub>",$entry);
                $entry = preg_replace("/\^([\\\]{1}\w+)/","<sup>\\1</sup>",$entry);
                $entry = preg_replace("/_([\\\]{1}\w+)/","<sub>\\1</sub>",$entry);
                $entry = preg_replace("/\^([^\\\]{1})/","<sup>\\1</sup>",$entry);
                $entry = preg_replace("/_([^\\\]{1})/","<sub>\\1</sub>",$entry);
                // Replace LaTeX math commands, e.g. "\to"
                foreach ($this->_latexMathmodeReplacements as $orig => $repl) {
                    $entry = str_replace($orig,$repl,$entry);
                }
                // Replace both lowercase and uppercase Greek letters
                // (\Alpha -> &Alpha;, \alpha -> &alpha;, ...)
                foreach ($this->_greekLetters as $letter) {
                    $upLatex = '\\' . ucfirst($letter);
                    $upHtml = "&" . ucfirst($letter) . ";";
                    $loLatex = '\\' . $letter;
                    $loHtml = "&" . $letter . ";";
                    $entry = str_replace($upLatex,$upHtml,$entry);
                    $entry = str_replace($loLatex,$loHtml,$entry);
                }
            }
            // Finally, remove the LaTeX mathmode $ delimiters
            $entry = str_replace("$","",$entry);
        }
        return $entry;
    }

    /**
     * Checking whether the position of the '=' is correct
     *
     * Sometimes there is a problem if a '=' is used inside an entry (for example abstract).
     * This method checks if the '=' is outside braces then the '=' is correct and true is returned.
     * If the '=' is inside braces it belongs to an equation and therefore false is returned.
829 * 830 * @access private 831 * @param string $entry The text of the whole remaining entry 832 * @param int the current used place of the '=' 833 * @return bool true if the '=' is correct, false if it contains to an equation 834 */ 835 private function _checkEqualSign($entry, $position) 836 { 837 $ret = true; 838 //This is getting tricky 839 //We check the string backwards until the position and count the closing an opening braces 840 //If we reach the position the amount of opening and closing braces should be equal 841 $length = strlen($entry); 842 $open = 0; 843 for ($i = $length-1; $i >= $position; $i--) { 844 $precedingchar = substr($entry, $i-1, 1); 845 $char = substr($entry, $i, 1); 846 if (('{' == $char) && ('\\' != $precedingchar)) { 847 $open++; 848 } 849 if (('}' == $char) && ('\\' != $precedingchar)) { 850 $open--; 851 } 852 } 853 if (0 != $open) { 854 $ret = false; 855 } 856 //There is still the posibility that the entry is delimited by double quotes. 857 //Then it is possible that the braces are equal even if the '=' is in an equation. 858 if ($ret) { 859 $entrycopy = trim($entry); 860 $lastchar = $entrycopy[strlen($entrycopy)-1]; 861 if (',' == $lastchar) { 862 $lastchar = $entrycopy[strlen($entrycopy)-2]; 863 } 864 if ('"' == $lastchar) { 865 //The return value is set to false 866 //If we find the closing " before the '=' it is set to true again. 
867 //Remember we begin to search the entry backwards so the " has to show up twice - ending and beginning delimiter 868 $ret = false; 869 $found = 0; 870 for ($i = $length; $i >= $position; $i--) { 871 $precedingchar = substr($entry, $i-1, 1); 872 $char = substr($entry, $i, 1); 873 if (('"' == $char) && ('\\' != $precedingchar)) { 874 $found++; 875 } 876 if (2 == $found) { 877 $ret = true; 878 break; 879 } 880 } 881 } 882 } 883 return $ret; 884 } 885 886 /** 887 * Checking if the entry type is allowed 888 * 889 * @access private 890 * @param string $entry The entry to check 891 * @return bool true if allowed, false otherwise 892 */ 893 private function _checkAllowedEntryType($entry) 894 { 895 return in_array($entry, $this->allowedEntryTypes); 896 } 897 898 /** 899 * Checking whether an at is outside an entry 900 * 901 * Sometimes an entry misses an entry brace. Then the at of the next entry seems to be 902 * inside an entry. This is checked here. When it is most likely that the at is an opening 903 * at of the next entry this method returns true. 904 * 905 * @access private 906 * @param string $entry The text of the entry until the at 907 * @return bool true if the at is correct, false if the at is likely to begin the next entry. 
908 */ 909 private function _checkAt($entry) 910 { 911 $ret = false; 912 $opening = array_keys($this->_delimiters); 913 $closing = array_values($this->_delimiters); 914 //Getting the value (at is only allowd in values) 915 if (strrpos($entry,'=') !== false) { 916 $position = strrpos($entry, '='); 917 $proceed = true; 918 if (substr($entry, $position-1, 1) == '\\') { 919 $proceed = false; 920 } 921 while (!$proceed) { 922 $substring = substr($entry, 0, $position); 923 $position = strrpos($substring,'='); 924 $proceed = true; 925 if (substr($entry, $position-1, 1) == '\\') { 926 $proceed = false; 927 } 928 } 929 $value = trim(substr($entry, $position+1)); 930 $open = 0; 931 $char = ''; 932 $lastchar = ''; 933 for ($i = 0; $i < strlen($value); $i++) { 934 $char = substr($this->content, $i, 1); 935 if (in_array($char, $opening) && ('\\' != $lastchar)) { 936 $open++; 937 } elseif (in_array($char, $closing) && ('\\' != $lastchar)) { 938 $open--; 939 } 940 $lastchar = $char; 941 } 942 //if open is grater zero were are inside an entry 943 if ($open>0) { 944 $ret = true; 945 } 946 } 947 return $ret; 948 } 949 950 /** 951 * Stripping Delimiter 952 * 953 * @access private 954 * @param string $entry The entry where the Delimiter should be stripped from 955 * @return string Stripped entry 956 */ 957 private function _stripDelimiter($entry) 958 { 959 $beginningdels = array_keys($this->_delimiters); 960 $length = strlen($entry); 961 $firstchar = substr($entry, 0, 1); 962 $lastchar = substr($entry, -1, 1); 963 while (in_array($firstchar, $beginningdels)) { //The first character is an opening delimiter 964 if ($lastchar == $this->_delimiters[$firstchar]) { //Matches to closing Delimiter 965 $entry = substr($entry, 1, -1); 966 } else { 967 break; 968 } 969 $firstchar = substr($entry, 0, 1); 970 $lastchar = substr($entry, -1, 1); 971 } 972 return $entry; 973 } 974 975 /** 976 * Unwrapping entry 977 * 978 * @access private 979 * @param string $entry The entry to unwrap 980 * @return 
string unwrapped entry
     */
    private function _unwrap($entry)
    {
        // Collapse every run of whitespace (including line breaks) to one blank
        return trim(preg_replace('/\s+/', ' ', $entry));
    }

    /**
     * Wordwrap an entry
     *
     * Non-empty strings are wrapped using the wordWrapWidth, wordWrapBreak
     * and wordWrapCut options; everything else is returned untouched.
     *
     * @access private
     * @param string $entry The entry to wrap
     * @return string wrapped entry
     */
    private function _wordwrap($entry)
    {
        if (('' == $entry) || !is_string($entry)) {
            return $entry;
        }
        return wordwrap(
            $entry,
            $this->_options['wordWrapWidth'],
            $this->_options['wordWrapBreak'],
            $this->_options['wordWrapCut']
        );
    }

    /**
     * Extracting the authors
     *
     * Splits the author list at " and " (matched case-insensitively) and
     * breaks every name into its parts. Three notations are handled:
     *  1. "First von Last"      (no comma)
     *  2. "von Last, First"     (one comma)
     *  3. "von Last, Jr, First" (two commas)
     *
     * @access private
     * @param string $entry The entry with the authors
     * @return array the extracted authors, each one an array with the keys
     *               'first', 'von', 'last' and 'jr'
     */
    private function _extractAuthors($entry) {
        $entry = $this->_unwrap($entry);
        // Normalise the separator regardless of its case (and, AND, And, ...)
        $entry = str_ireplace(' and ', ' and ', $entry);
        $authors = array();
        foreach (explode(' and ', $entry) as $rawauthor) {
            $author = trim($rawauthor);
            $first  = '';
            $von    = '';
            $last   = '';
            $jr     = '';
            if (strpos($author, ',') === false) {
                // Notation 1 (First von Last) has no commas in it
                $tmparray = explode(' ', $author);
                $size     = sizeof($tmparray);
                if (1 == $size) { //There is only a last
                    $last = $tmparray[0];
                } elseif (2 == $size) { //There is a first and a last
                    $first = $tmparray[0];
                    $last  = $tmparray[1];
                } else {
                    $invon  = false;
                    $inlast = false;
                    for ($j = 0; $j < ($size - 1); $j++) {
                        $word = $tmparray[$j];
                        if ($inlast) {
                            $last .= ' '.$word;
                            continue;
                        }
                        $case = $this->_determineCase($word);
                        if (!$invon) {
                            if (0 === $case) { //Change from first to von
                                $invon = true;
                                $von  .= ' '.$word;
                            } else {
                                $first .= ' '.$word;
                            }
                            continue;
                        }
                        // Inside the von part: a lower case or caseless word ends
                        // it, but only if no lower case word follows any more
                        if (((0 === $case) || (-1 === $case))
                            && !$this->_hasLowercaseWord($tmparray, $j + 1, $size - 1)) {
                            $inlast = true;
                            if (-1 === $case) { //Caseless belongs to the last
                                $last .= ' '.$word;
                            } else {
                                $von .= ' '.$word;
                            }
                        } else {
                            $von .= ' '.$word;
                        }
                    }
                    //The last word is always part of the last name
                    $last .= ' '.$tmparray[$size - 1];
                }
            } else { //Notation 2 and 3
                $tmparray = explode(',', $author);
                //The first part must contain von and last
                $vonlastarray = explode(' ', $tmparray[0]);
                $size         = sizeof($vonlastarray);
                if (1 == $size) { //Only one word -> got to be the last
                    $last = $vonlastarray[0];
                } else {
                    $inlast = false;
                    for ($j = 0; $j < ($size - 1); $j++) {
                        $word = $vonlastarray[$j];
                        if ($inlast) {
                            $last .= ' '.$word;
                        } elseif ((0 !== $this->_determineCase($word))
                            && !$this->_hasLowercaseWord($vonlastarray, $j + 1, $size - 1)) {
                            //Change from von to last
                            $inlast = true;
                            $last  .= ' '.$word;
                        } else {
                            $von .= ' '.$word;
                        }
                    }
                    $last .= ' '.$vonlastarray[$size - 1];
                }
                //Notation 3: three parts (two commas), the middle one is the jr
                if (3 == sizeof($tmparray)) {
                    $jr = $tmparray[1];
                }
                //Everything in the last part is first
                $first = $tmparray[sizeof($tmparray) - 1];
            }
            $authors[] = array(
                'first' => trim($first),
                'von'   => trim($von),
                'last'  => trim($last),
                'jr'    => trim($jr)
            );
        }
        return $authors;
    }

    /**
     * Checks whether a range of words contains a lower case word
     *
     * @access private
     * @param array $words List of words
     * @param int   $from  Index of the first word to check
     * @param int   $to    Index after the last word to check (exclusive)
     * @return bool true if _determineCase() yields 0 for any word in the range
     */
    private function _hasLowercaseWord($words, $from, $to)
    {
        for ($k = $from; $k < $to; $k++) {
            if (0 === $this->_determineCase($words[$k])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Case Determination according to the needs of BibTex
     *
     * To parse the Author(s) correctly a determination is needed
     * to get the Case of a word. There are three possible values:
     * - Upper Case (return value 1)
     * - Lower Case (return value 0)
     * - Caseless   (return value -1)
     *
     * The case is taken from the first ASCII letter found at a position
     * where the count of '{' seen so far equals the count of '}'.
     *
     * @access private
     * @param string $word
     * @return int The Case or PHP_INT_MAX if there was a problem
     *             (no string given, or nothing left after trimming)
     */
    private function _determineCase($word) {
        // Check the type first: trim() must only ever see a string
        if (!is_string($word)) {
            return PHP_INT_MAX;
        }
        $trimmedword = trim($word);
        $length      = strlen($trimmedword);
        if (0 == $length) {
            return PHP_INT_MAX;
        }
        $openbrace = 0;
        for ($i = 0; $i < $length; $i++) {
            $ord = ord($trimmedword[$i]);
            if ($ord == 123) { //Open brace
                $openbrace++;
            } elseif ($ord == 125) { //Closing brace
                $openbrace--;
            } elseif (0 == $openbrace) {
                if (($ord >= 65) && ($ord <= 90)) { //Upper case letter
                    return 1;
                }
                if (($ord >= 97) && ($ord <= 122)) { //Lower case letter
                    return 0;
                }
            }
        }
        //No letter found outside of braces
        return -1;
    }

    /**
     * Validation of a value
     *
     * There may be several problems with the value of a field.
     * These problems exist but do not break the parsing.
* If a problem is detected a warning is appended to the array warnings.
     *
     * Checks performed:
     *  - '@' inside a value enclosed by braces
     *  - backslash-escaped double quote inside a value enclosed by double quotes
     *  - unequal number of unescaped opening and closing braces
     *
     * @access private
     * @param string $entry The entry aka one line which should be validated
     * @param string $wholeentry The whole BibTex Entry which the one line is part of
     * @return void
     */
    private function _validateValue($entry, $wholeentry)
    {
        //There is no @ allowed if the entry is enclosed by braces
        if (preg_match('/^{.*@.*}$/', $entry)) {
            $this->_generateWarning('WARNING_AT_IN_BRACES', $entry, $wholeentry);
        }
        //No escaped " allowed if the entry is enclosed by double quotes.
        //The four backslashes in the PHP literal become \\ for PCRE, i.e.
        //one literal backslash in front of the quote.
        if (preg_match('/^".*\\\\".*"$/', $entry)) {
            $this->_generateWarning('WARNING_ESCAPED_DOUBLE_QUOTE_INSIDE_DOUBLE_QUOTES', $entry, $wholeentry);
        }
        //Amount of braces is not correct (escaped braces are not counted)
        $open     = 0;
        $lastchar = '';
        $length   = strlen($entry);
        for ($i = 0; $i < $length; $i++) {
            $char = $entry[$i];
            if ('\\' != $lastchar) {
                if ('{' == $char) {
                    $open++;
                } elseif ('}' == $char) {
                    $open--;
                }
            }
            $lastchar = $char;
        }
        if (0 != $open) {
            $this->_generateWarning('WARNING_UNBALANCED_AMOUNT_OF_BRACES', $entry, $wholeentry);
        }
    }

    /**
     * Remove curly braces from entry
     *
     * Matching outer delimiter pairs are kept; every curly brace in
     * between is removed.
     *
     * @access private
     * @param string $value The value in which curly braces to be removed
     * @return string Value with removed curly braces
     */
    private function _removeCurlyBraces($value)
    {
        //First we save (and peel off) the surrounding delimiters
        $openers = array_keys($this->_delimiters);
        $begin   = '';
        $end     = '';
        while (true) {
            $first = substr($value, 0, 1);
            $final = substr($value, -1, 1);
            if (!in_array($first, $openers) || ($final != $this->_delimiters[$first])) {
                break; //No (more) matching pair of outer delimiters
            }
            $begin .= $first;
            $end   .= $final;
            $value  = substr($value, 1, -1);
        }
        //Now we get rid of the curly braces and reattach the delimiters
        return $begin.preg_replace('/[\{\}]/', '', $value).$end;
    }

    /**
     * Generates a warning
     *
     * The warning is appended to the list in $this->warnings.
     *
     * @access private
     * @param string $type The type of the warning
     * @param string $entry The line of the entry where the warning occurred
     * @param string $wholeentry OPTIONAL The whole entry where the warning occurred
     */
    private function _generateWarning($type, $entry, $wholeentry='')
    {
        $this->warnings[] = array(
            'warning'    => $type,
            'entry'      => $entry,
            'wholeentry' => $wholeentry
        );
    }

    /**
     * Clears all warnings
     *
     * @access public
     */
    public function clearWarnings()
    {
        $this->warnings = array();
    }

    /**
     * Is there a warning?
*
     * @access public
     * @return true if there is, false otherwise
     */
    public function hasWarning()
    {
        return (sizeof($this->warnings) > 0);
    }

    /**
     * Returns the author formatted
     *
     * The author is formatted as set in the authorstring, in which the
     * placeholders VON, LAST, JR and FIRST are replaced by the respective
     * (trimmed) parts of the name. Missing parts count as empty strings.
     *
     * All placeholders are replaced in one single pass, so text that has
     * been inserted for one placeholder is never mistaken for another
     * placeholder (e.g. a last name containing "FIRST").
     *
     * @access private
     * @param array $array Author array (keys 'first', 'von', 'last', 'jr')
     * @return string the formatted author string
     */
    private function _formatAuthor($array)
    {
        $parts = array();
        foreach (array('von', 'last', 'jr', 'first') as $key) {
            $parts[$key] = array_key_exists($key, $array) ? trim((string) $array[$key]) : '';
        }
        $replacements = array(
            'VON'   => $parts['von'],
            'LAST'  => $parts['last'],
            'FIRST' => $parts['first']
        );
        // Assuming that "jr" is always separated by a comma: without a jr
        // part the placeholder is dropped together with its leading comma
        if (!empty($parts['jr'])) {
            $replacements['JR'] = $parts['jr'];
        } else {
            $replacements[', JR'] = '';
        }
        return trim(strtr((string) $this->authorstring, $replacements));
    }

}
?>