<?php
/**
 * The following changes have been made by Raphael Reitzig, 2010-2012:
 * - fixed spelling (l184)
 * - added source bibtex to entry in data array (l380)
 * - added entry key to entry in data array if present (l394 ff)
 * - fixed brace removal (l893)
 * - removed inclusion of PEAR.PHP as we handle this caller-site
 * - applied patch from http://pear.php.net/bugs/bug.php?id=14442
 * - Fixed bug in l398: entry keys could previously contain only letters and
 *   digits; everything is allowed now.
 * - Fixed parsing of authors (comma-less version was broken)
 * - Added printable and comparable variants to author arrays
 * - Added printable and comparable variants of authors to entries
 */

/**
 * Class for working with BibTex data
 *
 * A class which provides common methods to access and
 * create Strings in BibTex format
 *
 * PHP version 5 (5.3 or later, closures are used)
 *
 * LICENSE: This source file is subject to version 3.0 of the PHP license
 * that is available through the world-wide-web at the following URI:
 * http://www.php.net/license/3_0.txt. If you did not receive a copy of
 * the PHP License and are unable to obtain it through the web, please
 * send a note to license@php.net so we can mail you a copy immediately.
 *
 * @category   Structures
 * @package    Structures_BibTex
 * @author     Elmar Pitschke <elmar.pitschke@gmx.de>
 * @copyright  1997-2005 The PHP Group
 * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
 * @version    CVS: $Id: BibTex.php 304756 2010-10-25 10:19:43Z clockwerx $
 * @link       http://pear.php.net/package/Structures_BibTex
 */

// The PEAR base class is not loaded here; the including code has to provide it.
//require_once('PEAR.php');

/**
 * Structures_BibTex
 *
 * A class which provides common methods to access and
 * create Strings in BibTex format.
 * Example 1: Parsing a BibTex File and returning the number of entries
 * <code>
 * $bibtex = new Structures_BibTex();
 * $ret    = $bibtex->loadFile('foo.bib');
 * if (PEAR::isError($ret)) {
 *   die($ret->getMessage());
 * }
 * $bibtex->parse();
 * print "There are ".$bibtex->amount()." entries";
 * </code>
 * Example 2: Parsing a BibTex File and getting all Titles
 * <code>
 * $bibtex = new Structures_BibTex();
 * $ret    = $bibtex->loadFile('bibtex.bib');
 * if (PEAR::isError($ret)) {
 *   die($ret->getMessage());
 * }
 * $bibtex->parse();
 * foreach ($bibtex->data as $entry) {
 *  print $entry['title']."<br />";
 * }
 * </code>
 * Example 3: Adding an entry and printing it in BibTex Format
 * <code>
 * $bibtex                         = new Structures_BibTex();
 * $addarray                       = array();
 * $addarray['entrytype']          = 'Article';
 * $addarray['cite']               = 'art2';
 * $addarray['title']              = 'Titel2';
 * $addarray['author'][0]['first'] = 'John';
 * $addarray['author'][0]['last']  = 'Doe';
 * $addarray['author'][1]['first'] = 'Jane';
 * $addarray['author'][1]['last']  = 'Doe';
 * $bibtex->addEntry($addarray);
 * print nl2br($bibtex->bibTex());
 * </code>
 *
 * @category   Structures
 * @package    Structures_BibTex
 * @author     Elmar Pitschke <elmar.pitschke@gmx.de>
 * @copyright  1997-2005 The PHP Group
 * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
 * @version    Release: @package_version@
 * @link       http://pear.php.net/Structures/Structure_BibTex
 */
class Structures_BibTex
{
    /**
     * Array with the BibTex Data
     *
     * @access public
     * @var array
     */
    public $data;
    /**
     * String with the BibTex content
     *
     * @access public
     * @var string
     */
    public $content;
    /**
     * Array with possible Delimiters for the entries
     * (opening delimiter => closing delimiter)
     *
     * @access private
     * @var array
     */
    public $_delimiters;
    /**
     * Array to store warnings
     *
     * @access public
     * @var array
     */
    public $warnings;
    /**
     * Run-time configuration options
     *
     * @access private
     * @var array
     */
    public $_options;
    /**
     * RTF Format String
     *
     * @access public
     * @var string
     */
    public $rtfstring;
    /**
     * HTML Format String
     *
     * @access public
     * @var string
     */
    public $htmlstring;
    /**
     * Array with the "allowed" entry types
     *
     * @access public
     * @var array
     */
    public $allowedEntryTypes;
    /**
     * Author Format Strings
     *
     * @access public
     * @var string
     */
    public $authorstring;
    /**
     * Position marker, reset to 0 whenever new content is loaded.
     * Declared explicitly because loadFile()/loadString() assign it.
     *
     * @access private
     * @var int
     */
    public $_pos = 0;
    /**
     * Previous position marker, reset to 0 whenever new content is loaded.
     *
     * @access private
     * @var int
     */
    public $_oldpos = 0;

    /**
     * Constructor
     *
     * Sets the default options, applies the given ones and initialises the
     * format strings and the list of allowed entry types.
     *
     * @access public
     * @param array $options Option name => value pairs, see setOption().
     *                       Unknown option names are ignored.
     * @return void
     */
    public function __construct($options = array())
    {
        $this->_delimiters = array('"' => '"',
                                   '{' => '}');
        $this->data        = array();
        $this->content     = '';
        $this->warnings    = array();
        $this->_options    = array(
            'stripDelimiter'    => true,
            'validate'          => true,
            'unwrap'            => false,
            'wordWrapWidth'     => false,
            'wordWrapBreak'     => "\n",
            'wordWrapCut'       => 0,
            'removeCurlyBraces' => false,
            'extractAuthors'    => true,
        );
        foreach ($options as $option => $value) {
            //The result (true or an error object) is deliberately discarded:
            //an unknown option must not abort the construction.
            $this->setOption($option, $value);
        }
        $this->rtfstring         = 'AUTHORS, "{\b TITLE}", {\i JOURNAL}, YEAR';
        $this->htmlstring        = 'AUTHORS, "<strong>TITLE</strong>", <em>JOURNAL</em>, YEAR<br />';
        $this->allowedEntryTypes = array(
            'article',
            'book',
            'booklet',
            'conference',
            'inbook',
            'incollection',
            'inproceedings',
            'manual',
            'mastersthesis',
            'misc',
            'phdthesis',
            'proceedings',
            'techreport',
            'unpublished'
        );
        $this->authorstring = 'VON LAST, JR, FIRST';
    }

    /**
     * Old style constructor name, kept so that code calling it explicitly
     * (for example a subclass) keeps working. It only delegates to __construct().
     *
     * @access public
     * @param array $options Option name => value pairs
     * @return void
     */
    public function Structures_BibTex($options = array())
    {
        $this->__construct($options);
    }

    /**
     * Sets run-time configuration options
     *
     * Only options that already exist in the option array can be set.
     *
     * @access public
     * @param string $option option name
     * @param mixed  $value value for the option
     * @return mixed true on success PEAR_Error on failure
     */
    public function setOption($option, $value)
    {
        if (!array_key_exists($option, $this->_options)) {
            return PEAR::raiseError('Unknown option '.$option);
        }
        $this->_options[$option] = $value;
        return true;
    }

    /**
     * Reads a given BibTex File
     *
     * The content is only replaced when the file could be read.
     *
     * @access public
     * @param string $filename Name of the file
     * @return mixed true on success PEAR_Error on failure
     */
    public function loadFile($filename)
    {
        if (!file_exists($filename)) {
            return PEAR::raiseError('Could not find file '.$filename);
        }
        $read = @file_get_contents($filename);
        if (false === $read) {
            return PEAR::raiseError('Could not open file '.$filename);
        }
        $this->content = $read;
        $this->_pos    = 0;
        $this->_oldpos = 0;
        return true;
    }

    /**
     * Reads bibtex from a string variable
     *
     * @access public
     * @param string $bib String containing bibtex
     * @return boolean true
     */
    public function loadString($bib)
    {
        $this->content = $bib;
        $this->_pos    = 0;
        $this->_oldpos = 0;
        return true; // For compatibility with loadFile
    }

    /**
     * Parses what is stored in content and clears the content if the parsing is successful.
     *
     * The content is scanned character by character. An entry starts at an '@'
     * outside of any braces and ends when its braces are balanced again; every
     * finished entry is handed to _parseEntry() and appended to the data array.
     * Warnings and data of a previous run are discarded first.
     *
     * @access public
     * @return boolean true on success and PEAR_Error if there was a problem
     */
    public function parse()
    {
        $this->warnings = array();
        $this->data     = array();

        $content  = (string) $this->content;
        $length   = strlen($content);
        $valid    = true;
        $open     = 0;     //Number of currently open braces
        $inEntry  = false;
        $lastchar = '';
        $buffer   = '';

        for ($i = 0; $i < $length; $i++) {
            $char = $content[$i];
            //An '@' inside an open entry may be the start of the next entry
            //whose predecessor lacks its closing brace. The correction is only
            //attempted for a positive depth: with a negative depth it would
            //decrement and rewind forever.
            if (($open > 0) && ('@' === $char) && !$this->_checkAt($buffer)) {
                $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer);
                //To correct the data a closing brace is processed instead and
                //the '@' is read again in the next iteration
                $char = '}';
                $i--;
            }
            $unescaped = ('\\' !== $lastchar);
            if ((0 === $open) && ('@' === $char)) { //The beginning of an entry
                $inEntry = true;
            } elseif ($inEntry && $unescaped && ('{' === $char)) {
                $open++;
            } elseif ($inEntry && $unescaped && ('}' === $char)) {
                $open--;
                if ($open < 0) { //More are closed than opened
                    $valid = false;
                }
                if (0 === $open) { //End of entry
                    $inEntry   = false;
                    $entrydata = $this->_parseEntry($buffer);
                    if ($entrydata) {
                        $this->data[] = $entrydata;
                    }
                    //A rejected entry has already produced a warning,
                    //nothing more to do for it here
                    $buffer = '';
                }
            }
            if ($inEntry) {
                $buffer .= $char;
            }
            $lastchar = $char;
        }

        //If open is one it may be possible that the last ending brace is missing
        if (1 === $open) {
            $entrydata = $this->_parseEntry($buffer);
            if ($entrydata) {
                $this->data[] = $entrydata;
                $buffer       = '';
                $open         = 0;
            } else {
                $valid = false;
            }
        }
        //At this point the open should be zero
        if (0 !== $open) {
            $valid = false;
        }

        //Are there multiple entries with the same cite?
        if ($this->_options['validate']) {
            $seen       = array();
            $notuniques = array();
            foreach ($this->data as $parsed) {
                if (!isset($parsed['cite'])) { //e.g. @string and @preamble
                    continue;
                }
                $cite = $parsed['cite'];
                if (isset($seen[$cite])) {
                    $notuniques[] = $cite; //Every repeated occurrence is reported
                } else {
                    $seen[$cite] = true;
                }
            }
            if (count($notuniques) > 0) {
                $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',', $notuniques));
            }
        }

        if (!$valid) {
            return PEAR::raiseError('Unbalanced parenthesis');
        }
        $this->content = '';
        return true;
    }

    /**
     * Extracting the data of one content
     *
     * The parse function splits the content into its entries.
     * Then every entry is parsed by this function.
     * It parses the entry backwards.
     * First the last '=' is searched and the value extracted from that.
     * A copy is made of the entry if warnings should be generated. This takes quite
     * some memory but it is needed to get good warnings. If no warnings are generated
     * then you do not have to worry about memory.
     * Then the last ',' is searched and the field extracted from that.
     * Again the entry is shortened.
     * Finally after all field=>value pairs the cite and type is extracted and the
     * authors are split.
     *
     * Besides the fields the result contains 'bibtex' (the source of the entry),
     * 'entrykey' (if a key was found), 'cite', 'entrytype' and, when authors are
     * extracted, 'niceauthor' and 'sortauthor'. For @string and @preamble entries
     * only 'bibtex' is set.
     *
     * @access private
     * @param string $entry The entry, without its final closing brace
     * @return array The representation of the entry
     */
    public function _parseEntry($entry)
    {
        $validate  = $this->_options['validate'];
        $entrycopy = $validate ? $entry : ''; //We need a copy for printing the warnings
        $ret       = array('bibtex' => $entry.'}');

        if ('@string' == strtolower(substr($entry, 0, 7))) {
            //String are not yet supported!
            if ($validate) {
                $this->_generateWarning('STRING_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
            }
            return $ret;
        }
        if ('@preamble' == strtolower(substr($entry, 0, 9))) {
            //Preamble not yet supported!
            if ($validate) {
                $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
            }
            return $ret;
        }

        // Look for key
        $matches = array();
        if (preg_match('/^@\w+\{(.+?),/', $entry, $matches)) {
            $ret['entrykey'] = $matches[1];
        }

        //Parsing all fields, from the last one to the first one
        while (false !== ($position = $this->_findFieldAssignment($entry))) {
            $value = trim(substr($entry, $position + 1));
            $entry = substr($entry, 0, $position);

            if (',' === substr($value, -1)) {
                $value = substr($value, 0, -1);
            }
            if ($validate) {
                $this->_validateValue($value, $entrycopy);
            }
            if ($this->_options['stripDelimiter']) {
                $value = $this->_stripDelimiter($value);
            }
            if ($this->_options['unwrap']) {
                $value = $this->_unwrap($value);
            }
            if ($this->_options['removeCurlyBraces']) {
                $value = $this->_removeCurlyBraces($value);
            }

            //The field name is what follows the last remaining comma. Without
            //any comma the offset 0 is used, which matches what the former
            //implicit false-to-integer conversion did.
            $comma = strrpos($entry, ',');
            if (false === $comma) {
                $comma = 0;
            }
            $field       = strtolower(trim(substr($entry, $comma + 1)));
            $ret[$field] = $value;
            $entry       = substr($entry, 0, $comma);
        }

        //Parsing cite and entry type
        $arr              = explode('{', $entry);
        $ret['cite']      = isset($arr[1]) ? trim($arr[1]) : '';
        $ret['entrytype'] = strtolower(trim($arr[0]));
        if ('@' === substr($ret['entrytype'], 0, 1)) {
            $ret['entrytype'] = substr($ret['entrytype'], 1);
        }
        if ($validate && !$this->_checkAllowedEntryType($ret['entrytype'])) {
            $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
        }

        //Handling the authors
        if (array_key_exists('author', $ret) && $this->_options['extractAuthors']) {
            $ret['author']     = $this->_extractAuthors($ret['author']);
            $ret['niceauthor'] = join(', ', array_map(function ($a) { return $a['nice']; }, $ret['author']));
            $ret['sortauthor'] = join('', array_map(function ($a) { return $a['sort']; }, $ret['author']));
        }
        return $ret;
    }

    /**
     * Finds the last '=' of an entry that separates a field from its value
     *
     * Equal signs that are preceded by a backslash or that are rejected by
     * _checkEqualSign() are skipped. The search ends when no '=' is left, so
     * it terminates for every input.
     *
     * @access private
     * @param string $entry The remaining text of the entry
     * @return mixed Offset of the '=' or false if there is none
     */
    private function _findFieldAssignment($entry)
    {
        $position = strrpos($entry, '=');
        while (false !== $position) {
            $escaped = ($position > 0) && ('\\' === $entry[$position - 1]);
            if (!$escaped && $this->_checkEqualSign($entry, $position)) {
                return $position;
            }
            $position = strrpos(substr($entry, 0, $position), '=');
        }
        return false;
    }

    /**
     * Checking whether the position of the '=' is correct
     *
     * Sometimes there is a problem if a '=' is used inside an entry (for example abstract).
     * This method checks if the '=' is outside braces then the '=' is correct and true is returned.
     * If the '=' is inside braces it belongs to an equation and therefore false is returned.
     *
     * @access private
     * @param string $entry The text of the whole remaining entry
     * @param int $position the current used place of the '='
     * @return bool true if the '=' is correct, false if it belongs to an equation
     */
    public function _checkEqualSign($entry, $position)
    {
        $length   = strlen($entry);
        $position = max(0, (int) $position); //A non-position (false) must not run below the start

        //The string is checked backwards until the position; the amount of
        //unescaped opening and closing braces has to be equal
        $open = 0;
        for ($i = $length - 1; $i >= $position; $i--) {
            if (($i > 0) && ('\\' === $entry[$i - 1])) {
                continue; //Escaped character
            }
            if ('{' === $entry[$i]) {
                $open++;
            } elseif ('}' === $entry[$i]) {
                $open--;
            }
        }
        if (0 !== $open) {
            return false;
        }

        //There is still the possibility that the entry is delimited by double quotes.
        //Then it is possible that the braces are equal even if the '=' is in an equation.
        $trimmed  = trim($entry);
        $lastchar = substr($trimmed, -1);
        if (',' === $lastchar) {
            $lastchar = substr($trimmed, -2, 1);
        }
        if ('"' !== $lastchar) {
            return true;
        }
        //Searching backwards the " has to show up twice (ending and beginning
        //delimiter) before the '=' is reached
        $found = 0;
        for ($i = $length - 1; $i >= $position; $i--) {
            $escaped = ($i > 0) && ('\\' === $entry[$i - 1]);
            if (!$escaped && ('"' === $entry[$i])) {
                $found++;
                if (2 === $found) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Checking if the entry type is allowed
     *
     * @access private
     * @param string $entry The entry to check
     * @return bool true if allowed, false otherwise
     */
    public function _checkAllowedEntryType($entry)
    {
        return in_array($entry, $this->allowedEntryTypes);
    }

    /**
     * Checking whether an at is outside an entry
     *
     * Sometimes an entry misses an entry brace. Then the at of the next entry seems to be
     * inside an entry. This is checked here. The text after the last unescaped '='
     * is taken as the value that is currently read; if it still has an open
     * delimiter the at belongs to that value.
     *
     * Note that the double quote is both an opening and a closing delimiter and
     * is counted as opening here.
     *
     * @access private
     * @param string $entry The text of the entry until the at
     * @return bool true if the at is correct, false if the at is likely to begin the next entry.
     */
    public function _checkAt($entry)
    {
        //Getting the value (at is only allowed in values)
        $position = strrpos($entry, '=');
        while ((false !== $position) && ($position > 0) && ('\\' === $entry[$position - 1])) {
            $position = strrpos(substr($entry, 0, $position), '=');
        }
        if (false === $position) {
            return false;
        }

        $value    = trim(substr($entry, $position + 1));
        $opening  = array_keys($this->_delimiters);
        $closing  = array_values($this->_delimiters);
        $open     = 0;
        $lastchar = '';
        $length   = strlen($value);
        for ($i = 0; $i < $length; $i++) {
            $char = $value[$i]; //The value is inspected, not the whole content
            if ('\\' !== $lastchar) {
                if (in_array($char, $opening, true)) {
                    $open++;
                } elseif (in_array($char, $closing, true)) {
                    $open--;
                }
            }
            $lastchar = $char;
        }
        //If open is greater zero we are inside a value
        return $open > 0;
    }

    /**
     * Stripping Delimiter
     *
     * As long as the first character is an opening delimiter and the last
     * character is the matching closing one, both are removed.
     *
     * @access private
     * @param string $entry The entry where the Delimiter should be stripped from
     * @return string Stripped entry
     */
    public function _stripDelimiter($entry)
    {
        while (true) {
            $firstchar = substr($entry, 0, 1);
            if (!isset($this->_delimiters[$firstchar])
                || (substr($entry, -1, 1) != $this->_delimiters[$firstchar])) {
                return $entry;
            }
            $entry = substr($entry, 1, -1);
        }
    }

    /**
     * Unwrapping entry
     *
     * Every run of whitespace is replaced by a single blank.
     *
     * @access private
     * @param string $entry The entry to unwrap
     * @return string unwrapped entry
     */
    public function _unwrap($entry)
    {
        return trim(preg_replace('/\s+/', ' ', $entry));
    }

    /**
     * Wordwrap an entry
     *
     * Only non-empty strings are wrapped, everything else is returned as it is.
     *
     * @access private
     * @param string $entry The entry to wrap
     * @return string wrapped entry
     */
    public function _wordwrap($entry)
    {
        if (!is_string($entry) || ('' == $entry)) {
            return $entry;
        }
        return wordwrap(
            $entry,
            $this->_options['wordWrapWidth'],
            $this->_options['wordWrapBreak'],
            $this->_options['wordWrapCut']
        );
    }

    /**
     * Extracting the authors
     *
     * The authors are separated by ' and '. Every author becomes an array with
     * the keys 'first', 'von', 'last', 'jr', 'nice' (the non-empty parts joined
     * by blanks) and 'sort' (lower-cased last and first name).
     *
     * @access private
     * @param string $entry The entry with the authors
     * @return array the extracted authors
     */
    public function _extractAuthors($entry)
    {
        $authorarray = explode(' and ', $this->_unwrap($entry));
        foreach ($authorarray as $i => $rawauthor) {
            $author = trim($rawauthor);
            if (false === strpos($author, ',')) {
                //The first version of how an author could be written
                //(First von Last) has no commas in it
                $parts = $this->_splitPlainAuthor($author);
            } else { //Version 2 and 3
                $parts = $this->_splitCommaAuthor($author);
            }
            $authorarray[$i] = array(
                'first' => trim($parts['first']),
                'von'   => trim($parts['von']),
                'last'  => trim($parts['last']),
                'jr'    => trim($parts['jr']),
            );
            $authorarray[$i]['nice'] = join(' ', array_filter($authorarray[$i]));
            $authorarray[$i]['sort'] = strtolower(trim($parts['last']).trim($parts['first']));
        }
        return $authorarray;
    }

    /**
     * Splits an author written as "First von Last"
     *
     * @access private
     * @param string $author One trimmed author without a comma
     * @return array The untrimmed parts 'first', 'von', 'last' and 'jr'
     */
    private function _splitPlainAuthor($author)
    {
        $first = '';
        $von   = '';
        $last  = '';
        $words = preg_split('/\s+/', $author);
        $size  = count($words);

        if (1 === $size) { //There is only a last
            $last = $words[0];
        } elseif (2 === $size) { //There is a first and a last
            $first = $words[0];
            $last  = $words[1];
        } else {
            $invon  = false;
            $inlast = false;
            for ($j = 0; $j < ($size - 1); $j++) {
                $word = $words[$j];
                if ($inlast) {
                    $last .= ' '.$word;
                    continue;
                }
                //_determineCase() yields an integer or an error object;
                //a word whose case cannot be determined is skipped
                $case = $this->_determineCase($word);
                if (!is_int($case)) {
                    continue;
                }
                if (!$invon) {
                    if (0 === $case) { //Change from first to von
                        $invon = true;
                        $von  .= ' '.$word;
                    } else {
                        $first .= ' '.$word;
                    }
                    continue;
                }
                //Inside the von part: the change to last only happens at a
                //lower case or caseless word after which no lower case follows
                if ((1 === $case) || $this->_lowerCaseFollows($words, $j + 1, $size - 1)) {
                    $von .= ' '.$word;
                    continue;
                }
                $inlast = true;
                if (-1 === $case) { //Caseless belongs to the last
                    $last .= ' '.$word;
                } else {
                    $von .= ' '.$word;
                }
            }
            //The last entry is always the last!
            $last .= ' '.$words[$size - 1];
        }
        return array('first' => $first, 'von' => $von, 'last' => $last, 'jr' => '');
    }

    /**
     * Splits an author written as "von Last, First" or "von Last, Jr, First"
     *
     * @access private
     * @param string $author One trimmed author containing at least one comma
     * @return array The untrimmed parts 'first', 'von', 'last' and 'jr'
     */
    private function _splitCommaAuthor($author)
    {
        $von   = '';
        $last  = '';
        $jr    = '';
        $parts = explode(',', $author);
        //The first part must contain von and last
        $words = explode(' ', $parts[0]);
        $size  = count($words);

        if (1 === $size) { //Only one entry->got to be the last
            $last = $words[0];
        } else {
            $inlast = false;
            for ($j = 0; $j < ($size - 1); $j++) {
                $word = $words[$j];
                if ($inlast) {
                    $last .= ' '.$word;
                } elseif ((0 !== $this->_determineCase($word))
                    && !$this->_lowerCaseFollows($words, $j + 1, $size - 1)) {
                    //Change from von to last
                    $inlast = true;
                    $last  .= ' '.$word;
                } else {
                    $von .= ' '.$word;
                }
            }
            $last .= ' '.$words[$size - 1];
        }
        //Version three has three parts (two commas), the middle one is the jr
        if (3 === count($parts)) {
            $jr = $parts[1];
        }
        //Everything in the last part is first
        $first = $parts[count($parts) - 1];
        return array('first' => $first, 'von' => $von, 'last' => $last, 'jr' => $jr);
    }

    /**
     * Tells whether one of the words in a range is lower case
     *
     * @access private
     * @param array $words List of words
     * @param int   $from  Index of the first word to look at
     * @param int   $to    Index after the last word to look at
     * @return bool true if _determineCase() reports lower case for one of them
     */
    private function _lowerCaseFollows($words, $from, $to)
    {
        for ($k = $from; $k < $to; $k++) {
            //An error object is never identical to 0 and is thereby ignored
            if (0 === $this->_determineCase($words[$k])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Case Determination according to the needs of BibTex
     *
     * To parse the Author(s) correctly a determination is needed
     * to get the Case of a word. The first ASCII letter that is not enclosed
     * in braces decides. There are three possible values:
     * - Upper Case (return value 1)
     * - Lower Case (return value 0)
     * - Caseless   (return value -1)
     *
     * @access private
     * @param string $word
     * @return int The Case or PEAR_Error if the word is no string or blank
     */
    public function _determineCase($word)
    {
        $trimmedword = is_string($word) ? trim($word) : '';
        if ('' === $trimmedword) {
            return PEAR::raiseError('Could not determine case on word: '.(string)$word);
        }
        $openbrace = 0;
        $length    = strlen($trimmedword);
        for ($i = 0; $i < $length; $i++) {
            $ord = ord($trimmedword[$i]);
            if (123 === $ord) { //Open brace
                $openbrace++;
            } elseif (125 === $ord) { //Closing brace
                $openbrace--;
            } elseif (0 === $openbrace) {
                if (($ord >= 65) && ($ord <= 90)) { //Uppercase letter
                    return 1;
                }
                if (($ord >= 97) && ($ord <= 122)) { //Lowercase letter
                    return 0;
                }
            }
        }
        return -1;
    }

    /**
     * Validation of a value
     *
     * There may be several problems with the value of a field.
     * These problems exist but do not break the parsing.
     * If a problem is detected a warning is appended to the array warnings.
     *
     * @access private
     * @param string $entry The entry aka one line which which should be validated
     * @param string $wholeentry The whole BibTex Entry which the one line is part of
     * @return void
     */
    public function _validateValue($entry, $wholeentry)
    {
        //There is no @ allowed if the entry is enclosed by braces
        if (preg_match('/^{.*@.*}$/', $entry)) {
            $this->_generateWarning('WARNING_AT_IN_BRACES', $entry, $wholeentry);
        }
        //No escaped " allowed if the entry is enclosed by double quotes.
        //NOTE(review): inside single quotes \\ is one backslash, so the pattern
        //below matches any further double quote between the delimiters, escaped
        //or not. Kept as it is to not change which warnings are generated.
        if (preg_match('/^\".*\\".*\"$/', $entry)) {
            $this->_generateWarning('WARNING_ESCAPED_DOUBLE_QUOTE_INSIDE_DOUBLE_QUOTES', $entry, $wholeentry);
        }
        //Amount of Braces is not correct
        $open     = 0;
        $lastchar = '';
        $length   = strlen($entry);
        for ($i = 0; $i < $length; $i++) {
            $char = $entry[$i];
            if ('\\' !== $lastchar) {
                if ('{' === $char) {
                    $open++;
                } elseif ('}' === $char) {
                    $open--;
                }
            }
            $lastchar = $char;
        }
        if (0 !== $open) {
            $this->_generateWarning('WARNING_UNBALANCED_AMOUNT_OF_BRACES', $entry, $wholeentry);
        }
    }

    /**
     * Remove curly braces from entry
     *
     * Enclosing delimiters are taken off first, then every curly brace of the
     * remaining value is removed and the delimiters are attached again.
     *
     * @access private
     * @param string $value The value in which curly braces to be removed
     * @return string Value with removed curly braces
     */
    public function _removeCurlyBraces($value)
    {
        //First we save the delimiters
        $begin = '';
        $end   = '';
        while (true) {
            $firstchar = substr($value, 0, 1);
            $lastchar  = substr($value, -1, 1);
            if (!isset($this->_delimiters[$firstchar])
                || ($lastchar != $this->_delimiters[$firstchar])) {
                break;
            }
            $begin .= $firstchar;
            $end   .= $lastchar;
            $value  = substr($value, 1, -1);
        }
        //Now we get rid of the curly braces and reattach the delimiters
        return $begin.preg_replace('/[\{\}]/', '', $value).$end;
    }

    /**
     * Generates a warning
     *
     * @access private
     * @param string $type The type of the warning
     * @param string $entry The line of the entry where the warning occurred
     * @param string $wholeentry OPTIONAL The whole entry where the warning occurred
     * @return void
     */
    public function _generateWarning($type, $entry, $wholeentry = '')
    {
        $this->warnings[] = array(
            'warning'    => $type,
            'entry'      => $entry,
            'wholeentry' => $wholeentry,
        );
    }

    /**
     * Clears all warnings
     *
     * @access public
     * @return void
     */
    public function clearWarnings()
    {
        $this->warnings = array();
    }

    /**
     * Is there a warning?
     *
     * @access public
     * @return bool true if there is, false otherwise
     */
    public function hasWarning()
    {
        return count($this->warnings) > 0;
    }

    /**
     * Returns the amount of available BibTex entries
     *
     * @access public
     * @return int The amount of available BibTex entries
     */
    public function amount()
    {
        return count($this->data);
    }

    /**
     * Returns the author formatted
     *
     * The Author is formatted as set in the authorstring: the placeholders
     * VON, LAST, JR and FIRST are replaced one after the other in this order.
     *
     * @access private
     * @param array $array Author array
     * @return string the formatted author string
     */
    public function _formatAuthor($array)
    {
        foreach (array('von', 'last', 'jr', 'first') as $part) {
            $array[$part] = array_key_exists($part, $array) ? trim($array[$part]) : '';
        }
        $ret = $this->authorstring;
        $ret = str_replace("VON", $array['von'], $ret);
        $ret = str_replace("LAST", $array['last'], $ret);
        $ret = str_replace("JR", $array['jr'], $ret);
        $ret = str_replace("FIRST", $array['first'], $ret);
        return trim($ret);
    }

    /**
     * Converts the stored BibTex entries to a BibTex String
     *
     * In the field list, the author is the last field. Every key of an entry
     * except 'cite', 'entrytype' and 'author' is written as a field; this
     * includes the keys added by the parser such as 'bibtex' or 'entrykey'.
     *
     * @access public
     * @return string The BibTex string
     */
    public function bibTex()
    {
        $bibtex   = '';
        $nofields = array('cite', 'entrytype', 'author');
        foreach ($this->data as $entry) {
            $type = isset($entry['entrytype']) ? $entry['entrytype'] : '';
            $cite = isset($entry['cite']) ? $entry['cite'] : '';
            //Intro
            $bibtex .= '@'.strtolower($type).' { '.$cite.",\n";
            //Other fields except author
            foreach ($entry as $key => $val) {
                if (in_array($key, $nofields, true)) {
                    continue;
                }
                if ($this->_options['wordWrapWidth'] > 0) {
                    $val = $this->_wordwrap($val);
                }
                $bibtex .= "\t".$key.' = {'.$val."},\n";
            }
            //Author
            $author = '';
            if (array_key_exists('author', $entry)) {
                if ($this->_options['extractAuthors']) {
                    //The formatted authors are joined with an and
                    $author = join(' and ', array_map(array($this, '_formatAuthor'), $entry['author']));
                } else {
                    $author = $entry['author'];
                }
            }
            $bibtex .= "\tauthor = {".$author."}\n";
            $bibtex .= "}\n\n";
        }
        return $bibtex;
    }

    /**
     * Adds a new BibTex entry to the data
     *
     * @access public
     * @param array $newentry The new data to add
     * @return void
     */
    public function addEntry($newentry)
    {
        $this->data[] = $newentry;
    }

    /**
     * Returns statistic
     *
     * This functions returns a hash table. The keys are the different
     * entry types and the values are the amount of these entries.
     * Entries without an entry type are counted under the empty string.
     *
     * @access public
     * @return array Hash Table with the data
     */
    public function getStatistic()
    {
        $ret = array();
        foreach ($this->data as $entry) {
            $type = isset($entry['entrytype']) ? $entry['entrytype'] : '';
            if (array_key_exists($type, $ret)) {
                $ret[$type]++;
            } else {
                $ret[$type] = 1;
            }
        }
        return $ret;
    }

    /**
     * Fills a format string with the data of one entry
     *
     * The placeholders TITLE, JOURNAL, YEAR and AUTHORS are replaced one after
     * the other in this order. The values are inserted as they are, no escaping
     * for the target format takes place.
     *
     * @access private
     * @param array  $entry    One entry of the data array
     * @param string $template The format string
     * @return mixed The filled format string or false if the entry has neither
     *               title, journal, year nor author
     */
    private function _renderEntry($entry, $template)
    {
        $title   = array_key_exists('title', $entry) ? $this->_unwrap($entry['title']) : '';
        $journal = array_key_exists('journal', $entry) ? $this->_unwrap($entry['journal']) : '';
        $year    = array_key_exists('year', $entry) ? $this->_unwrap($entry['year']) : '';
        $authors = '';
        if (array_key_exists('author', $entry)) {
            if ($this->_options['extractAuthors']) {
                $authors = join(', ', array_map(array($this, '_formatAuthor'), $entry['author']));
            } else {
                $authors = $entry['author'];
            }
        }
        if (('' == $title) && ('' == $journal) && ('' == $year) && ('' == $authors)) {
            return false;
        }
        $line = str_replace("TITLE", $title, $template);
        $line = str_replace("JOURNAL", $journal, $line);
        $line = str_replace("YEAR", $year, $line);
        $line = str_replace("AUTHORS", $authors, $line);
        return $line;
    }

    /**
     * Returns the stored data in RTF format
     *
     * This method simply returns a RTF formatted string. This is done very
     * simple and is not intended for heavy using and fine formatting. This
     * should be done by BibTex! It is intended to give some kind of quick
     * preview or to send someone a reference list as word/rtf format (even
     * some people in the scientific field still use word). If you want to
     * change the default format you have to override the class variable
     * "rtfstring". This variable is used and the placeholders simply replaced.
     * Lines with no data cause a warning!
     *
     * @access public
     * @return string the RTF Strings
     */
    public function rtf()
    {
        $ret = "{\\rtf\n";
        foreach ($this->data as $entry) {
            $line = $this->_renderEntry($entry, $this->rtfstring);
            if (false === $line) {
                $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry, 1));
                continue;
            }
            $ret .= $line."\n\\par\n";
        }
        $ret .= '}';
        return $ret;
    }

    /**
     * Returns the stored data in HTML format
     *
     * This method simply returns a HTML formatted string. This is done very
     * simple and is not intended for heavy using and fine formatting. This
     * should be done by BibTex! It is intended to give some kind of quick
     * preview. If you want to change the default format you have to override
     * the class variable "htmlstring". This variable is used and the placeholders
     * simply replaced. The field values are not HTML escaped.
     * Lines with no data cause a warning!
     *
     * @access public
     * @return string the HTML Strings
     */
    public function html()
    {
        $ret = "<p>\n";
        foreach ($this->data as $entry) {
            $line = $this->_renderEntry($entry, $this->htmlstring);
            if (false === $line) {
                $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry, 1));
                continue;
            }
            $ret .= $line."\n";
        }
        $ret .= "</p>\n";
        return $ret;
    }
}