1<?php
2  /**
3   * The following changes have been made by Raphael Reitzig, 2010-2012:
4   * - fixed spelling (l184)
5   * - added source bibtex to entry in data array (l380)
6   * - added entry key to entry in data array if present (l394 ff)
7   * - fixed brace removal (l893)
8   * - removed inclusion of PEAR.PHP as we handle this caller-site
9   * - applied patch from http://pear.php.net/bugs/bug.php?id=14442
10   * - Fixed bug in l398: entry keys could previously contain only letters and
11   *                      digits; everything is allowed now.
12   * - Fixed parsing of authors (comma-less version was broken)
13   * - Added printable and comparable variants to author arrays
14   * - Added printable and comparable variants of authors to entries
15   */
16
17  /**
18   * Class for working with BibTex data
19   *
20   * A class which provides common methods to access and
21   * create Strings in BibTex format
22   *
23   * PHP versions 4 and 5
24   *
25   * LICENSE: This source file is subject to version 3.0 of the PHP license
26   * that is available through the world-wide-web at the following URI:
27   * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
28   * the PHP License and are unable to obtain it through the web, please
29   * send a note to license@php.net so we can mail you a copy immediately.
30   *
31   * @category   Structures
32   * @package    Structures_BibTex
33   * @author     Elmar Pitschke <elmar.pitschke@gmx.de>
34   * @copyright  1997-2005 The PHP Group
35   * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
36   * @version    CVS: $Id: BibTex.php 304756 2010-10-25 10:19:43Z clockwerx $
37   * @link       http://pear.php.net/package/Structures_BibTex
38   */
39
40//require_once('PEAR.php');
41/**
42 * Structures_BibTex
43 *
44 * A class which provides common methods to access and
45 * create Strings in BibTex format.
46 * Example 1: Parsing a BibTex File and returning the number of entries
47 * <code>
48 * $bibtex = new Structures_BibTex();
49 * $ret    = $bibtex->loadFile('foo.bib');
50 * if (PEAR::isError($ret)) {
51 *   die($ret->getMessage());
52 * }
53 * $bibtex->parse();
54 * print "There are ".$bibtex->amount()." entries";
55 * </code>
56 * Example 2: Parsing a BibTex File and getting all Titles
57 * <code>
58 * $bibtex = new Structures_BibTex();
59 * $ret    = $bibtex->loadFile('bibtex.bib');
60 * if (PEAR::isError($ret)) {
61 *   die($ret->getMessage());
62 * }
63 * $bibtex->parse();
64 * foreach ($bibtex->data as $entry) {
65 *  print $entry['title']."<br />";
66 * }
67 * </code>
68 * Example 3: Adding an entry and printing it in BibTex Format
69 * <code>
70 * $bibtex                         = new Structures_BibTex();
71 * $addarray                       = array();
72 * $addarray['entrytype']          = 'Article';
73 * $addarray['cite']               = 'art2';
74 * $addarray['title']              = 'Titel2';
75 * $addarray['author'][0]['first'] = 'John';
76 * $addarray['author'][0]['last']  = 'Doe';
77 * $addarray['author'][1]['first'] = 'Jane';
78 * $addarray['author'][1]['last']  = 'Doe';
79 * $bibtex->addEntry($addarray);
80 * print nl2br($bibtex->bibTex());
81 * </code>
82 *
83 * @category   Structures
84 * @package    Structures_BibTex
85 * @author     Elmar Pitschke <elmar.pitschke@gmx.de>
86 * @copyright  1997-2005 The PHP Group
87 * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
88 * @version    Release: @package_version@
89 * @link       http://pear.php.net/Structures/Structure_BibTex
90 */
91class Structures_BibTex
92{
93    /**
94     * Array with the BibTex Data
95     *
96     * @access public
97     * @var array
98     */
99    var $data;
100    /**
101     * String with the BibTex content
102     *
103     * @access public
104     * @var string
105     */
106    var $content;
107    /**
108     * Array with possible Delimiters for the entries
109     *
110     * @access private
111     * @var array
112     */
113    var $_delimiters;
114    /**
115     * Array to store warnings
116     *
117     * @access public
118     * @var array
119     */
120    var $warnings;
121    /**
122     * Run-time configuration options
123     *
124     * @access private
125     * @var array
126     */
127    var $_options;
128    /**
129     * RTF Format String
130     *
131     * @access public
132     * @var string
133     */
134    var $rtfstring;
135    /**
136     * HTML Format String
137     *
138     * @access public
139     * @var string
140     */
141    var $htmlstring;
142    /**
143     * Array with the "allowed" entry types
144     *
145     * @access public
146     * @var array
147     */
148    var $allowedEntryTypes;
149    /**
150     * Author Format Strings
151     *
152     * @access public
153     * @var string
154     */
155    var $authorstring;
156
157    /**
158     * Constructor
159     *
160     * @access public
161     * @return void
162     */
163    function Structures_BibTex($options = array())
164    {
165        $this->_delimiters     = array('"'=>'"',
166                                        '{'=>'}');
167        $this->data            = array();
168        $this->content         = '';
169        //$this->_stripDelimiter = $stripDel;
170        //$this->_validate       = $val;
171        $this->warnings        = array();
172        $this->_options        = array(
173            'stripDelimiter'    => true,
174            'validate'          => true,
175            'unwrap'            => false,
176            'wordWrapWidth'     => false,
177            'wordWrapBreak'     => "\n",
178            'wordWrapCut'       => 0,
179            'removeCurlyBraces' => false,
180            'extractAuthors'    => true,
181        );
182        foreach ($options as $option => $value) {
183            $test = $this->setOption($option, $value);
184            if (PEAR::isError($test)) {
185                //Currently nothing is done here, but it could for example raise an warning
186            }
187        }
188        $this->rtfstring         = 'AUTHORS, "{\b TITLE}", {\i JOURNAL}, YEAR';
189        $this->htmlstring        = 'AUTHORS, "<strong>TITLE</strong>", <em>JOURNAL</em>, YEAR<br />';
190        $this->allowedEntryTypes = array(
191            'article',
192            'book',
193            'booklet',
194            'conference',
195            'inbook',
196            'incollection',
197            'inproceedings',
198            'manual',
199            'mastersthesis',
200            'misc',
201            'phdthesis',
202            'proceedings',
203            'techreport',
204            'unpublished'
205        );
206        $this->authorstring = 'VON LAST, JR, FIRST';
207    }
208
209    /**
210     * Sets run-time configuration options
211     *
212     * @access public
213     * @param string $option option name
214     * @param mixed  $value value for the option
215     * @return mixed true on success PEAR_Error on failure
216     */
217    function setOption($option, $value)
218    {
219        $ret = true;
220        if (array_key_exists($option, $this->_options)) {
221            $this->_options[$option] = $value;
222        } else {
223            $ret = PEAR::raiseError('Unknown option '.$option);
224        }
225        return $ret;
226    }
227
228    /**
229     * Reads a given BibTex File
230     *
231     * @access public
232     * @param string $filename Name of the file
233     * @return mixed true on success PEAR_Error on failure
234     */
235    function loadFile($filename)
236    {
237        if (file_exists($filename)) {
238            if (($this->content = @file_get_contents($filename)) === false) {
239                return PEAR::raiseError('Could not open file '.$filename);
240            } else {
241                $this->_pos    = 0;
242                $this->_oldpos = 0;
243                return true;
244            }
245        } else {
246            return PEAR::raiseError('Could not find file '.$filename);
247        }
248    }
249
250    /**
251     * Reads bibtex from a string variable
252     *
253     * @access public
254     * @param string $bib String containing bibtex
255     * @return boolean true
256     */
257    function loadString($bib)
258    {
259      $this->content = $bib;
260      $this->_pos    = 0;
261      $this->_oldpos = 0;
262      return true; // For compatibility with loadFile
263    }
264
265    /**
266     * Parses what is stored in content and clears the content if the parsing is successfull.
267     *
268     * @access public
269     * @return boolean true on success and PEAR_Error if there was a problem
270     */
271    function parse()
272    {
273        //The amount of opening braces is compared to the amount of closing braces
274        //Braces inside comments are ignored
275        $this->warnings = array();
276        $this->data     = array();
277        $valid          = true;
278        $open           = 0;
279        $entry          = false;
280        $char           = '';
281        $lastchar       = '';
282        $buffer         = '';
283        for ($i = 0; $i < strlen($this->content); $i++) {
284            $char = substr($this->content, $i, 1);
285            if ((0 != $open) && ('@' == $char)) {
286                if (!$this->_checkAt($buffer)) {
287                    $this->_generateWarning('WARNING_MISSING_END_BRACE', '', $buffer);
288                    //To correct the data we need to insert a closing brace
289                    $char     = '}';
290                    $i--;
291                }
292            }
293            if ((0 == $open) && ('@' == $char)) { //The beginning of an entry
294                $entry = true;
295            } elseif ($entry && ('{' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is opening
296                $open++;
297            } elseif ($entry && ('}' == $char) && ('\\' != $lastchar)) { //Inside an entry and non quoted brace is closing
298                $open--;
299                if ($open < 0) { //More are closed than opened
300                    $valid = false;
301                }
302                if (0 == $open) { //End of entry
303                    $entry     = false;
304                    $entrydata = $this->_parseEntry($buffer);
305                    if (!$entrydata) {
306                        /**
307                         * This is not yet used.
308                         * We are here if the Entry is either not correct or not supported.
309                         * But this should already generate a warning.
310                         * Therefore it should not be necessary to do anything here
311                         */
312                    } else {
313                        $this->data[] = $entrydata;
314                    }
315                    $buffer = '';
316                }
317            }
318            if ($entry) { //Inside entry
319                $buffer .= $char;
320            }
321            $lastchar = $char;
322        }
323        //If open is one it may be possible that the last ending brace is missing
324        if (1 == $open) {
325            $entrydata = $this->_parseEntry($buffer);
326            if (!$entrydata) {
327                $valid = false;
328            } else {
329                $this->data[] = $entrydata;
330                $buffer = '';
331                $open   = 0;
332            }
333        }
334        //At this point the open should be zero
335        if (0 != $open) {
336            $valid = false;
337        }
338        //Are there Multiple entries with the same cite?
339        if ($this->_options['validate']) {
340            $cites = array();
341            foreach ($this->data as $entry) {
342                $cites[] = $entry['cite'];
343            }
344            $unique = array_unique($cites);
345            if (sizeof($cites) != sizeof($unique)) { //Some values have not been unique!
346                $notuniques = array();
347                for ($i = 0; $i < sizeof($cites); $i++) {
348                    if ('' == $unique[$i]) {
349                        $notuniques[] = $cites[$i];
350                    }
351                }
352                $this->_generateWarning('WARNING_MULTIPLE_ENTRIES', implode(',',$notuniques));
353            }
354        }
355        if ($valid) {
356            $this->content = '';
357            return true;
358        } else {
359            return PEAR::raiseError('Unbalanced parenthesis');
360        }
361    }
362
363    /**
364     * Extracting the data of one content
365     *
366     * The parse function splits the content into its entries.
367     * Then every entry is parsed by this function.
368     * It parses the entry backwards.
369     * First the last '=' is searched and the value extracted from that.
370     * A copy is made of the entry if warnings should be generated. This takes quite
371     * some memory but it is needed to get good warnings. If nor warnings are generated
372     * then you don have to worry about memory.
373     * Then the last ',' is searched and the field extracted from that.
374     * Again the entry is shortened.
375     * Finally after all field=>value pairs the cite and type is extraced and the
376     * authors are splitted.
377     * If there is a problem false is returned.
378     *
379     * @access private
380     * @param string $entry The entry
381     * @return array The representation of the entry or false if there is a problem
382     */
383    function _parseEntry($entry)
384    {
385        $entrycopy = '';
386        if ($this->_options['validate']) {
387            $entrycopy = $entry; //We need a copy for printing the warnings
388        }
389        $ret = array('bibtex' => $entry.'}');
390        if ('@string' ==  strtolower(substr($entry, 0, 7))) {
391            //String are not yet supported!
392            if ($this->_options['validate']) {
393                $this->_generateWarning('STRING_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
394            }
395        } elseif ('@preamble' ==  strtolower(substr($entry, 0, 9))) {
396            //Preamble not yet supported!
397            if ($this->_options['validate']) {
398                $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
399            }
400        } else {
401            // Look for key
402            $matches = array();
403            preg_match('/^@\w+\{(.+?),/' ,$entry, $matches);
404            if ( count($matches) > 0 )
405            {
406              $ret['entrykey'] = $matches[1];
407            }
408
409            //Parsing all fields
410            while (strrpos($entry,'=') !== false) {
411                $position = strrpos($entry, '=');
412                //Checking that the equal sign is not quoted or is not inside a equation (For example in an abstract)
413                $proceed  = true;
414                if (substr($entry, $position-1, 1) == '\\') {
415                    $proceed = false;
416                }
417                if ($proceed) {
418                    $proceed = $this->_checkEqualSign($entry, $position);
419                }
420                while (!$proceed) {
421                    $substring = substr($entry, 0, $position);
422                    $position  = strrpos($substring,'=');
423                    $proceed   = true;
424                    if (substr($entry, $position-1, 1) == '\\') {
425                        $proceed = false;
426                    }
427                    if ($proceed) {
428                        $proceed = $this->_checkEqualSign($entry, $position);
429                    }
430                }
431
432                $value = trim(substr($entry, $position+1));
433                $entry = substr($entry, 0, $position);
434
435                if (',' == substr($value, strlen($value)-1, 1)) {
436                    $value = substr($value, 0, -1);
437                }
438                if ($this->_options['validate']) {
439                    $this->_validateValue($value, $entrycopy);
440                }
441                if ($this->_options['stripDelimiter']) {
442                    $value = $this->_stripDelimiter($value);
443                }
444                if ($this->_options['unwrap']) {
445                    $value = $this->_unwrap($value);
446                }
447                if ($this->_options['removeCurlyBraces']) {
448                    $value = $this->_removeCurlyBraces($value);
449                }
450                $position    = strrpos($entry, ',');
451                $field       = strtolower(trim(substr($entry, $position+1)));
452                $ret[$field] = $value;
453                $entry       = substr($entry, 0, $position);
454            }
455            //Parsing cite and entry type
456            $arr = explode('{', $entry);
457            $ret['cite'] = trim($arr[1]);
458            $ret['entrytype'] = strtolower(trim($arr[0]));
459            if ('@' == $ret['entrytype']{0}) {
460                $ret['entrytype'] = substr($ret['entrytype'], 1);
461            }
462            if ($this->_options['validate']) {
463                if (!$this->_checkAllowedEntryType($ret['entrytype'])) {
464                    $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
465                }
466            }
467            //Handling the authors
468            if (in_array('author', array_keys($ret)) && $this->_options['extractAuthors']) {
469                $ret['author'] = $this->_extractAuthors($ret['author']);
470                $ret['niceauthor'] = join(', ', array_map(function ($a) { return $a['nice']; }, $ret['author']));
471                $ret['sortauthor'] = join('', array_map(function ($a) { return $a['sort']; }, $ret['author']));
472            }
473        }
474        return $ret;
475    }
476
477    /**
478     * Checking whether the position of the '=' is correct
479     *
480     * Sometimes there is a problem if a '=' is used inside an entry (for example abstract).
481     * This method checks if the '=' is outside braces then the '=' is correct and true is returned.
482     * If the '=' is inside braces it contains to a equation and therefore false is returned.
483     *
484     * @access private
485     * @param string $entry The text of the whole remaining entry
486     * @param int the current used place of the '='
487     * @return bool true if the '=' is correct, false if it contains to an equation
488     */
489    function _checkEqualSign($entry, $position)
490    {
491        $ret = true;
492        //This is getting tricky
493        //We check the string backwards until the position and count the closing an opening braces
494        //If we reach the position the amount of opening and closing braces should be equal
495        $length = strlen($entry);
496        $open   = 0;
497        for ($i = $length-1; $i >= $position; $i--) {
498            $precedingchar = substr($entry, $i-1, 1);
499            $char          = substr($entry, $i, 1);
500            if (('{' == $char) && ('\\' != $precedingchar)) {
501                $open++;
502            }
503            if (('}' == $char) && ('\\' != $precedingchar)) {
504                $open--;
505            }
506        }
507        if (0 != $open) {
508            $ret = false;
509        }
510        //There is still the posibility that the entry is delimited by double quotes.
511        //Then it is possible that the braces are equal even if the '=' is in an equation.
512        if ($ret) {
513            $entrycopy = trim($entry);
514            $lastchar  = $entrycopy{strlen($entrycopy)-1};
515            if (',' == $lastchar) {
516                $lastchar = $entrycopy{strlen($entrycopy)-2};
517            }
518            if ('"' == $lastchar) {
519                //The return value is set to false
520                //If we find the closing " before the '=' it is set to true again.
521                //Remember we begin to search the entry backwards so the " has to show up twice - ending and beginning delimiter
522                $ret = false;
523                $found = 0;
524                for ($i = $length; $i >= $position; $i--) {
525                    $precedingchar = substr($entry, $i-1, 1);
526                    $char          = substr($entry, $i, 1);
527                    if (('"' == $char) && ('\\' != $precedingchar)) {
528                        $found++;
529                    }
530                    if (2 == $found) {
531                        $ret = true;
532                        break;
533                    }
534                }
535            }
536        }
537        return $ret;
538    }
539
540    /**
541     * Checking if the entry type is allowed
542     *
543     * @access private
544     * @param string $entry The entry to check
545     * @return bool true if allowed, false otherwise
546     */
547    function _checkAllowedEntryType($entry)
548    {
549        return in_array($entry, $this->allowedEntryTypes);
550    }
551
552    /**
553     * Checking whether an at is outside an entry
554     *
555     * Sometimes an entry misses an entry brace. Then the at of the next entry seems to be
556     * inside an entry. This is checked here. When it is most likely that the at is an opening
557     * at of the next entry this method returns true.
558     *
559     * @access private
560     * @param string $entry The text of the entry until the at
561     * @return bool true if the at is correct, false if the at is likely to begin the next entry.
562     */
563    function _checkAt($entry)
564    {
565        $ret     = false;
566        $opening = array_keys($this->_delimiters);
567        $closing = array_values($this->_delimiters);
568        //Getting the value (at is only allowd in values)
569        if (strrpos($entry,'=') !== false) {
570            $position = strrpos($entry, '=');
571            $proceed  = true;
572            if (substr($entry, $position-1, 1) == '\\') {
573                $proceed = false;
574            }
575            while (!$proceed) {
576                $substring = substr($entry, 0, $position);
577                $position  = strrpos($substring,'=');
578                $proceed   = true;
579                if (substr($entry, $position-1, 1) == '\\') {
580                    $proceed = false;
581                }
582            }
583            $value    = trim(substr($entry, $position+1));
584            $open     = 0;
585            $char     = '';
586            $lastchar = '';
587            for ($i = 0; $i < strlen($value); $i++) {
588                $char = substr($this->content, $i, 1);
589                if (in_array($char, $opening) && ('\\' != $lastchar)) {
590                    $open++;
591                } elseif (in_array($char, $closing) && ('\\' != $lastchar)) {
592                    $open--;
593                }
594                $lastchar = $char;
595            }
596            //if open is grater zero were are inside an entry
597            if ($open>0) {
598                $ret = true;
599            }
600        }
601        return $ret;
602    }
603
604    /**
605     * Stripping Delimiter
606     *
607     * @access private
608     * @param string $entry The entry where the Delimiter should be stripped from
609     * @return string Stripped entry
610     */
611    function _stripDelimiter($entry)
612    {
613        $beginningdels = array_keys($this->_delimiters);
614        $length        = strlen($entry);
615        $firstchar     = substr($entry, 0, 1);
616        $lastchar      = substr($entry, -1, 1);
617        while (in_array($firstchar, $beginningdels)) { //The first character is an opening delimiter
618            if ($lastchar == $this->_delimiters[$firstchar]) { //Matches to closing Delimiter
619                $entry = substr($entry, 1, -1);
620            } else {
621                break;
622            }
623            $firstchar = substr($entry, 0, 1);
624            $lastchar  = substr($entry, -1, 1);
625        }
626        return $entry;
627    }
628
629    /**
630     * Unwrapping entry
631     *
632     * @access private
633     * @param string $entry The entry to unwrap
634     * @return string unwrapped entry
635     */
636    function _unwrap($entry)
637    {
638        $entry = preg_replace('/\s+/', ' ', $entry);
639        return trim($entry);
640    }
641
642    /**
643     * Wordwrap an entry
644     *
645     * @access private
646     * @param string $entry The entry to wrap
647     * @return string wrapped entry
648     */
649    function _wordwrap($entry)
650    {
651        if ( (''!=$entry) && (is_string($entry)) ) {
652            $entry = wordwrap($entry, $this->_options['wordWrapWidth'], $this->_options['wordWrapBreak'], $this->_options['wordWrapCut']);
653        }
654        return $entry;
655    }
656
657    /**
658     * Extracting the authors
659     *
660     * @access private
661     * @param string $entry The entry with the authors
662     * @return array the extracted authors
663     */
664    function _extractAuthors($entry) {
665        $entry       = $this->_unwrap($entry);
666        $authorarray = array();
667        $authorarray = explode(' and ', $entry);
668        for ($i = 0; $i < sizeof($authorarray); $i++) {
669            $author = trim($authorarray[$i]);
670            /*The first version of how an author could be written (First von Last)
671             has no commas in it*/
672            $first    = '';
673            $von      = '';
674            $last     = '';
675            $jr       = '';
676            if (strpos($author, ',') === false) {
677                $tmparray = array();
678                $tmparray = preg_split('/\s+/', $author);
679                $size     = sizeof($tmparray);
680                if (1 == $size) { //There is only a last
681                    $last = $tmparray[0];
682                } elseif (2 == $size) { //There is a first and a last
683                    $first = $tmparray[0];
684                    $last  = $tmparray[1];
685                } else {
686                    $invon  = false;
687                    $inlast = false;
688                    for ($j=0; $j<($size-1); $j++) {
689                        if ($inlast) {
690                            $last .= ' '.$tmparray[$j];
691                        } elseif ($invon) {
692                            $case = $this->_determineCase($tmparray[$j]);
693                            if (PEAR::isError($case)) {
694                                // IGNORE?
695                            } elseif ((0 == $case) || (-1 == $case)) { //Change from von to last
696                                //You only change when there is no more lower case there
697                                $islast = true;
698                                for ($k=($j+1); $k<($size-1); $k++) {
699                                    $futurecase = $this->_determineCase($tmparray[$k]);
700                                    if (PEAR::isError($case)) {
701                                        // IGNORE?
702                                    } elseif (0 == $futurecase) {
703                                        $islast = false;
704                                    }
705                                }
706                                if ($islast) {
707                                    $inlast = true;
708                                    if (-1 == $case) { //Caseless belongs to the last
709                                        $last .= ' '.$tmparray[$j];
710                                    } else {
711                                        $von  .= ' '.$tmparray[$j];
712                                    }
713                                } else {
714                                    $von    .= ' '.$tmparray[$j];
715                                }
716                            } else {
717                                $von .= ' '.$tmparray[$j];
718                            }
719                        } else {
720                            $case = $this->_determineCase($tmparray[$j]);
721                            if (PEAR::isError($case)) {
722                                // IGNORE?
723                            } elseif (0 == $case) { //Change from first to von
724                                $invon = true;
725                                $von   .= ' '.$tmparray[$j];
726                            } else {
727                                $first .= ' '.$tmparray[$j];
728                            }
729                        }
730                    }
731                    //The last entry is always the last!
732                    $last .= ' '.$tmparray[$size-1];
733                }
734            } else { //Version 2 and 3
735                $tmparray     = array();
736                $tmparray     = explode(',', $author);
737                //The first entry must contain von and last
738                $vonlastarray = array();
739                $vonlastarray = explode(' ', $tmparray[0]);
740                $size         = sizeof($vonlastarray);
741                if (1==$size) { //Only one entry->got to be the last
742                    $last = $vonlastarray[0];
743                } else {
744                    $inlast = false;
745                    for ($j=0; $j<($size-1); $j++) {
746                        if ($inlast) {
747                            $last .= ' '.$vonlastarray[$j];
748                        } else {
749                            if (0 != ($this->_determineCase($vonlastarray[$j]))) { //Change from von to last
750                                $islast = true;
751                                for ($k=($j+1); $k<($size-1); $k++) {
752                                    $this->_determineCase($vonlastarray[$k]);
753                                    $case = $this->_determineCase($vonlastarray[$k]);
754                                    if (PEAR::isError($case)) {
755                                        // IGNORE?
756                                    } elseif (0 == $case) {
757                                        $islast = false;
758                                    }
759                                }
760                                if ($islast) {
761                                    $inlast = true;
762                                    $last   .= ' '.$vonlastarray[$j];
763                                } else {
764                                    $von    .= ' '.$vonlastarray[$j];
765                                }
766                            } else {
767                                $von    .= ' '.$vonlastarray[$j];
768                            }
769                        }
770                    }
771                    $last .= ' '.$vonlastarray[$size-1];
772                }
773                //Now we check if it is version three (three entries in the array (two commas)
774                if (3==sizeof($tmparray)) {
775                    $jr = $tmparray[1];
776                }
777                //Everything in the last entry is first
778                $first = $tmparray[sizeof($tmparray)-1];
779            }
780            $authorarray[$i] = array('first'=>trim($first), 'von'=>trim($von), 'last'=>trim($last), 'jr'=>trim($jr));
781            $authorarray[$i]['nice'] = join(' ', array_filter($authorarray[$i]));
782            $authorarray[$i]['sort'] = strtolower(trim($last).trim($first));
783        }
784        return $authorarray;
785    }
786
787    /**
788     * Case Determination according to the needs of BibTex
789     *
790     * To parse the Author(s) correctly a determination is needed
791     * to get the Case of a word. There are three possible values:
792     * - Upper Case (return value 1)
793     * - Lower Case (return value 0)
794     * - Caseless   (return value -1)
795     *
796     * @access private
797     * @param string $word
798     * @return int The Case or PEAR_Error if there was a problem
799     */
function _determineCase($word)
{
    // Reject non-strings before touching trim(): calling trim() on an array
    // raises a warning (or a TypeError on PHP 8) instead of the PEAR error
    // this method is documented to return.
    if (!is_string($word)) {
        $shown = (is_scalar($word) || (null === $word)) ? (string)$word : gettype($word);
        return PEAR::raiseError('Could not determine case on word: '.$shown);
    }
    $trimmedword = trim($word);
    if ('' === $trimmedword) {
        return PEAR::raiseError('Could not determine case on word: '.$word);
    }

    // Walk the trimmed word and stop at the first ASCII letter that is not
    // enclosed in curly braces; letters inside braces are skipped.
    $openbrace = 0;
    $length    = strlen($trimmedword);
    for ($i = 0; $i < $length; $i++) {
        $ord = ord($trimmedword[$i]);
        if (123 == $ord) { //Open brace
            $openbrace++;
        } elseif (125 == $ord) { //Closing brace
            $openbrace--;
        } elseif (0 == $openbrace) {
            if (($ord >= 65) && ($ord <= 90)) { //A-Z: upper case
                return 1;
            }
            if (($ord >= 97) && ($ord <= 122)) { //a-z: lower case
                return 0;
            }
        }
    }
    //No letter outside of braces was found: the word is caseless
    return -1;
}
833
834    /**
835     * Validation of a value
836     *
837     * There may be several problems with the value of a field.
838     * These problems exist but do not break the parsing.
839     * If a problem is detected a warning is appended to the array warnings.
840     *
841     * @access private
842     * @param string $entry The entry aka one line which should be validated
843     * @param string $wholeentry The whole BibTex Entry which the one line is part of
844     * @return void
845     */
function _validateValue($entry, $wholeentry)
{
    // A value enclosed by braces must not contain an @
    if (preg_match('/^{.*@.*}$/', $entry)) {
        $this->_generateWarning('WARNING_AT_IN_BRACES', $entry, $wholeentry);
    }
    // A value enclosed by double quotes must not contain an escaped quote (\").
    // In a single-quoted PHP literal, four backslashes produce the regex \\ ,
    // i.e. one literal backslash. The former pattern used only two, which
    // collapsed to a plain quote and fired on every inner " character.
    if (preg_match('/^".*\\\\".*"$/', $entry)) {
        $this->_generateWarning('WARNING_ESCAPED_DOUBLE_QUOTE_INSIDE_DOUBLE_QUOTES', $entry, $wholeentry);
    }
    // Count the braces; a brace directly preceded by a backslash is ignored
    $open     = 0;
    $lastchar = '';
    $length   = strlen($entry);
    for ($i = 0; $i < $length; $i++) {
        $char = substr($entry, $i, 1);
        if ('\\' != $lastchar) {
            if ('{' == $char) {
                $open++;
            } elseif ('}' == $char) {
                $open--;
            }
        }
        $lastchar = $char;
    }
    // Anything other than zero means opening and closing braces do not pair up
    if (0 != $open) {
        $this->_generateWarning('WARNING_UNBALANCED_AMOUNT_OF_BRACES', $entry, $wholeentry);
    }
}
874
875    /**
876     * Remove curly braces from entry
877     *
878     * @access private
879     * @param string $value The value in which curly braces to be removed
880     * @return string Value with removed curly braces
881     */
function _removeCurlyBraces($value)
{
    // Opening delimiters are the keys of the delimiter map, the matching
    // closing delimiters are its values.
    $beginningdels = array_keys($this->_delimiters);
    $begin         = '';
    $end           = '';
    // Peel off matching outer delimiter pairs so they survive the brace
    // removal below. At least two characters are required; otherwise a lone
    // delimiter such as " would count as both opener and closer and be doubled.
    while (strlen($value) >= 2) {
        $firstchar = substr($value, 0, 1);
        $lastchar  = substr($value, -1, 1);
        if (!in_array($firstchar, $beginningdels)
            || ($lastchar != $this->_delimiters[$firstchar])) {
            break;
        }
        $begin .= $firstchar;
        // Closers are collected outermost first, so each new one has to go in
        // front of those already stored; appending turned "{x}" into "{x"}.
        $end    = $lastchar.$end;
        $value  = substr($value, 1, -1);
    }
    //Now we get rid of the curly braces
    $value = preg_replace('/[\{\}]/', '', $value);
    //Reattach delimiters
    return $begin.$value.$end;
}
907
908    /**
909     * Generates a warning
910     *
911     * @access private
912     * @param string $type The type of the warning
913     * @param string $entry The line of the entry where the warning occurred
914     * @param string $wholeentry OPTIONAL The whole entry where the warning occurred
915     */
function _generateWarning($type, $entry, $wholeentry='')
{
    // Bundle the details into a single record and append it to the warning list
    $this->warnings[] = array(
        'warning'    => $type,
        'entry'      => $entry,
        'wholeentry' => $wholeentry
    );
}
923
924    /**
925     * Clears all warnings
926     *
927     * @access public
928     */
function clearWarnings()
{
    // Replace the collected warnings with an empty list
    $this->warnings = array();
}
933
934    /**
935     * Is there a warning?
936     *
937     * @access public
938     * @return true if there is, false otherwise
939     */
function hasWarning()
{
    // True as soon as at least one warning has been recorded
    return (count($this->warnings) > 0);
}
945
946    /**
947     * Returns the amount of available BibTex entries
948     *
949     * @access public
950     * @return int The amount of available BibTex entries
951     */
function amount()
{
    // One element of the data array corresponds to one BibTex entry
    $total = count($this->data);
    return $total;
}
956
957    /**
958     * Returns the author formatted
959     *
960     * The author is formatted according to the pattern set in the authorstring
961     *
962     * @access private
963     * @param array $array Author array
964     * @return string the formatted author string
965     */
function _formatAuthor($array)
{
    // Normalise the four name parts: a missing part becomes an empty string,
    // a present one is trimmed.
    $parts = array();
    foreach (array('von', 'last', 'jr', 'first') as $key) {
        $parts[$key] = array_key_exists($key, $array) ? trim($array[$key]) : '';
    }
    // strtr() substitutes all placeholders in a single pass and never looks
    // at text it has just inserted. The former chain of str_replace() calls
    // re-scanned inserted names, so an upper-case name containing a
    // placeholder (e.g. "BAJRAMI" contains "JR") was corrupted.
    $ret = strtr($this->authorstring, array(
        'VON'   => $parts['von'],
        'LAST'  => $parts['last'],
        'JR'    => $parts['jr'],
        'FIRST' => $parts['first']
    ));
    return trim($ret);
}
995
996    /**
997     * Converts the stored BibTex entries to a BibTex String
998     *
999     * In the field list, the author is the last field.
1000     *
1001     * @access public
1002     * @return string The BibTex string
1003     */
function bibTex()
{
    $bibtex = '';
    // These keys are not written as ordinary fields: cite and entrytype form
    // the entry header, author is always emitted last.
    $special = array('cite', 'entrytype', 'author');
    foreach ($this->data as $entry) {
        //Intro
        $bibtex .= '@'.strtolower($entry['entrytype']).' { '.$entry['cite'].",\n";
        //Other fields except author
        foreach ($entry as $key=>$val) {
            // Strict comparison so that a numeric key is never mistaken for
            // one of the special names by loose string/integer comparison.
            if (in_array($key, $special, true)) {
                continue;
            }
            // Wrap only values that are actually written. Wrapping used to
            // happen before the skip check and therefore also hit the author
            // value, which is an array when extractAuthors is enabled.
            if ($this->_options['wordWrapWidth']>0) {
                $val = $this->_wordWrap($val);
            }
            $bibtex .= "\t".$key.' = {'.$val."},\n";
        }
        //Author
        $author = '';
        if (array_key_exists('author', $entry)) {
            if ($this->_options['extractAuthors']) {
                // Format every extracted author and join them with "and"
                $tmparray = array();
                foreach ($entry['author'] as $authorentry) {
                    $tmparray[] = $this->_formatAuthor($authorentry);
                }
                $author = join(' and ', $tmparray);
            } else {
                $author = $entry['author'];
            }
        }
        // The author field is written for every entry, even when it is empty
        $bibtex .= "\tauthor = {".$author."}\n";
        $bibtex .= "}\n\n";
    }
    return $bibtex;
}
1038
1039    /**
1040     * Adds a new BibTex entry to the data
1041     *
1042     * @access public
1043     * @param array $newentry The new data to add
1044     * @return void
1045     */
function addEntry($newentry)
{
    // Append the entry as-is to the end of the stored data
    $this->data[] = $newentry;
}
1050
1051    /**
1052     * Returns statistic
1053     *
1054     * This functions returns a hash table. The keys are the different
1055     * entry types and the values are the amount of these entries.
1056     *
1057     * @access public
1058     * @return array Hash Table with the data
1059     */
function getStatistic()
{
    // Maps each entry type to the number of stored entries of that type
    $counts = array();
    foreach ($this->data as $entry) {
        $type = $entry['entrytype'];
        if (!array_key_exists($type, $counts)) {
            // First entry of this type: start its counter
            $counts[$type] = 0;
        }
        $counts[$type]++;
    }
    return $counts;
}
1072
1073    /**
1074     * Returns the stored data in RTF format
1075     *
1076     * This method simply returns a RTF formatted string. This is done very
1077     * simple and is not intended for heavy using and fine formatting. This
1078     * should be done by BibTex! It is intended to give some kind of quick
1079     * preview or to send someone a reference list as word/rtf format (even
1080     * some people in the scientific field still use word). If you want to
1081     * change the default format you have to override the class variable
1082     * "rtfstring". This variable is used and the placeholders simply replaced.
1083     * Lines with no data cause a warning!
1084     *
1085     * @return string the RTF Strings
1086     */
function rtf()
{
    $ret = "{\\rtf\n";
    foreach ($this->data as $entry) {
        // Collect the four values the template can show; a missing field
        // stays an empty string.
        $title   = array_key_exists('title', $entry) ? $this->_unwrap($entry['title']) : '';
        $journal = array_key_exists('journal', $entry) ? $this->_unwrap($entry['journal']) : '';
        $year    = array_key_exists('year', $entry) ? $this->_unwrap($entry['year']) : '';
        $authors = '';
        if (array_key_exists('author', $entry)) {
            if ($this->_options['extractAuthors']) {
                // Format every extracted author and join them with commas
                $tmparray = array();
                foreach ($entry['author'] as $authorentry) {
                    $tmparray[] = $this->_formatAuthor($authorentry);
                }
                $authors = join(', ', $tmparray);
            } else {
                $authors = $entry['author'];
            }
        }
        if ((''!=$title) || (''!=$journal) || (''!=$year) || (''!=$authors)) {
            // strtr() fills all placeholders in one pass and does not rescan
            // inserted text. With chained str_replace() calls a title such as
            // "THE YEAR OF ..." had its "YEAR" replaced by the year value.
            $line = strtr($this->rtfstring, array(
                'TITLE'   => $title,
                'JOURNAL' => $journal,
                'YEAR'    => $year,
                'AUTHORS' => $authors
            ));
            $ret .= $line."\n\\par\n";
        } else {
            // Nothing to show for this entry: record a warning instead of a line
            $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry,1));
        }
    }
    $ret .= '}';
    return $ret;
}
1130
1131    /**
1132     * Returns the stored data in HTML format
1133     *
1134     * This method simply returns a HTML formatted string. This is done very
1135     * simple and is not intended for heavy using and fine formatting. This
1136     * should be done by BibTex! It is intended to give some kind of quick
1137     * preview. If you want to change the default format you have to override
1138     * the class variable "htmlstring". This variable is used and the placeholders
1139     * simply replaced.
1140     * Lines with no data cause a warning!
1141     *
1142     * @return string the HTML Strings
1143     */
function html()
{
    $ret = "<p>\n";
    foreach ($this->data as $entry) {
        // Collect the four values the template can show; a missing field
        // stays an empty string.
        // NOTE(review): the values are inserted into the markup as they are
        // (no htmlspecialchars()); confirm whether callers escape untrusted
        // BibTex data themselves or whether escaping is wanted here.
        $title   = array_key_exists('title', $entry) ? $this->_unwrap($entry['title']) : '';
        $journal = array_key_exists('journal', $entry) ? $this->_unwrap($entry['journal']) : '';
        $year    = array_key_exists('year', $entry) ? $this->_unwrap($entry['year']) : '';
        $authors = '';
        if (array_key_exists('author', $entry)) {
            if ($this->_options['extractAuthors']) {
                // Format every extracted author and join them with commas
                $tmparray = array();
                foreach ($entry['author'] as $authorentry) {
                    $tmparray[] = $this->_formatAuthor($authorentry);
                }
                $authors = join(', ', $tmparray);
            } else {
                $authors = $entry['author'];
            }
        }
        if ((''!=$title) || (''!=$journal) || (''!=$year) || (''!=$authors)) {
            // strtr() fills all placeholders in one pass and does not rescan
            // inserted text. With chained str_replace() calls a title such as
            // "THE YEAR OF ..." had its "YEAR" replaced by the year value.
            $line = strtr($this->htmlstring, array(
                'TITLE'   => $title,
                'JOURNAL' => $journal,
                'YEAR'    => $year,
                'AUTHORS' => $authors
            ));
            $ret .= $line."\n";
        } else {
            // Nothing to show for this entry: record a warning instead of a line
            $this->_generateWarning('WARNING_LINE_WAS_NOT_CONVERTED', '', print_r($entry,1));
        }
    }
    $ret .= "</p>\n";
    return $ret;
}
1187}
1188?>
1189