1<?php
2/*
3 * By Raphael Reitzig, 2012
4 * version 2.0
5 * code@verrech.net
6 * http://lmazy.verrech.net
7 */
8?>
9<?php
10/*
11    This program is free software: you can redistribute it and/or modify
12    it under the terms of the GNU General Public License as published by
13    the Free Software Foundation, either version 3 of the License, or
14    (at your option) any later version.
15
16    This program is distributed in the hope that it will be useful,
17    but WITHOUT ANY WARRANTY; without even the implied warranty of
18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19    GNU General Public License for more details.
20
21    You should have received a copy of the GNU General Public License
22    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23*/
24?>
25<?php
26
27// Use the slightly modified BibTex parser from PEAR.
28require_once('lib/PEAR5.php');
29require_once('lib/PEAR.php');
30require_once('lib/BibTex.php');
31
32// Some stupid functions
33require_once('helper.inc.php');
34
35/**
36 * This class provides a method that parses bibtex files to
37 * other text formats based on a template language. See
38 *   http://lmazy.verrech.net/bib2tpl/
39 * for documentation.
40 *
41 * @author Raphael Reitzig
42 * @version 2.0
43 */
class BibtexConverter {
  /**
   * BibTex parser. Created lazily by parse() and shared by all instances.
   *
   * @access private
   * @var Structures_BibTex
   */
  private static $parser;

  /**
   * Options array. May contain the following pairs:
   *   only  => array([$field => $regexp], ...)
   *   group => (none|firstauthor|entrytype|$field)
   *   order_groups => (asc|desc)
   *   sort_by => (DATE|$field)
   *   order => (asc|desc)
   *   lang => xy (where lang/xy.php exists)
   *
   * After construction, 'lang' no longer holds the language code but the
   * $translations array defined by the loaded language file.
   * @access private
   * @var array
   */
  private $options;

  /**
   * Callback to a function that takes a string (taken from a
   * BibTeX field) and clears it up for output.
   * @access private
   * @var callback
   */
  private $sanitise;

  /**
   * Helper object with support functions.
   * @access private
   * @var Helper
   */
  private $helper;

  /**
   * Maps author names to their replacement of the form "[[target|name]]".
   * @access private
   * @var assoc. array
   */
  private $authorlist;

  /**
   * Constructor.
   *
   * @access public
   * @param array $options Options array. May contain the following pairs:
   *                       - only  => array([$field => $regexp], ...)
   *                       - group => (none|year|firstauthor|entrytype|$field)
   *                       - order_groups => (asc|desc)
   *                       - sort_by => (DATE|$field)
   *                       - order => (asc|desc)
   *                       - lang  => any string as long as proper lang/$s.php exists
   *                       For details see documentation.
   * @param callback $sanitise Callback to a function that takes a string (taken from a
   *                           BibTeX field) and clears it up for output. Default is the
   *                           identity function.
   * @param string $authors Optional list of author links, one per line. Each line
   *                        consists of a link target, a single space and the author
   *                        name; occurrences of the name in author lines are replaced
   *                        by "[[target|name]]". Lines that do not contain both parts
   *                        are ignored.
   */
  public function __construct($options=array(), $sanitise=null, $authors=null) {
    // Default options
    $this->options = array(
      'only'  => array(),
      'group' => 'year',
      'order_groups' => 'desc',
      'sort_by' => 'DATE',
      'order' => 'desc',
      'lang' => 'en'
    );

    /* Fall back to the identity function if no sanitiser was passed.
     * create_function() no longer exists in PHP 8, so a plain callback to a
     * private method is used; it works on every PHP version.
     */
    $this->sanitise = !empty($sanitise) ? $sanitise : array(__CLASS__, 'identity');

    // Overwrite default options
    foreach ( array_keys($this->options) as $key ) {
      if ( !empty($options[$key]) ) {
        $this->options[$key] = $options[$key];
      }
    }

    /* Load translations.
     * We assume that the english language file is always there.
     * The file that is checked is exactly the file that is loaded (absolute
     * path), and language names containing directory parts are rejected so
     * the option cannot be used to include files outside of lang/.
     */
    $langdir  = dirname(__FILE__).'/lang/';
    $langfile = $langdir.'en.php';
    if ( is_scalar($this->options['lang']) ) {
      $lang = (string)$this->options['lang'];
      if ( basename($lang) === $lang && is_readable($langdir.$lang.'.php') ) {
        $langfile = $langdir.$lang.'.php';
      }
    }
    // The language file is expected to define $translations in this scope.
    require($langfile);
    $this->options['lang'] = $translations;

    $this->helper = new Helper($this->options);

    // Build the author replacement table.
    $this->authorlist = array();
    if ( is_string($authors) && $authors !== '' ) {
      foreach ( preg_split("/((\r?\n)|(\r\n?))/", $authors) as $line ) {
        $tmp = explode(" ", $line, 2);
        // Skip blank lines and lines that lack a target or a name.
        if ( count($tmp) < 2 || $tmp[1] === '' ) {
          continue;
        }
        $this->authorlist[$tmp[1]] = "[[".$tmp[0]."|".$tmp[1]."]]";
      }
    }
  }

  /**
   * Identity function; default for the sanitise callback.
   *
   * @access private
   * @param mixed i Any value
   * @return mixed The passed value, unchanged
   */
  private static function identity($i) {
    return $i;
  }

  /**
   * Escapes a string so that preg_replace inserts it literally when it is
   * used as replacement. Without this, sequences such as $1, \1 or \\ that
   * occur in BibTeX data would be interpreted as backreferences or escapes.
   *
   * @access private
   * @param string text The text to be inserted literally
   * @return string The text with backslashes and dollar signs escaped
   */
  private static function escape_replacement($text) {
    return addcslashes($text, '\\$');
  }

  /**
   * Parses the specified BibTeX string into an array with entries of the form
   * $entrykey => $entry. The result can be used with BibtexConverter::convert.
   *
   * @access public
   * @param string $bibtex BibTeX code
   * @return array|PEAR_Error Array with data from passed BibTeX, or the
   *                          PEAR_Error reported by the parser
   */
  public static function parse(&$bibtex) {
    if ( !isset(self::$parser) ) {
      self::$parser = new Structures_BibTex(array('removeCurlyBraces' => false));
    }

    self::$parser->loadString($bibtex);
    $stat = self::$parser->parse();

    if ( PEAR::isError($stat) ) {
      return $stat;
    }

    // Index the parsed entries by their entry key.
    $result = array();
    foreach ( self::$parser->data as $entry ) {
      $result[$entry['entrykey']] = $entry;
    }

    return $result;
  }

  /**
   * Parses the given BibTeX string and applies its data to the passed template string.
   * If $bibtex is an array (which has to be parsed by BibtexConverter::parse)
   * parsing is skipped.
   *
   * Note that $template is passed by reference: if grouping is off (either by
   * option or because the template lacks grouping tags), grouping tags are
   * removed from the passed template itself.
   *
   * @access public
   * @param string|array $bibtex BibTeX code or parsed array
   * @param string       $template template code
   * @param array  $replacementKeys An array with entries of the form $entrykey => $newKey.
   *                                If an entrykey occurs here, it will be replaced by
   *                                its corresponding newKey in the output.
   * @return string|PEAR_Error Result string or PEAR_Error on failure
   */
  public function convert($bibtex, &$template, &$replacementKeys=array()) {
    // If there are no grouping tags, disable grouping (for this call only).
    $formerGroup = null;
    if ( preg_match('/@\{group@/s', $template) + preg_match('/@\}group@/s', $template) < 2 ) {
      $formerGroup = $this->options['group'];
      $this->options['group'] = 'none';
    }

    // If grouping is off, remove grouping tags.
    if ( $this->options['group'] === 'none' ) {
      $template = preg_replace(array('/@\{group@/s', '/@\}group@/s'), '', $template);
    }

    // Parse if necessary
    $failed = false;
    if ( is_array($bibtex) ) {
      $data = $bibtex;
    }
    else {
      $data = self::parse($bibtex);
      $failed = PEAR::isError($data);
    }

    if ( $failed ) {
      // Hand the parser error to the caller instead of treating it as data.
      $result = $data;
    }
    else {
      $data   = $this->filter($data, $replacementKeys);
      $data   = $this->group($data);
      $data   = $this->sort($data);
      $result = $this->translate($data, $template);
    }

    /* If grouping was disabled because of the template, restore the former
     * setting for future calls. */
    if ( $formerGroup !== null ) {
      $this->options['group'] = $formerGroup;
    }

    return $result;
  }

  /**
   * This function filters data from the specified array that should
   * not be shown. Filter criteria are specified at object creation.
   *
   * Furthermore, the entrytype of entries whose entrytype is not translated
   * in the loaded language file is replaced by the translation of 'unknown'.
   *
   * @access private
   * @param array data Unfiltered data, that is array of entries
   * @param replacementKeys An array with entries of the form $entrykey => $newKey.
   *                        If an entrykey occurs here, it will be replaced by
   *                        its corresponding newKey in the output.
   * @return array Filtered data as (reindexed) array of entries
   */
  private function filter(&$data, &$replacementKeys=array()) {
    $result = array();

    foreach ( $data as $entry ) {
      // Some additions/corrections
      if ( empty($this->options['lang']['entrytypes'][$entry['entrytype']]) ) {
        $entry['entrytype'] = $this->options['lang']['entrytypes']['unknown'];
      }

      // Check whether this entry should be included
      $keep = true;
      foreach ( $this->options['only'] as $field => $regexp ) {
        if ( empty($entry[$field]) ) {
          /* If the considered field does not even exist, consider this a fail.
           * That enables to use $field => '.*' as existence check. */
          $keep = false;
        }
        else {
          // Authors are matched against their formatted representation.
          $val =   $field === 'author'
                 ? $entry['niceauthor']
                 : $entry[$field];

          // NOTE: $regexp is inserted as is; a '/' in it has to be escaped by the caller.
          $keep = $keep && preg_match('/'.$regexp.'/i', $val);
        }
      }

      if ( $keep === true ) {
        if ( !empty($replacementKeys[$entry['entrykey']]) ) {
          $entry['entrykey'] = $replacementKeys[$entry['entrykey']];
        }

        $result[] = $entry;
      }
    }

    return $result;
  }

  /**
   * This function groups the passed entries according to the criteria
   * passed at object creation. Entries that lack the grouping field
   * (or, for 'firstauthor', have no author) end up in the 'rest' group.
   * If grouping is off, all entries form one group named by the 'all'
   * translation.
   *
   * @access private
   * @param array data An array of entries
   * @return array An array of arrays of entries
   */
  private function group(&$data) {
    $groupby = $this->options['group'];

    if ( $groupby === 'none' ) {
      return array($this->options['lang']['all'] => $data);
    }

    $result = array();
    foreach ( $data as $entry ) {
      if ( $groupby === 'firstauthor' ) {
        // Entries without any author can not be grouped by first author.
        $target =   !empty($entry['author'][0]['nice'])
                  ? $entry['author'][0]['nice']
                  : $this->options['lang']['rest'];
      }
      elseif ( empty($entry[$groupby]) ) {
        $target = $this->options['lang']['rest'];
      }
      elseif ( $groupby === 'author' ) {
        $target = $entry['niceauthor'];
      }
      else {
        $target = $entry[$groupby];
      }

      if ( empty($result[$target]) ) {
        $result[$target] = array();
      }

      $result[$target][] = $entry;
    }

    return $result;
  }

  /**
   * This function sorts the passed group of entries and the individual
   * groups if there are any. The comparison functions are provided by
   * the helper object.
   *
   * @access private
   * @param array data An array of arrays of entries
   * @return array A sorted array of sorted arrays of entries
   */
  private function sort($data) {
    // Sort groups if there are any
    if ( $this->options['group'] !== 'none' ) {
      uksort($data, array($this->helper, 'group_cmp'));
    }

    // Sort individual groups
    foreach ( $data as &$group ) {
      uasort($group, array($this->helper, 'entry_cmp'));
    }
    unset($group); // do not leave a dangling reference into $data

    return $data;
  }

  /**
   * This function inserts the specified data into the specified template.
   * For template syntax see class documentation or examples.
   *
   * @access private
   * @param array data An array of arrays of entries
   * @param string template The used template
   * @return string The data represented in terms of the template
   */
  private function translate(&$data, &$template) {
    // Replace global values
    $result = preg_replace(array('/@globalcount@/', '/@globalgroupcount@/'),
                           array(Helper::lcount($data, 2), count($data)),
                           $template);

    if ( $this->options['group'] === 'none' ) {
      /* Without grouping, the whole template (with global values already
       * inserted) serves as template of the one and only group. */
      $groups = '';
      foreach ( $data as $groupkey => $group ) { // loop will only be run once
        $groups .= $this->translate_group($groupkey, 0, $group, $result);
      }
      return $groups;
    }

    $pattern = '/@\{group@(.*?)@\}group@/s';
    $group_tpl = array();

    // For all occurrences of a group template
    while ( preg_match($pattern, $result, $group_tpl) ) {
      // Translate all groups
      $groups = '';
      $id = 0;
      foreach ( $data as $groupkey => $group ) {
        $groups .= $this->translate_group($groupkey, $id++, $group, $group_tpl[1]);
      }

      // Insert literally; BibTeX data may contain '$' and '\'.
      $result = preg_replace($pattern, self::escape_replacement($groups), $result, 1);
    }

    return $result;
  }

  /**
   * This function translates one entry group
   *
   * @access private
   * @param string key The rendered group's key
   * @param int id A unique ID for this group
   * @param array data Array of entries in this group
   * @param string template The group part of the template
   * @return string String representing the passed group wrt template
   */
  private function translate_group($key, $id, &$data, $template) {
    /* When grouping by entrytype, show the translated type name. Keys without
     * translation (e.g. the one assigned to unknown entrytypes, which already
     * is a translation) are shown as they are. */
    if (    $this->options['group'] === 'entrytype'
         && isset($this->options['lang']['entrytypes'][$key]) ) {
      $key = $this->options['lang']['entrytypes'][$key];
    }

    // Replace group values; plain string replacement keeps the key literal.
    $result = str_replace(array('@groupkey@', '@groupid@', '@groupcount@'),
                          array($key, $id, count($data)),
                          $template);

    $pattern = '/@\{entry@(.*?)@\}entry@/s';
    $entry_tpl = array();

    // For all occurrences of an entry template
    while ( preg_match($pattern, $result, $entry_tpl) ) {
      // Translate all entries
      $entries = '';
      foreach ( $data as $entry ) {
        $entries .= $this->translate_entry($entry, $entry_tpl[1]);
      }

      // Insert literally; BibTeX data may contain '$' and '\'.
      $result = preg_replace($pattern, self::escape_replacement($entries), $result, 1);
    }

    return $result;
  }

  /**
   * This function translates one entry: conditions are resolved first, then
   * every @field@ tag of a field present in the entry is replaced by the
   * field's value. All values but 'bibtex' are passed through the sanitise
   * callback; 'author' is rendered as formatted author line with links.
   *
   * @access private
   * @param array entry Array of fields
   * @param string template The entry part of the template
   * @return string String representing the passed entry wrt template
   */
  private function translate_entry(&$entry, $template) {
    $result = $template;

    // Resolve all conditions
    $result = $this->resolve_conditions($entry, $result);

    // Replace all possible unconditional fields
    $tags = array();
    $replacements = array();

    foreach ( $entry as $key => $value ) {
      if ( $key === 'author' ) {
        $value = $this->authorlink(isset($entry['niceauthor']) ? $entry['niceauthor'] : '');
      }

      $tags[] = '@'.$key.'@';
      // The raw BibTeX source is inserted unsanitised.
      $replacements[] =   $key === 'bibtex'
                        ? $value
                        : call_user_func($this->sanitise, $value);
    }

    /* Tags are literal, so plain (sequential) string replacement suffices and
     * values containing '$' or '\' are inserted unchanged. */
    return str_replace($tags, $replacements, $result);
  }

  /**
   * This function eliminates conditions in template parts.
   *
   * Note that $string is passed by reference and modified in place.
   *
   * @access private
   * @param array entry Entry with respect to which conditions are to be
   *                    solved.
   * @param string string The entry part of the template.
   * @return string Template string without conditions.
   */
  private function resolve_conditions(&$entry, &$string) {
    $pattern = '/@\?(\w+)(?:(<=|>=|==|!=|~)(.*?))?@(.*?)(?:@:\1@(.*?))?@;\1@/s';
    /* There are two possibilities for mode: existential or value check
     * Then, there can be an else part or not.
     * Group 1  field     \w+
     * Group 2  operator  <=|>=|==|!=|~   (empty string for existential checks)
     * Group 3  value     .*?             (empty string for existential checks)
     * Group 4  then      .*?
     * Group 5  [else]    .*?             (missing if there is no else part)
     */

    $match = array();

    /* Would like to do
     *    preg_match_all($pattern, $string, $matches);
     * to get all matches at once but that results in Segmentation
     * fault. Therefore iteratively:
     */
    while ( preg_match($pattern, $string, $match) )
    {
      $field    = $match[1];
      $operator = $match[2];
      $value    = $match[3];
      $then     = $match[4];
      $else     = isset($match[5]) ? $match[5] : '';

      // A condition on a missing (or empty) field always fails.
      $evalcond = !empty($entry[$field]);

      if ( $evalcond && $operator !== '' ) {
        if ( $operator === '==' ) {
          $evalcond = $entry[$field] === $value;
        }
        elseif ( $operator === '!=' ) {
          $evalcond = $entry[$field] !== $value;
        }
        elseif ( $operator === '<=' ) {
          $evalcond =    is_numeric($entry[$field])
                      && is_numeric($value)
                      && (int)$entry[$field] <= (int)$value;
        }
        elseif ( $operator === '>=' ) {
          $evalcond =    is_numeric($entry[$field])
                      && is_numeric($value)
                      && (int)$entry[$field] >= (int)$value;
        }
        elseif ( $operator === '~' ) {
          // The value is used as regular expression (taken from the template).
          $evalcond = preg_match('/'.$value.'/', $entry[$field]) > 0;
        }
      }

      $resolved = $evalcond ? $then : $else;

      // Recurse to cope with nested conditions
      $resolved = $this->resolve_conditions($entry, $resolved);

      $string = str_replace($match[0], $resolved, $string);
    }

    return $string;
  }

  /**
   * This function adds links to co-author websites where available, that is
   * it replaces every author name known from the author list passed at
   * object creation by its "[[target|name]]" form.
   *
   * @access private
   * @param string data Formatted author line without links.
   * @return string data Formatted author line with links.
   */
  private function authorlink($data) {
    return str_replace(array_keys($this->authorlist), $this->authorlist, $data);
  }
}
559
560?>
561