1<?php
2
3/**
4 * Plugin RefNotes: BibTeX parser
5 *
6 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author     Mykola Ostrovskyy <dwpforge@gmail.com>
8 */
9
10////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_parser extends \dokuwiki\Parsing\Parser {

    private static $instance = NULL;

    /**
     * Returns the shared parser object, creating it on the first call.
     *
     * @return refnotes_bibtex_parser
     */
    public static function getInstance() {
        if (is_null(self::$instance)) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Constructor
     *
     * Wires the BibTeX handler and lexer together and registers all BibTeX modes.
     * The registration order below is kept as is on purpose.
     */
    public function __construct() {
        $this->handler = new refnotes_bibtex_handler();
        $this->lexer = new refnotes_bibtex_lexer($this->handler, 'base', true);

        $bibtexModes = array(
            new refnotes_bibtex_outside_mode(),
            new refnotes_bibtex_entry_mode('parented'),
            new refnotes_bibtex_entry_mode('braced'),
            new refnotes_bibtex_field_mode(),
            new refnotes_bibtex_integer_value_mode(),
            new refnotes_bibtex_string_value_mode('quoted'),
            new refnotes_bibtex_string_value_mode('braced'),
            new refnotes_bibtex_nested_braces_mode('quoted'),
            new refnotes_bibtex_nested_braces_mode('braced'),
            new refnotes_bibtex_concatenation_mode()
        );

        foreach ($bibtexModes as $bibtexMode) {
            $this->addBibtexMode($bibtexMode);
        }
    }

    /**
     * Registers a mode under the name reported by the mode itself.
     *
     * @param refnotes_bibtex_mode $mode
     */
    private function addBibtexMode($mode) {
        $modeName = $mode->getName();

        $this->addMode($modeName, $mode);
    }

    /**
     * Connects the top-level modes (text outside of entries and both entry flavours)
     * to the 'base' mode and then lets the parent class connect the rest.
     * Does nothing if the modes are already connected.
     */
    public function connectModes() {
        if ($this->connected) {
            return;
        }

        foreach (array('outside', 'entry_parented', 'entry_braced') as $topLevelMode) {
            $this->modes[$topLevelMode]->connectTo('base');
        }

        parent::connectModes();
    }

    /**
     * Parses BibTeX text and returns whatever the handler produces in finalize().
     *
     * Windows line endings ("\r\n") are converted to "\n" before lexing. The handler
     * is reset first, so results of a previous parse() call are not carried over.
     *
     * @param string $text BibTeX source
     * @return mixed result of refnotes_bibtex_handler::finalize()
     */
    public function parse($text) {
        $this->connectModes();

        $this->handler->reset();

        $normalized = str_replace("\r\n", "\n", $text);
        $this->lexer->parse($normalized);

        return $this->handler->finalize();
    }
}
77
78////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_lexer extends \dokuwiki\Parsing\Lexer\Lexer {

    /**
     * Splits the text into tokens and dispatches them to the handler.
     *
     * Token positions are not tracked: 0 is passed to the handler for every position.
     * A match that consumes nothing (empty unmatched and empty matched text) is only
     * accepted when it changes the current mode; otherwise parsing is aborted, since
     * such a match makes no progress.
     *
     * @param string $text text to parse
     * @return bool false when a handler call fails, when reduce() reports a failure
     *              or when no progress is made; otherwise the result of the final
     *              handler call for the remaining text
     */
    public function parse($text) {
        $lastMode = '';

        while (is_array($parsed = $this->reduce($text))) {
            list($unmatched, $matched, $mode) = $parsed;

            if (!$this->dispatchTokens($unmatched, $matched, $mode, 0, 0)) {
                return false;
            }

            $currentMode = $this->modeStack->getCurrent();

            /*
             * Compare against the empty string explicitly: empty() also treats "0" as
             * empty, which made a matched token "0" (e.g. the integer value of a field)
             * look like a zero-length match and aborted parsing.
             */
            $nothingConsumed = ((string) $unmatched === '') && ((string) $matched === '');

            if ($nothingConsumed && ($lastMode == $currentMode)) {
                return false;
            }

            $lastMode = $currentMode;
        }

        if (!$parsed) {
            return false;
        }

        return $this->invokeHandler($text, DOKU_LEXER_UNMATCHED, 0);
    }

    /**
     * Calls the handler method that corresponds to the current mode.
     *
     * The method name is taken from the mode-to-handler map when the current mode is
     * mapped, otherwise the mode name itself is used. Empty unmatched text is silently
     * skipped and reported as success.
     *
     * @param string $text  token text
     * @param int    $state one of the DOKU_LEXER_* states
     * @param int    $pos   token position, passed through to the handler
     * @return mixed true for skipped empty unmatched text, otherwise the handler result
     */
    protected function invokeHandler($text, $state, $pos) {
        if ($text == "" && $state == DOKU_LEXER_UNMATCHED) {
            return true;
        }

        $mode = $this->modeStack->getCurrent();
        $handler = isset($this->mode_handlers[$mode]) ? $this->mode_handlers[$mode] : $mode;

        return $this->handler->$handler($text, $state, $pos);
    }
}
122
123////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_mode extends \dokuwiki\Parsing\ParserMode\AbstractMode {

    protected $name;
    protected $handler;
    protected $specialPattern;
    protected $entryPattern;
    protected $exitPattern;

    /**
     * Constructor
     *
     * Starts with empty pattern lists and no explicit handler. The mode name is
     * derived from the class name: "refnotes_bibtex_<name>_mode" gives "<name>".
     */
    public function __construct() {
        $this->specialPattern = array();
        $this->entryPattern = array();
        $this->exitPattern = array();

        $this->handler = '';
        $this->name = preg_replace('/refnotes_bibtex_(\w+)_mode/', '$1', get_class($this));
    }

    /**
     * All BibTeX modes report the same sort value.
     *
     * @return int always 0
     */
    public function getSort() {
        return 0;
    }

    /**
     * @return string name under which this mode is registered
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Registers the special and entry patterns of this mode with the lexer for the
     * given parent mode. If a handler name is set, this mode is mapped to it.
     *
     * @param string $mode name of the mode the patterns are attached to
     */
    public function connectTo($mode) {
        foreach ($this->specialPattern as $special) {
            $this->Lexer->addSpecialPattern($special, $mode, $this->name);
        }

        foreach ($this->entryPattern as $entry) {
            $this->Lexer->addEntryPattern($entry, $mode, $this->name);
        }

        if ($this->handler == '') {
            return;
        }

        $this->Lexer->mapHandler($this->name, $this->handler);
    }

    /**
     * Registers the exit patterns of this mode with the lexer.
     */
    public function postConnect() {
        foreach ($this->exitPattern as $exit) {
            $this->Lexer->addExitPattern($exit, $this->name);
        }
    }
}
184
185////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_outside_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * The only pattern matches a run of non-"@" characters that is followed by "@".
     */
    public function __construct() {
        parent::__construct();

        array_push($this->specialPattern, '[^@]+(?=@)');
    }

    /**
     * Connects the pattern as usual and additionally maps the 'base' mode
     * to the handler named after this mode.
     *
     * @param string $mode name of the mode the patterns are attached to
     */
    public function connectTo($mode) {
        parent::connectTo($mode);

        $ownName = $this->name;
        $this->Lexer->mapHandler('base', $ownName);
    }
}
206
207////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_entry_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * Both flavours share one handler (the base mode name) but are registered under
     * separate names with the type appended. An entry starts with "@word" followed by
     * the opening delimiter, provided that a closing delimiter follows later; it ends
     * at the closing delimiter or right before the next "@".
     *
     * @param string $type 'parented' for "@type( ... )", anything else for "@type{ ... }"
     */
    public function __construct($type) {
        parent::__construct();

        $baseName = $this->name;

        $this->handler = $baseName;
        $this->name = $baseName . '_' . $type;

        if ($type == 'parented') {
            $open = '\(';
            $close = '\)';
        }
        else {
            $open = '{';
            $close = '}';
        }

        $this->allowedModes = array('field');

        $this->entryPattern[] = '^@\w+\s*' . $open . '(?=.*' . $close . ')';
        $this->exitPattern[] = '\s*(?:' . $close . '|(?=@))';
    }
}
227
228////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_field_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * A field starts with "name =" (surrounding whitespace included) and ends at a
     * comma or right before ")", "}" or "@". Inside a field only value modes and the
     * concatenation mode are allowed.
     */
    public function __construct() {
        parent::__construct();

        $this->allowedModes = array('integer_value', 'string_value_quoted', 'string_value_braced', 'concatenation');

        array_push($this->entryPattern, '^\s*\w[\w-]+\s*=\s*');
        array_push($this->exitPattern, '\s*(?:,|(?=[\)}@]))');
    }
}
243
244////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_integer_value_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * Matches a run of digits at the start of the remaining text.
     */
    public function __construct() {
        parent::__construct();

        array_push($this->specialPattern, '^\d+');
    }
}
256
257////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_string_value_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * Both flavours share one handler (the base mode name) but are registered under
     * separate names with the type appended. A value starts at the opening delimiter,
     * provided that a closing delimiter follows later. A quoted value ends at the next
     * double quote; a braced value ends at "}" or right before the next "@".
     *
     * @param string $type 'quoted' for "..." values, anything else for {...} values
     */
    public function __construct($type) {
        parent::__construct();

        $baseName = $this->name;

        $this->handler = $baseName;
        $this->name = $baseName . '_' . $type;

        if ($type == 'quoted') {
            $open = '"';
            $close = '"';
            $exit = '"';
        }
        else {
            $open = '{';
            $close = '}';
            $exit = '(?:}|(?=@))';
        }

        $this->allowedModes = array('nested_braces_' . $type);

        $this->entryPattern[] = '^' . $open . '(?=.*' . $close . ')';
        $this->exitPattern[] = $exit;
    }
}
277
278////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_nested_braces_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * Both flavours share one handler (the base mode name) but are registered under
     * separate names with the type appended. The mode lists itself as the only allowed
     * inner mode. It starts at "{" when a "}" follows later; inside a quoted string it
     * ends at "}", otherwise at "}" or right before the next "@".
     *
     * @param string $type type of the enclosing string value ('quoted' or 'braced')
     */
    public function __construct($type) {
        parent::__construct();

        $baseName = $this->name;

        $this->handler = $baseName;
        $this->name = $baseName . '_' . $type;

        if ($type == 'quoted') {
            $exit = '}';
        }
        else {
            $exit = '(?:}|(?=@))';
        }

        $this->entryPattern[] = '{(?=.*})';
        $this->exitPattern[] = $exit;

        $this->allowedModes = array($this->name);
    }
}
296
297////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_concatenation_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * Matches the "#" operator together with the whitespace around it.
     */
    public function __construct() {
        parent::__construct();

        array_push($this->specialPattern, '\s*#\s*');
    }
}
309
310////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_handler {

    private $entries;
    private $entry;
    private $field;

    /**
     * Constructor
     */
    public function __construct() {
        $this->reset();
    }

    /**
     * Drops everything collected so far: starts a new entry stash and forgets
     * the entry and the field that were being built.
     */
    public function reset() {
        $this->entries = new refnotes_bibtex_entry_stash();
        $this->entry = NULL;
        $this->field = NULL;
    }

    /**
     * Returns the collected entries with the 'author' field rewritten.
     *
     * The author list is split on " and ". Within every author the parts separated
     * by ", " are put in reverse order and joined with a space ("Last, First" becomes
     * "First Last"). The authors are then joined with ", ".
     *
     * @return array list of entry data arrays as provided by the entry stash
     */
    public function finalize() {
        $entries = $this->entries->getEntries();

        /* Write back through the index instead of iterating by reference, so that no
           dangling references to the last entry or the last author are left behind */
        foreach ($entries as $index => $entry) {
            if (!array_key_exists('author', $entry)) {
                continue;
            }

            $authors = array();

            foreach (explode(' and ', $entry['author']) as $author) {
                $authors[] = implode(' ', array_reverse(explode(', ', $author)));
            }

            $entries[$index]['author'] = implode(', ', $authors);
        }

        return $entries;
    }

    /**
     * Handles the text outside of the entries.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function outside($match, $state) {
        /* Ignore everything outside the entries */
        return true;
    }

    /**
     * Handles the tokens of an entry.
     *
     * On enter a new entry is created; its type is the word that follows "@" in the
     * matched text. Unmatched text is passed on to the entry. On exit the entry is
     * added to the stash.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function entry($match, $state) {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $this->entry = new refnotes_bibtex_entry(preg_replace('/@(\w+)\W+/', '$1', $match));
                break;

            case DOKU_LEXER_UNMATCHED:
                $this->entry->handleUnmatched($match);
                break;

            case DOKU_LEXER_EXIT:
                $this->entries->add($this->entry);
                $this->entry = NULL;
                break;
        }

        return true;
    }

    /**
     * Handles the tokens of a field.
     *
     * On enter a new field is created; its name is extracted from the matched
     * "name =" text. Unmatched text is stored as an 'unmatched' token. On exit
     * the field is added to the current entry.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function field($match, $state) {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $this->field = new refnotes_bibtex_field(preg_replace('/\W*(\w[\w-]+)\W*/', '$1', $match));
                break;

            case DOKU_LEXER_UNMATCHED:
                $this->field->addToken('unmatched', $match);
                break;

            case DOKU_LEXER_EXIT:
                $this->entry->addField($this->field);
                $this->field = NULL;
                break;
        }

        return true;
    }

    /**
     * Stores the matched text as an 'integer' token of the current field.
     *
     * @param string $match token text
     * @param int    $state lexer state (not used)
     * @return bool always true
     */
    public function integer_value($match, $state) {
        $this->field->addToken('integer', $match);

        return true;
    }

    /**
     * Stores the text between the string delimiters as a 'string' token of the
     * current field. The delimiters themselves (enter and exit matches) are dropped.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function string_value($match, $state) {
        if ($state == DOKU_LEXER_UNMATCHED) {
            $this->field->addToken('string', $match);
        }

        return true;
    }

    /**
     * Stores the text inside nested braces as a 'braces' token of the current
     * field. The braces themselves (enter and exit matches) are dropped.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function nested_braces($match, $state) {
        if ($state == DOKU_LEXER_UNMATCHED) {
            $this->field->addToken('braces', $match);
        }

        return true;
    }

    /**
     * Handles the concatenation operator.
     *
     * @param string $match token text
     * @param int    $state lexer state
     * @return bool always true
     */
    public function concatenation($match, $state) {
        /* Nothing special to do, concatenation will happen anyway */
        return true;
    }
}
445
446////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_entry_stash {

    private $entry;
    private $strings;
    private $namespace;

    /**
     * Constructor
     *
     * Starts with no entries, no string definitions and the root namespace (":").
     */
    public function __construct() {
        $this->entry = array();
        $this->strings = new refnotes_bibtex_strings();
        $this->namespace = ':';
    }

    /**
     * @return array list of stored entries; each one is the entry data with
     *               an additional 'note-name' key
     */
    public function getEntries() {
        return $this->entry;
    }

    /**
     * Processes a parsed entry according to its type.
     *
     * - A regular BibTeX entry (article, book, ...) is stored if its name is a valid
     *   RefNotes name. A name that does not start with ":" is prefixed with the
     *   current namespace.
     * - A "string" entry defines a string: the first field provides its name and
     *   value. The definition is kept only if the name is a valid string name.
     * - A "comment" entry named "refnotes" (in any letter case) may change the current
     *   namespace with its "namespace" field.
     *
     * All other entries are ignored.
     *
     * @param refnotes_bibtex_entry $entry
     */
    public function add($entry) {
        static $entryType = array(
            'article', 'book', 'booklet', 'conference', 'inbook', 'incollection', 'inproceedings', 'manual',
            'mastersthesis', 'misc', 'phdthesis', 'proceedings', 'techreport', 'unpublished');

        $type = $entry->getType();
        $name = $entry->getName();

        if (in_array($type, $entryType, true)) {
            if ($this->isValidRefnotesName($name)) {
                if ($name[0] != ':') {
                    $name = $this->namespace . $name;
                }

                $this->entry[] = array_merge(array('note-name' => $name), $entry->getData($this->strings));
            }
        }
        elseif ($type == 'string') {
            $data = $entry->getData($this->strings);

            /* reset() takes its argument by reference, so the key list has to be stored
               in a variable first. For an entry without fields reset() returns false. */
            $fieldNames = array_keys($data);
            $name = reset($fieldNames);

            if (($name !== false) && $this->isValidStringName($name)) {
                $this->strings->add($name, $data[$name]);
            }
        }
        elseif (($type == 'comment') && (strtolower($name) == 'refnotes')) {
            $data = $entry->getData($this->strings);

            if (isset($data['namespace']) && $this->isValidRefnotesName($data['namespace'])) {
                $this->namespace = refnotes_namespace::canonizeName($data['namespace']);
            }
        }
    }

    /**
     * Checks the name against the 'full-extended' note name pattern.
     *
     * @param string $name
     * @return bool
     */
    private function isValidRefnotesName($name) {
        return preg_match('/^' . refnotes_note::getNamePattern('full-extended') . '$/', $name) == 1;
    }

    /**
     * Checks that the name starts with a letter and consists of word characters only.
     *
     * @param string $name
     * @return bool
     */
    private function isValidStringName($name) {
        return preg_match('/^[[:alpha:]]\w*$/', $name) == 1;
    }
}
520
521////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_entry {

    private $type;
    private $name;
    private $field;

    /**
     * Constructor
     *
     * @param string $type entry type; stored in lower case
     */
    public function __construct($type) {
        $this->field = array();
        $this->name = '';
        $this->type = strtolower($type);
    }

    /**
     * @return string entry type in lower case
     */
    public function getType() {
        return $this->type;
    }

    /**
     * @return string entry name, or an empty string if none has been seen yet
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Collects the values of all fields into an array keyed by field name.
     * If a name occurs more than once, the field added last wins.
     *
     * @param refnotes_bibtex_strings $strings string definitions passed on to the fields
     * @return array field name => field value
     */
    public function getData($strings) {
        $data = array();

        foreach ($this->field as $item) {
            $key = $item->getName();
            $value = $item->getValue($strings);

            $data[$key] = $value;
        }

        return $data;
    }

    /**
     * Picks the entry name out of unmatched entry text.
     *
     * The name is the first run of characters other than whitespace and commas that
     * is followed (optionally after whitespace) by a comma. Once a name is set,
     * further text is ignored.
     *
     * @param string $token unmatched text
     */
    public function handleUnmatched($token) {
        if ($this->name != '') {
            return;
        }

        $found = preg_match('/\s*([^\s,]+)\s*,/', $token, $match);

        if ($found == 1) {
            $this->name = $match[1];
        }
    }

    /**
     * Appends a field to the entry.
     *
     * @param refnotes_bibtex_field $field
     */
    public function addField($field) {
        array_push($this->field, $field);
    }
}
580
581////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_field {

    private $name;
    private $token;

    /**
     * Constructor
     *
     * @param string $name field name; stored in lower case
     */
    public function __construct($name) {
        $this->token = array();
        $this->name = strtolower($name);
    }

    /**
     * @return string field name in lower case
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Builds the field value out of the collected tokens.
     *
     * Tokens are concatenated in the order they were added. The text of an 'unmatched'
     * token is treated as a string name: it is trimmed, lower-cased and replaced with
     * the result of the lookup. In the final value the surrounding whitespace is
     * removed and every run of whitespace is replaced with a single space.
     *
     * @param refnotes_bibtex_strings $strings string definitions used for the lookup
     * @return string
     */
    public function getValue($strings) {
        $pieces = array();

        foreach ($this->token as $item) {
            if ($item->type == 'unmatched') {
                $pieces[] = $strings->lookup(strtolower(trim($item->text)));
            }
            else {
                $pieces[] = $item->text;
            }
        }

        $joined = trim(implode('', $pieces));

        return preg_replace('/\s+/', ' ', $joined);
    }

    /**
     * Appends a token to the field.
     *
     * @param string $type token type
     * @param string $text token text
     */
    public function addToken($type, $text) {
        $added = new refnotes_bibtex_field_token($type, $text);

        $this->token[] = $added;
    }
}
628
629////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_field_token {

    public $type;
    public $text;

    /**
     * Constructor
     *
     * Plain data holder: stores both arguments unchanged.
     *
     * @param string $type token type
     * @param string $text token text
     */
    public function __construct($type, $text) {
        $this->text = $text;
        $this->type = $type;
    }
}
643
644////////////////////////////////////////////////////////////////////////////////////////////////////
class refnotes_bibtex_strings {

    private $string;

    /**
     * Constructor
     *
     * Starts with no definitions.
     */
    public function __construct() {
        $this->string = array();
    }

    /**
     * Defines a string. A previous definition with the same name is replaced.
     *
     * @param string $name
     * @param mixed  $value
     */
    public function add($name, $value) {
        $this->string[$name] = $value;
    }

    /**
     * Returns the value of a defined string.
     *
     * @param string $name exact name the string was added under
     * @return mixed stored value, or an empty string if the name is not defined
     */
    public function lookup($name) {
        if (!array_key_exists($name, $this->string)) {
            return '';
        }

        return $this->string[$name];
    }
}
670