<?php

/**
 * Plugin RefNotes: BibTeX parser
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Mykola Ostrovskyy <dwpforge@gmail.com>
 */

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * BibTeX parser built on top of the DokuWiki parser/lexer classes.
 *
 * The parser wires a dedicated handler and lexer together and registers one lexer mode per
 * BibTeX construct (text outside entries, entries, fields and the different value kinds).
 */
class refnotes_bibtex_parser extends \dokuwiki\Parsing\Parser {

    private static $instance = NULL;

    /**
     * Returns the shared parser instance, creating it on first use.
     *
     * @return refnotes_bibtex_parser
     */
    public static function getInstance() {
        if (self::$instance === NULL) {
            self::$instance = new refnotes_bibtex_parser();
        }

        return self::$instance;
    }

    /**
     * Constructor
     *
     * The parent constructor is not called: handler and lexer are assigned directly here.
     */
    public function __construct() {
        $this->handler = new refnotes_bibtex_handler();
        $this->lexer = new refnotes_bibtex_lexer($this->handler, 'base', true);

        $this->addBibtexMode(new refnotes_bibtex_outside_mode());
        $this->addBibtexMode(new refnotes_bibtex_entry_mode('parented'));
        $this->addBibtexMode(new refnotes_bibtex_entry_mode('braced'));
        $this->addBibtexMode(new refnotes_bibtex_field_mode());
        $this->addBibtexMode(new refnotes_bibtex_integer_value_mode());
        $this->addBibtexMode(new refnotes_bibtex_string_value_mode('quoted'));
        $this->addBibtexMode(new refnotes_bibtex_string_value_mode('braced'));
        $this->addBibtexMode(new refnotes_bibtex_nested_braces_mode('quoted'));
        $this->addBibtexMode(new refnotes_bibtex_nested_braces_mode('braced'));
        $this->addBibtexMode(new refnotes_bibtex_concatenation_mode());
    }

    /**
     * Registers a mode under the name reported by the mode itself.
     *
     * @param refnotes_bibtex_mode $mode
     */
    private function addBibtexMode($mode) {
        $this->addMode($mode->getName(), $mode);
    }

    /**
     * Connects the top-level modes (outside text and both entry flavours) to the 'base' mode
     * and then lets the parent class connect the rest. Does nothing once already connected.
     */
    public function connectModes() {
        if (!$this->connected) {
            $this->modes['outside']->connectTo('base');
            $this->modes['entry_parented']->connectTo('base');
            $this->modes['entry_braced']->connectTo('base');

            parent::connectModes();
        }
    }

    /**
     * Parses BibTeX source text.
     *
     * Windows line endings are normalized to "\n" before lexing. The handler is reset first,
     * so every call starts from an empty state.
     *
     * @param string $text BibTeX source
     * @return array list of entry data arrays as produced by refnotes_bibtex_handler::finalize()
     */
    public function parse($text) {
        $this->connectModes();

        $this->handler->reset();
        $this->lexer->parse(str_replace("\r\n", "\n", $text));

        return $this->handler->finalize();
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Lexer variant used for BibTeX input.
 */
class refnotes_bibtex_lexer extends \dokuwiki\Parsing\Lexer\Lexer {

    /**
     * Splits the text into tokens and dispatches them to the handler.
     *
     * @param string $text
     * @return bool false if a handler call fails, the lexer stops making progress or the
     *              reduction reports an error; otherwise the result of handling the
     *              remaining unmatched text
     */
    public function parse($text) {
        $lastMode = '';

        while (is_array($parsed = $this->reduce($text))) {
            list($unmatched, $matched, $mode) = $parsed;

            if (!$this->dispatchTokens($unmatched, $matched, $mode, 0, 0)) {
                return false;
            }

            /* Nothing was consumed and the mode did not change: bail out instead of looping forever */
            if (empty($unmatched) && empty($matched) && ($lastMode == $this->modeStack->getCurrent())) {
                return false;
            }

            $lastMode = $this->modeStack->getCurrent();
        }

        if (!$parsed) {
            return false;
        }

        /* Whatever is left in $text after the last reduction is reported as unmatched */
        return $this->invokeHandler($text, DOKU_LEXER_UNMATCHED, 0);
    }

    /**
     * Calls the handler method that corresponds to the current mode.
     *
     * The method name is the mapped handler of the current mode if there is one, or the mode
     * name itself. Empty unmatched text is silently accepted without calling the handler.
     *
     * @param string $text
     * @param int $state one of the DOKU_LEXER_* states
     * @param int $pos
     * @return bool
     */
    protected function invokeHandler($text, $state, $pos) {
        if ($text == "" && $state == DOKU_LEXER_UNMATCHED) {
            return true;
        }

        $mode = $this->modeStack->getCurrent();
        $handler = isset($this->mode_handlers[$mode]) ? $this->mode_handlers[$mode] : $mode;

        return $this->handler->$handler($text, $state, $pos);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Base class of all BibTeX lexer modes.
 *
 * Derived classes fill the pattern lists in their constructors; this class registers them
 * with the lexer.
 */
class refnotes_bibtex_mode extends \dokuwiki\Parsing\ParserMode\AbstractMode {

    protected $name;
    protected $handler;
    protected $specialPattern;
    protected $entryPattern;
    protected $exitPattern;

    /**
     * Constructor
     *
     * The mode name is derived from the class name: refnotes_bibtex_<name>_mode gives <name>.
     */
    public function __construct() {
        $this->name = preg_replace('/refnotes_bibtex_(\w+)_mode/', '$1', get_class($this));
        $this->handler = '';

        $this->specialPattern = array();
        $this->entryPattern = array();
        $this->exitPattern = array();
    }

    /**
     * @return int always 0
     */
    public function getSort() {
        return 0;
    }

    /**
     * @return string name of the mode
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Registers special and entry patterns of this mode within the given mode and, if a
     * handler name is set, maps this mode to that handler.
     *
     * @param string $mode name of the mode the patterns are added to
     */
    public function connectTo($mode) {
        foreach ($this->specialPattern as $pattern) {
            $this->Lexer->addSpecialPattern($pattern, $mode, $this->name);
        }

        foreach ($this->entryPattern as $pattern) {
            $this->Lexer->addEntryPattern($pattern, $mode, $this->name);
        }

        if ($this->handler != '') {
            $this->Lexer->mapHandler($this->name, $this->handler);
        }
    }

    /**
     * Registers exit patterns of this mode.
     */
    public function postConnect() {
        foreach ($this->exitPattern as $pattern) {
            $this->Lexer->addExitPattern($pattern, $this->name);
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode that matches the text preceding an "@".
 */
class refnotes_bibtex_outside_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     */
    public function __construct() {
        parent::__construct();

        $this->specialPattern[] = '[^@]+(?=@)';
    }

    /**
     * Connects the mode and additionally maps the 'base' mode to this mode's handler.
     *
     * @param string $mode
     */
    public function connectTo($mode) {
        parent::connectTo($mode);

        $this->Lexer->mapHandler('base', $this->name);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for a BibTeX entry delimited either by parentheses ('parented') or by braces.
 *
 * Both flavours share the 'entry' handler but are registered under different mode names
 * ('entry_parented', 'entry_braced').
 */
class refnotes_bibtex_entry_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * @param string $type 'parented' for @type(...); any other value selects @type{...}
     */
    public function __construct($type) {
        parent::__construct();

        $this->handler = $this->name;
        $this->name .= '_' . $type;

        list($open, $close) = ($type == 'parented') ? array('\(', '\)') : array('{', '}');

        $this->entryPattern[] = '^@\w+\s*' . $open . '(?=.*' . $close . ')';
        /* The entry ends at the closing delimiter or right before the next "@" */
        $this->exitPattern[] = '\s*(?:' . $close . '|(?=@))';

        $this->allowedModes = array('field');
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for a "name = value" field inside an entry.
 */
class refnotes_bibtex_field_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     */
    public function __construct() {
        parent::__construct();

        $this->entryPattern[] = '^\s*\w[\w-]+\s*=\s*';
        /* The field ends at a comma or right before ")", "}" or "@" */
        $this->exitPattern[] = '\s*(?:,|(?=[\)}@]))';

        $this->allowedModes = array('integer_value', 'string_value_quoted', 'string_value_braced', 'concatenation');
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for an unquoted run of digits used as a field value.
 */
class refnotes_bibtex_integer_value_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     */
    public function __construct() {
        parent::__construct();

        $this->specialPattern[] = '^\d+';
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for a string value delimited either by double quotes ('quoted') or by braces.
 *
 * Both flavours share the 'string_value' handler.
 */
class refnotes_bibtex_string_value_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * @param string $type 'quoted' for "..."; any other value selects {...}
     */
    public function __construct($type) {
        parent::__construct();

        $this->handler = $this->name;
        $this->name .= '_' . $type;

        list($open, $close, $exit) = ($type == 'quoted') ? array('"', '"', '"') : array('{', '}', '(?:}|(?=@))');

        $this->entryPattern[] = '^' . $open . '(?=.*' . $close . ')';
        $this->exitPattern[] = $exit;

        $this->allowedModes = array('nested_braces_' . $type);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for brace groups nested inside a string value. The mode allows itself, so groups
 * can be nested further.
 *
 * Both flavours share the 'nested_braces' handler.
 */
class refnotes_bibtex_nested_braces_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     *
     * @param string $type type of the enclosing string value ('quoted' or 'braced')
     */
    public function __construct($type) {
        parent::__construct();

        $this->handler = $this->name;
        $this->name .= '_' . $type;

        $this->entryPattern[] = '{(?=.*})';
        $this->exitPattern[] = ($type == 'quoted') ? '}' : '(?:}|(?=@))';

        $this->allowedModes = array($this->name);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Mode for the "#" concatenation operator between field values.
 */
class refnotes_bibtex_concatenation_mode extends refnotes_bibtex_mode {

    /**
     * Constructor
     */
    public function __construct() {
        parent::__construct();

        $this->specialPattern[] = '\s*#\s*';
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Receives the lexer callbacks and assembles entries and fields from them.
 */
class refnotes_bibtex_handler {

    private $entries;
    private $entry;
    private $field;

    /**
     * Constructor
     */
    public function __construct() {
        $this->reset();
    }

    /**
     * Drops everything collected so far and starts with an empty entry stash.
     */
    public function reset() {
        $this->entries = new refnotes_bibtex_entry_stash();
        $this->entry = NULL;
        $this->field = NULL;
    }

    /**
     * Returns the collected entries with the 'author' field rewritten for display.
     *
     * Authors are split on ' and '; every "Last, First" is turned into "First Last" and the
     * authors are joined back with ', '.
     *
     * @return array list of entry data arrays
     */
    public function finalize() {
        $entries = $this->entries->getEntries();

        foreach ($entries as &$entry) {
            if (array_key_exists('author', $entry)) {
                $authors = explode(' and ', $entry['author']);

                foreach ($authors as &$author) {
                    $author = implode(' ', array_reverse(explode(', ', $author)));
                }

                /* Break the reference so that $authors holds plain values */
                unset($author);

                $entry['author'] = implode(', ', $authors);
            }
        }

        /* Break the reference so that the returned array does not keep a reference slot */
        unset($entry);

        return $entries;
    }

    /**
     * Lexer callback for the text outside of entries.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function outside($match, $state) {
        /* Ignore everything outside the entries */
        return true;
    }

    /**
     * Lexer callback for entries: creates an entry on enter, passes unmatched text to it and
     * stores it in the stash on exit.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function entry($match, $state) {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                /* Strip the leading "@" and the trailing delimiter to get the entry type */
                $this->entry = new refnotes_bibtex_entry(preg_replace('/@(\w+)\W+/', '$1', $match));
                break;

            case DOKU_LEXER_UNMATCHED:
                $this->entry->handleUnmatched($match);
                break;

            case DOKU_LEXER_EXIT:
                $this->entries->add($this->entry);
                $this->entry = NULL;
                break;
        }

        return true;
    }

    /**
     * Lexer callback for fields: creates a field on enter, records unmatched text as a token
     * and attaches the field to the current entry on exit.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function field($match, $state) {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                /* Strip surrounding whitespace and "=" to get the field name */
                $this->field = new refnotes_bibtex_field(preg_replace('/\W*(\w[\w-]+)\W*/', '$1', $match));
                break;

            case DOKU_LEXER_UNMATCHED:
                $this->field->addToken('unmatched', $match);
                break;

            case DOKU_LEXER_EXIT:
                $this->entry->addField($this->field);
                $this->field = NULL;
                break;
        }

        return true;
    }

    /**
     * Lexer callback for integer values.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function integer_value($match, $state) {
        $this->field->addToken('integer', $match);

        return true;
    }

    /**
     * Lexer callback for string values. Only the text between the delimiters is recorded.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function string_value($match, $state) {
        if ($state == DOKU_LEXER_UNMATCHED) {
            $this->field->addToken('string', $match);
        }

        return true;
    }

    /**
     * Lexer callback for nested braces. Only the text between the braces is recorded.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function nested_braces($match, $state) {
        if ($state == DOKU_LEXER_UNMATCHED) {
            $this->field->addToken('braces', $match);
        }

        return true;
    }

    /**
     * Lexer callback for the concatenation operator.
     *
     * @param string $match
     * @param int $state
     * @return bool always true
     */
    public function concatenation($match, $state) {
        /* Nothing special to do, concatenation will happen anyway */
        return true;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Collects the parsed entries and keeps track of @string definitions and of the current
 * namespace.
 */
class refnotes_bibtex_entry_stash {

    private $entry;
    private $strings;
    private $namespace;

    /**
     * Constructor
     */
    public function __construct() {
        $this->entry = array();
        $this->strings = new refnotes_bibtex_strings();
        $this->namespace = ':';
    }

    /**
     * @return array list of entry data arrays, each one having a 'note-name' key
     */
    public function getEntries() {
        return $this->entry;
    }

    /**
     * Processes a parsed entry depending on its type:
     *   - reference entries (article, book, ...) with a valid name are stored; a name that
     *     does not start with ':' is prefixed with the current namespace
     *   - @string defines a string using the first field of the entry
     *   - @comment named "refnotes" can change the current namespace with a 'namespace' field
     * Everything else is ignored.
     *
     * @param refnotes_bibtex_entry $entry
     */
    public function add($entry) {
        static $entryType = array(
            'article', 'book', 'booklet', 'conference', 'inbook', 'incollection', 'inproceedings', 'manual',
            'mastersthesis', 'misc', 'phdthesis', 'proceedings', 'techreport', 'unpublished');

        $type = $entry->getType();
        $name = $entry->getName();

        if (in_array($type, $entryType, true)) {
            if ($this->isValidRefnotesName($name)) {
                if ($name[0] != ':') {
                    $name = $this->namespace . $name;
                }

                $this->entry[] = array_merge(array('note-name' => $name), $entry->getData($this->strings));
            }
        }
        elseif ($type == 'string') {
            $data = $entry->getData($this->strings);

            /* reset() takes its argument by reference, so it needs a real variable */
            $names = array_keys($data);
            $name = reset($names);

            /* reset() returns false for an entry without fields */
            if (($name !== false) && $this->isValidStringName($name)) {
                $this->strings->add($name, $data[$name]);
            }
        }
        elseif (($type == 'comment') && (strtolower($name) == 'refnotes')) {
            $data = $entry->getData($this->strings);

            if (isset($data['namespace']) && $this->isValidRefnotesName($data['namespace'])) {
                $this->namespace = refnotes_namespace::canonizeName($data['namespace']);
            }
        }
    }

    /**
     * Checks the name against the 'full-extended' note name pattern.
     *
     * @param string $name
     * @return bool
     */
    private function isValidRefnotesName($name) {
        return preg_match('/^' . refnotes_note::getNamePattern('full-extended') . '$/', $name) == 1;
    }

    /**
     * Checks that the name starts with a letter followed only by word characters.
     *
     * @param string $name
     * @return bool
     */
    private function isValidStringName($name) {
        return preg_match('/^[[:alpha:]]\w*$/', $name) == 1;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * A single BibTeX entry: its type, its name (key) and its fields.
 */
class refnotes_bibtex_entry {

    private $type;
    private $name;
    private $field;

    /**
     * Constructor
     *
     * @param string $type entry type, stored in lower case
     */
    public function __construct($type) {
        $this->type = strtolower($type);
        $this->name = '';
        $this->field = array();
    }

    /**
     * @return string lower-cased entry type
     */
    public function getType() {
        return $this->type;
    }

    /**
     * @return string entry name or an empty string if none was found
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Returns field values indexed by field name. If a field name occurs more than once the
     * last value wins.
     *
     * @param refnotes_bibtex_strings $strings string definitions used to resolve values
     * @return array
     */
    public function getData($strings) {
        $data = array();

        foreach ($this->field as $field) {
            $data[$field->getName()] = $field->getValue($strings);
        }

        return $data;
    }

    /**
     * Takes the entry name from unmatched entry text: the first run of characters other than
     * whitespace and commas that is followed by a comma. Once set, the name is not changed.
     *
     * @param string $token
     */
    public function handleUnmatched($token) {
        if (($this->name == '') && (preg_match('/\s*([^\s,]+)\s*,/', $token, $match) == 1)) {
            $this->name = $match[1];
        }
    }

    /**
     * @param refnotes_bibtex_field $field
     */
    public function addField($field) {
        $this->field[] = $field;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * A single field of an entry: its name and the tokens that make up its value.
 */
class refnotes_bibtex_field {

    private $name;
    private $token;

    /**
     * Constructor
     *
     * @param string $name field name, stored in lower case
     */
    public function __construct($name) {
        $this->name = strtolower($name);
        $this->token = array();
    }

    /**
     * @return string lower-cased field name
     */
    public function getName() {
        return $this->name;
    }

    /**
     * Builds the field value by concatenating all tokens. The text of 'unmatched' tokens is
     * treated as a string name and replaced with the defined string (or nothing if there is
     * no such string). The result is trimmed and runs of whitespace are collapsed into a
     * single space.
     *
     * @param refnotes_bibtex_strings $strings
     * @return string
     */
    public function getValue($strings) {
        $value = '';

        foreach ($this->token as $token) {
            $text = $token->text;

            if ($token->type == 'unmatched') {
                $text = $strings->lookup(strtolower(trim($text)));
            }

            $value .= $text;
        }

        return preg_replace('/\s+/', ' ', trim($value));
    }

    /**
     * @param string $type token type ('unmatched', 'integer', 'string' or 'braces')
     * @param string $text
     */
    public function addToken($type, $text) {
        $this->token[] = new refnotes_bibtex_field_token($type, $text);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * A typed piece of a field value.
 */
class refnotes_bibtex_field_token {

    public $type;
    public $text;

    /**
     * Constructor
     *
     * @param string $type
     * @param string $text
     */
    public function __construct($type, $text) {
        $this->type = $type;
        $this->text = $text;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Storage of the strings defined with @string entries.
 */
class refnotes_bibtex_strings {

    private $string;

    /**
     * Constructor
     */
    public function __construct() {
        $this->string = array();
    }

    /**
     * Defines a string, replacing any previous definition with the same name.
     *
     * @param string $name
     * @param string $value
     */
    public function add($name, $value) {
        $this->string[$name] = $value;
    }

    /**
     * @param string $name
     * @return string value of the string or an empty string if it is not defined
     */
    public function lookup($name) {
        return array_key_exists($name, $this->string) ? $this->string[$name] : '';
    }
}