<?php
/*
Released through http://bibliophile.sourceforge.net under the GPL licence.
Do whatever you like with this -- some credit to the author(s) would be appreciated.

A collection of PHP classes to manipulate bibtex files.

If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.

Mark Grimshaw 2004/2005
http://bibliophile.sourceforge.net

28/04/2005 - Mark Grimshaw. Efficiency improvements.
*/
// For a quick command-line test (php -f PARSECREATORS.php) after installation, uncomment these lines:

/***********************
	$authors = "Mark N. Grimshaw and Bush III, G.W. & M. C. Hammer Jr. and von Frankenstein, Ferdinand Cecil, P.H. & Charles Louis Xavier Joseph de la Vallee Poussin";
	$creator = new PARSECREATORS();
	$creatorArray = $creator->parse($authors);
	print_r($creatorArray);
***********************/

/**
 * Splits a bibtex author/editor field into its individual creators and breaks each
 * creator into first name(s), initials, surname and surname prefix.
 */
class PARSECREATORS
{
	/**
	 * Lowercase-initial tokens (e.g. 'de', 'la') found among the forename tokens of the
	 * creator currently being parsed. Reset by parse() for every creator and appended to
	 * by grabFirstnameInitials(). Kept public because it used to be created on the fly as
	 * a public dynamic property.
	 */
	public $prefix = array();

	/**
	 * Nothing to initialise; present so that `new PARSECREATORS()` keeps working as before.
	 */
	public function __construct()
	{
	}

	/**
	 * Create writer arrays from bibtex input.
	 *
	 * The author field can take these forms (delimiters between authors are 'and' or '&'):
	 * 1. <first-tokens> <von-tokens> <last-tokens>
	 * 2. <von-tokens> <last-tokens>, <first-tokens>
	 * 3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
	 *
	 * @param string $input Raw author/editor field.
	 * @return array|false One entry per creator, each being
	 *         array(firstname, initials, surname, prefix), all strings. Every initial is
	 *         preceded by a single space (e.g. ' M N') and the surname is always followed by
	 *         ' ' plus the appellation, so it ends in a space when there is no appellation.
	 *         FALSE is returned only if no creator was produced.
	 */
	public function parse($input)
	{
		$creators = array();
// split on ' and ' or ' & '
		$authorArray = preg_split("/\s(and|&)\s/i", trim((string) $input));
		foreach ($authorArray as $value)
		{
			$appellation = $prefix = $surname = '';
			$this->prefix = array();
// Normalise once. Every branch below works on this cleaned-up string so that stray
// whitespace around the 'and' / '&' delimiter cannot produce empty tokens.
			$name = preg_replace("/\s{2,}/", ' ', trim($value));
// Trim each comma-separated part so that ', Jr.' does not carry its leading space into the surname.
			$author = array_map('trim', explode(',', $name));
			$size = count($author);
// No commas therefore something like Mark Grimshaw, Mark Nicholas Grimshaw, M N Grimshaw, Mark N. Grimshaw
			if ($size == 1)
			{
// Is complete surname enclosed in {...}
				if (preg_match("/(.*)\{(.*)\}/", $name, $matches))
				{
					$author = explode(' ', $matches[1]);
					$surname = $matches[2];
				}
				else
				{
					$author = explode(' ', $name);
// last of array is surname (no prefix if entered correctly)
					$surname = array_pop($author);
				}
			}
// Something like Grimshaw, Mark or Grimshaw, Mark Nicholas or Grimshaw, M N or Grimshaw, Mark N.
			elseif ($size == 2)
			{
// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
// Three or more parts: we're looking at something like Bush, Jr. III, George W
			else
			{
// middle of array is 'Jr.', 'IV' etc.
				$appellation = implode(' ', array_splice($author, 1, 1));
// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
// Whatever is left over is forenames and/or initials.
			$remainder = implode(' ', $author);
			list($firstname, $initials) = $this->grabFirstnameInitials($remainder);
// A prefix found among the forename tokens takes precedence over one found in front of the surname.
			if (!empty($this->prefix))
				$prefix = implode(' ', $this->prefix);
			$surname = $surname . ' ' . $appellation;
// Interpolation forces every element to a string (grabSurname() may hand back FALSE).
			$creators[] = array("$firstname", "$initials", "$surname", "$prefix");
		}
		return $creators ? $creators : FALSE;
	}

	/**
	 * Grab firstname and initials which may be of form "A.B.C." or "A. B. C. " or " A B C " etc.
	 *
	 * Tokens are classified in this order: a token starting with a-z is treated as a surname
	 * prefix and appended to $this->prefix; a token containing two or more consecutive
	 * letters/hyphens is a first name; anything else is an initial (dots are replaced by spaces).
	 * Empty tokens are ignored.
	 *
	 * @param string $remainder Space-separated forename/initial tokens.
	 * @return array array(firstname, initials); first names are joined by single spaces,
	 *         each initials token is preceded by a single space.
	 */
	public function grabFirstnameInitials($remainder)
	{
		$firstnameArray = $initialsArray = array();
		foreach (explode(' ', $remainder) as $value)
		{
			$value = trim($value);
// Leading, trailing or doubled spaces yield empty tokens; they are not initials.
			if ($value === '')
				continue;
			if (preg_match('/^[a-z]/', $value))
				$this->prefix[] = $value;
			elseif (preg_match("/[a-zA-Z\-]{2,}/", $value))
				$firstnameArray[] = $value;
			else
				$initialsArray[] = trim(str_replace('.', ' ', $value));
		}
		$initials = '';
		foreach ($initialsArray as $initial)
			$initials .= ' ' . $initial;
		return array(implode(' ', $firstnameArray), $initials);
	}

	/**
	 * Separate a surname from its leading prefix.
	 *
	 * Surname may have title such as 'den', 'von', 'de la' etc. - characterised by first
	 * character lowercased. Any uppercased part means lowercased parts following are part of
	 * the surname (e.g. Van den Bussche).
	 *
	 * @param string $input Space-separated surname tokens, possibly starting with a prefix.
	 * @return array array(surname, prefix); either element is FALSE when no token fell into it.
	 */
	public function grabSurname($input)
	{
		$prefixParts = $surnameParts = array();
		$noPrefix = FALSE;
		foreach (explode(' ', $input) as $value)
		{
			if (!$noPrefix && preg_match('/^[a-z]/', $value))
				$prefixParts[] = $value;
			else
			{
// From the first non-lowercase token onwards everything belongs to the surname.
				$surnameParts[] = $value;
				$noPrefix = TRUE;
			}
		}
		$surname = $surnameParts ? implode(' ', $surnameParts) : FALSE;
		$prefix = $prefixParts ? implode(' ', $prefixParts) : FALSE;
		return array($surname, $prefix);
	}
}