1<?php
2/*
3Released through http://bibliophile.sourceforge.net under the GPL licence.
4Do whatever you like with this -- some credit to the author(s) would be appreciated.
5
6A collection of PHP classes to manipulate bibtex files.
7
8If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.
9
10Mark Grimshaw 2004/2005
11http://bibliophile.sourceforge.net
12
1328/04/2005 - Mark Grimshaw.  Efficiency improvements.
14*/
15// For a quick command-line test (php -f PARSECREATORS.php) after installation, uncomment these lines:
16
17/***********************
18	$authors = "Mark N. Grimshaw and Bush III, G.W. & M. C. Hammer Jr. and von Frankenstein, Ferdinand Cecil, P.H. & Charles Louis Xavier Joseph de la Vallee Poussin";
19	$creator = new PARSECREATORS();
20	$creatorArray = $creator->parse($authors);
21	print_r($creatorArray);
22***********************/
23
/**
 * Splits a BibTeX-style author/editor field into its individual creators and
 * breaks each creator into first name(s), initials, surname and surname prefix.
 *
 * Accepted forms for each creator (creators are delimited by ' and ' or ' & '):
 *   1. <first-tokens> <von-tokens> <last-tokens>
 *   2. <von-tokens> <last-tokens>, <first-tokens>
 *   3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
 */
class PARSECREATORS
{
	/**
	 * Tokens starting with a lowercase ASCII letter that were found among the
	 * given-name tokens of the creator currently being parsed.  Reset by parse()
	 * for every creator and filled by grabFirstnameInitials().
	 *
	 * @var array
	 */
	public $prefix = array();

	/**
	 * Constructor.  Nothing to initialise.
	 */
	public function __construct()
	{
	}

	/**
	 * Legacy PHP 4-style constructor name.  Kept as an ordinary, empty method so
	 * that existing code calling parent::PARSECREATORS() keeps working.
	 */
	public function PARSECREATORS()
	{
	}

	/**
	 * Create creator arrays from a BibTeX author/editor field.
	 *
	 * Each returned element is array(firstname, initials, surname, prefix), all
	 * strings.  For backward compatibility the strings are not trimmed: initials
	 * carry a leading space per initial group and the surname is always followed
	 * by a space and the appellation ('Jr.', 'III', ...; empty if there is none).
	 *
	 * @param string $input Raw field value, e.g. "Mark N. Grimshaw and Bush III, G.W."
	 * @return array|false  List of creators, or FALSE if $input is empty or cannot be split.
	 */
	public function parse($input)
	{
		$input = trim((string) $input);
// Nothing to parse -- without this guard an empty field would yield one blank creator.
		if($input === '')
			return FALSE;
// split on ' and ' or ' & '
		$authorArray = preg_split('/\s(and|&)\s/i', $input);
		if($authorArray === FALSE)
			return FALSE;
		$creators = array();
		foreach($authorArray as $value)
		{
			$appellation = $prefix = $surname = '';
			$this->prefix = array();
// Normalise once and use the normalised form everywhere, so that stray whitespace
// around the delimiter cannot produce empty name tokens.
			$value = preg_replace('/\s{2,}/', ' ', trim($value));
			$author = explode(',', $value);
			$size = count($author);
// No commas therefore something like Mark Grimshaw, Mark Nicholas Grimshaw, M N Grimshaw, Mark N. Grimshaw
			if($size == 1)
			{
// Is the complete surname enclosed in {...}?
				if(preg_match('/(.*)\{(.*)\}/', $value, $matches))
				{
					$author = explode(' ', $matches[1]);
					$surname = $matches[2];
				}
				else
				{
					$author = explode(' ', $value);
// last of array is surname (no prefix if entered correctly)
					$surname = array_pop($author);
				}
			}
// Something like Grimshaw, Mark or Grimshaw, Mark Nicholas or Grimshaw, M N or Grimshaw, Mark N.
			else if($size == 2)
			{
// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
// Three or more parts: something like Bush, Jr. III, George W
			else
			{
// second element is 'Jr.', 'IV' etc.
				$appellation = implode(' ', array_splice($author, 1, 1));
// first of array is surname (perhaps with prefix)
				list($surname, $prefix) = $this->grabSurname(array_shift($author));
			}
// Whatever is left are the given names and/or initials.
			list($firstname, $initials) = $this->grabFirstnameInitials(implode(' ', $author));
// Lowercase tokens found among the given names take precedence as the prefix.
			if(!empty($this->prefix))
				$prefix = implode(' ', $this->prefix);
			$surname = $surname . ' ' . $appellation;
			$creators[] = array((string) $firstname, (string) $initials, (string) $surname, (string) $prefix);
		}
		if(!empty($creators))
			return $creators;
		return FALSE;
	}

	/**
	 * Grab firstname and initials which may be of form "A.B.C." or "A. B. C. " or " A B C " etc.
	 *
	 * Tokens are separated by single spaces.  A token starting with a lowercase
	 * ASCII letter is appended to $this->prefix; a token containing two or more
	 * consecutive letters/hyphens is a first name; anything else is treated as
	 * initials, with '.' replaced by a space.
	 *
	 * @param string $remainder Name tokens left after the surname has been removed.
	 * @return array            array(firstname, initials); every initials group is preceded by a space.
	 */
	public function grabFirstnameInitials($remainder)
	{
		$firstnameArray = array();
		$initials = '';
		foreach(explode(' ', $remainder) as $value)
		{
			$token = trim($value);
			if($this->startsLowercase($value))
				$this->prefix[] = $value;
			else if(preg_match('/[a-zA-Z\-]{2,}/', $token))
				$firstnameArray[] = $token;
			else
				$initials .= ' ' . trim(str_replace('.', ' ', $token));
		}
		return array(implode(' ', $firstnameArray), $initials);
	}

	/**
	 * Separate a surname from its leading prefix.
	 *
	 * The surname may have a title such as 'den', 'von', 'de la' etc. --
	 * characterised by the first character being lowercase.  Once a token that
	 * does not start with a lowercase letter has been seen, all following tokens
	 * belong to the surname (e.g. Van den Bussche).
	 *
	 * @param string $input Surname part of a creator, tokens separated by single spaces.
	 * @return array        array(surname, prefix); either element is FALSE when no token was collected for it.
	 */
	public function grabSurname($input)
	{
		$surnameParts = $prefixParts = array();
		$noPrefix = FALSE;
		foreach(explode(' ', $input) as $value)
		{
			if(!$noPrefix && $this->startsLowercase($value))
				$prefixParts[] = $value;
			else
			{
				$surnameParts[] = $value;
				$noPrefix = TRUE;
			}
		}
		$surname = empty($surnameParts) ? FALSE : implode(' ', $surnameParts);
		$prefix = empty($prefixParts) ? FALSE : implode(' ', $prefixParts);
		return array($surname, $prefix);
	}

	/**
	 * Does the token begin with a lowercase ASCII letter (a-z)?
	 *
	 * @param string $token
	 * @return bool FALSE for an empty token.
	 */
	private function startsLowercase($token)
	{
		if($token === '')
			return FALSE;
		$code = ord(substr($token, 0, 1));
		return ($code >= 97) && ($code <= 122);
	}
}
138?>
139