1<?php
2
3/////////////////////////////////////////////////////////////////
4/// getID3() by James Heinrich <info@getid3.org>               //
5//  available at https://github.com/JamesHeinrich/getID3       //
6//            or https://www.getid3.org                        //
7//            or http://getid3.sourceforge.net                 //
8//  see readme.txt for more details                            //
9/////////////////////////////////////////////////////////////////
10//                                                             //
11// module.tag.id3v1.php                                        //
12// module for analyzing ID3v1 tags                             //
13// dependencies: NONE                                          //
14//                                                            ///
15/////////////////////////////////////////////////////////////////
16
// Stop immediately when GETID3_INCLUDEPATH is not defined, i.e. when this module is requested
// directly instead of being loaded by getID3 (prevents path-exposing attacks on public webservers).
if (defined('GETID3_INCLUDEPATH') === false) {
	exit;
}
20
21class getid3_id3v1 extends getid3_handler
22{
23	/**
24	 * @return bool
25	 */
26	public function Analyze() {
27		$info = &$this->getid3->info;
28
29		if (!getid3_lib::intValueSupported($info['filesize'])) {
30			$this->warning('Unable to check for ID3v1 because file is larger than '.round(PHP_INT_MAX / 1073741824).'GB');
31			return false;
32		}
33
34		$this->fseek(-256, SEEK_END);
35		$preid3v1 = $this->fread(128);
36		$id3v1tag = $this->fread(128);
37
38		if (substr($id3v1tag, 0, 3) == 'TAG') {
39
40			$info['avdataend'] = $info['filesize'] - 128;
41
42			$ParsedID3v1['title']   = $this->cutfield(substr($id3v1tag,   3, 30));
43			$ParsedID3v1['artist']  = $this->cutfield(substr($id3v1tag,  33, 30));
44			$ParsedID3v1['album']   = $this->cutfield(substr($id3v1tag,  63, 30));
45			$ParsedID3v1['year']    = $this->cutfield(substr($id3v1tag,  93,  4));
46			$ParsedID3v1['comment'] =                 substr($id3v1tag,  97, 30);  // can't remove nulls yet, track detection depends on them
47			$ParsedID3v1['genreid'] =             ord(substr($id3v1tag, 127,  1));
48
49			// If second-last byte of comment field is null and last byte of comment field is non-null
50			// then this is ID3v1.1 and the comment field is 28 bytes long and the 30th byte is the track number
51			if (($id3v1tag[125] === "\x00") && ($id3v1tag[126] !== "\x00")) {
52				$ParsedID3v1['track_number'] = ord(substr($ParsedID3v1['comment'], 29,  1));
53				$ParsedID3v1['comment']      =     substr($ParsedID3v1['comment'],  0, 28);
54			}
55			$ParsedID3v1['comment'] = $this->cutfield($ParsedID3v1['comment']);
56
57			$ParsedID3v1['genre'] = $this->LookupGenreName($ParsedID3v1['genreid']);
58			if (!empty($ParsedID3v1['genre'])) {
59				unset($ParsedID3v1['genreid']);
60			}
61			if (isset($ParsedID3v1['genre']) && (empty($ParsedID3v1['genre']) || ($ParsedID3v1['genre'] == 'Unknown'))) {
62				unset($ParsedID3v1['genre']);
63			}
64
65			foreach ($ParsedID3v1 as $key => $value) {
66				$ParsedID3v1['comments'][$key][0] = $value;
67			}
68			$ID3v1encoding = $this->getid3->encoding_id3v1;
69			if ($this->getid3->encoding_id3v1_autodetect) {
70				// ID3v1 encoding detection hack START
71				// ID3v1 is defined as always using ISO-8859-1 encoding, but it is not uncommon to find files tagged with ID3v1 using Windows-1251 or other character sets
72				// Since ID3v1 has no concept of character sets there is no certain way to know we have the correct non-ISO-8859-1 character set, but we can guess
73				foreach ($ParsedID3v1['comments'] as $tag_key => $valuearray) {
74					foreach ($valuearray as $key => $value) {
75						if (preg_match('#^[\\x00-\\x40\\x80-\\xFF]+$#', $value) && !ctype_digit((string) $value)) { // check for strings with only characters above chr(128) and punctuation/numbers, but not just numeric strings (e.g. track numbers or years)
76							foreach (array('Windows-1251', 'KOI8-R') as $id3v1_bad_encoding) {
77								if (function_exists('mb_convert_encoding') && @mb_convert_encoding($value, $id3v1_bad_encoding, $id3v1_bad_encoding) === $value) {
78									$ID3v1encoding = $id3v1_bad_encoding;
79									$this->warning('ID3v1 detected as '.$id3v1_bad_encoding.' text encoding in '.$tag_key);
80									break 3;
81								} elseif (function_exists('iconv') && @iconv($id3v1_bad_encoding, $id3v1_bad_encoding, $value) === $value) {
82									$ID3v1encoding = $id3v1_bad_encoding;
83									$this->warning('ID3v1 detected as '.$id3v1_bad_encoding.' text encoding in '.$tag_key);
84									break 3;
85								}
86							}
87						}
88					}
89				}
90				// ID3v1 encoding detection hack END
91			}
92
93			// ID3v1 data is supposed to be padded with NULL characters, but some taggers pad with spaces
94			$GoodFormatID3v1tag = $this->GenerateID3v1Tag(
95											$ParsedID3v1['title'],
96											$ParsedID3v1['artist'],
97											$ParsedID3v1['album'],
98											$ParsedID3v1['year'],
99											(isset($ParsedID3v1['genre']) ? $this->LookupGenreID($ParsedID3v1['genre']) : false),
100											$ParsedID3v1['comment'],
101											(!empty($ParsedID3v1['track_number']) ? $ParsedID3v1['track_number'] : ''));
102			$ParsedID3v1['padding_valid'] = true;
103			if ($id3v1tag !== $GoodFormatID3v1tag) {
104				$ParsedID3v1['padding_valid'] = false;
105				$this->warning('Some ID3v1 fields do not use NULL characters for padding');
106			}
107
108			$ParsedID3v1['tag_offset_end']   = $info['filesize'];
109			$ParsedID3v1['tag_offset_start'] = $ParsedID3v1['tag_offset_end'] - 128;
110
111			$info['id3v1'] = $ParsedID3v1;
112			$info['id3v1']['encoding'] = $ID3v1encoding;
113		}
114
115		if (substr($preid3v1, 0, 3) == 'TAG') {
116			// The way iTunes handles tags is, well, brain-damaged.
117			// It completely ignores v1 if ID3v2 is present.
118			// This goes as far as adding a new v1 tag *even if there already is one*
119
120			// A suspected double-ID3v1 tag has been detected, but it could be that
121			// the "TAG" identifier is a legitimate part of an APE or Lyrics3 tag
122			if (substr($preid3v1, 96, 8) == 'APETAGEX') {
123				// an APE tag footer was found before the last ID3v1, assume false "TAG" synch
124			} elseif (substr($preid3v1, 119, 6) == 'LYRICS') {
125				// a Lyrics3 tag footer was found before the last ID3v1, assume false "TAG" synch
126			} else {
127				// APE and Lyrics3 footers not found - assume double ID3v1
128				$this->warning('Duplicate ID3v1 tag detected - this has been known to happen with iTunes');
129				$info['avdataend'] -= 128;
130			}
131		}
132
133		return true;
134	}
135
136	/**
137	 * @param string $str
138	 *
139	 * @return string
140	 */
141	public static function cutfield($str) {
142		return trim(substr($str, 0, strcspn($str, "\x00")));
143	}
144
145	/**
146	 * @param bool $allowSCMPXextended
147	 *
148	 * @return string[]
149	 */
150	public static function ArrayOfGenres($allowSCMPXextended=false) {
151		static $GenreLookup = array(
152			0    => 'Blues',
153			1    => 'Classic Rock',
154			2    => 'Country',
155			3    => 'Dance',
156			4    => 'Disco',
157			5    => 'Funk',
158			6    => 'Grunge',
159			7    => 'Hip-Hop',
160			8    => 'Jazz',
161			9    => 'Metal',
162			10   => 'New Age',
163			11   => 'Oldies',
164			12   => 'Other',
165			13   => 'Pop',
166			14   => 'R&B',
167			15   => 'Rap',
168			16   => 'Reggae',
169			17   => 'Rock',
170			18   => 'Techno',
171			19   => 'Industrial',
172			20   => 'Alternative',
173			21   => 'Ska',
174			22   => 'Death Metal',
175			23   => 'Pranks',
176			24   => 'Soundtrack',
177			25   => 'Euro-Techno',
178			26   => 'Ambient',
179			27   => 'Trip-Hop',
180			28   => 'Vocal',
181			29   => 'Jazz+Funk',
182			30   => 'Fusion',
183			31   => 'Trance',
184			32   => 'Classical',
185			33   => 'Instrumental',
186			34   => 'Acid',
187			35   => 'House',
188			36   => 'Game',
189			37   => 'Sound Clip',
190			38   => 'Gospel',
191			39   => 'Noise',
192			40   => 'Alt. Rock',
193			41   => 'Bass',
194			42   => 'Soul',
195			43   => 'Punk',
196			44   => 'Space',
197			45   => 'Meditative',
198			46   => 'Instrumental Pop',
199			47   => 'Instrumental Rock',
200			48   => 'Ethnic',
201			49   => 'Gothic',
202			50   => 'Darkwave',
203			51   => 'Techno-Industrial',
204			52   => 'Electronic',
205			53   => 'Pop-Folk',
206			54   => 'Eurodance',
207			55   => 'Dream',
208			56   => 'Southern Rock',
209			57   => 'Comedy',
210			58   => 'Cult',
211			59   => 'Gangsta Rap',
212			60   => 'Top 40',
213			61   => 'Christian Rap',
214			62   => 'Pop/Funk',
215			63   => 'Jungle',
216			64   => 'Native American',
217			65   => 'Cabaret',
218			66   => 'New Wave',
219			67   => 'Psychedelic',
220			68   => 'Rave',
221			69   => 'Showtunes',
222			70   => 'Trailer',
223			71   => 'Lo-Fi',
224			72   => 'Tribal',
225			73   => 'Acid Punk',
226			74   => 'Acid Jazz',
227			75   => 'Polka',
228			76   => 'Retro',
229			77   => 'Musical',
230			78   => 'Rock & Roll',
231			79   => 'Hard Rock',
232			80   => 'Folk',
233			81   => 'Folk/Rock',
234			82   => 'National Folk',
235			83   => 'Swing',
236			84   => 'Fast-Fusion',
237			85   => 'Bebob',
238			86   => 'Latin',
239			87   => 'Revival',
240			88   => 'Celtic',
241			89   => 'Bluegrass',
242			90   => 'Avantgarde',
243			91   => 'Gothic Rock',
244			92   => 'Progressive Rock',
245			93   => 'Psychedelic Rock',
246			94   => 'Symphonic Rock',
247			95   => 'Slow Rock',
248			96   => 'Big Band',
249			97   => 'Chorus',
250			98   => 'Easy Listening',
251			99   => 'Acoustic',
252			100  => 'Humour',
253			101  => 'Speech',
254			102  => 'Chanson',
255			103  => 'Opera',
256			104  => 'Chamber Music',
257			105  => 'Sonata',
258			106  => 'Symphony',
259			107  => 'Booty Bass',
260			108  => 'Primus',
261			109  => 'Porn Groove',
262			110  => 'Satire',
263			111  => 'Slow Jam',
264			112  => 'Club',
265			113  => 'Tango',
266			114  => 'Samba',
267			115  => 'Folklore',
268			116  => 'Ballad',
269			117  => 'Power Ballad',
270			118  => 'Rhythmic Soul',
271			119  => 'Freestyle',
272			120  => 'Duet',
273			121  => 'Punk Rock',
274			122  => 'Drum Solo',
275			123  => 'A Cappella',
276			124  => 'Euro-House',
277			125  => 'Dance Hall',
278			126  => 'Goa',
279			127  => 'Drum & Bass',
280			128  => 'Club-House',
281			129  => 'Hardcore',
282			130  => 'Terror',
283			131  => 'Indie',
284			132  => 'BritPop',
285			133  => 'Negerpunk',
286			134  => 'Polsk Punk',
287			135  => 'Beat',
288			136  => 'Christian Gangsta Rap',
289			137  => 'Heavy Metal',
290			138  => 'Black Metal',
291			139  => 'Crossover',
292			140  => 'Contemporary Christian',
293			141  => 'Christian Rock',
294			142  => 'Merengue',
295			143  => 'Salsa',
296			144  => 'Thrash Metal',
297			145  => 'Anime',
298			146  => 'JPop',
299			147  => 'Synthpop',
300
301			255  => 'Unknown',
302
303			'CR' => 'Cover',
304			'RX' => 'Remix'
305		);
306
307		static $GenreLookupSCMPX = array();
308		if ($allowSCMPXextended && empty($GenreLookupSCMPX)) {
309			$GenreLookupSCMPX = $GenreLookup;
310			// http://www.geocities.co.jp/SiliconValley-Oakland/3664/alittle.html#GenreExtended
311			// Extended ID3v1 genres invented by SCMPX
312			// Note that 255 "Japanese Anime" conflicts with standard "Unknown"
313			$GenreLookupSCMPX[240] = 'Sacred';
314			$GenreLookupSCMPX[241] = 'Northern Europe';
315			$GenreLookupSCMPX[242] = 'Irish & Scottish';
316			$GenreLookupSCMPX[243] = 'Scotland';
317			$GenreLookupSCMPX[244] = 'Ethnic Europe';
318			$GenreLookupSCMPX[245] = 'Enka';
319			$GenreLookupSCMPX[246] = 'Children\'s Song';
320			$GenreLookupSCMPX[247] = 'Japanese Sky';
321			$GenreLookupSCMPX[248] = 'Japanese Heavy Rock';
322			$GenreLookupSCMPX[249] = 'Japanese Doom Rock';
323			$GenreLookupSCMPX[250] = 'Japanese J-POP';
324			$GenreLookupSCMPX[251] = 'Japanese Seiyu';
325			$GenreLookupSCMPX[252] = 'Japanese Ambient Techno';
326			$GenreLookupSCMPX[253] = 'Japanese Moemoe';
327			$GenreLookupSCMPX[254] = 'Japanese Tokusatsu';
328			//$GenreLookupSCMPX[255] = 'Japanese Anime';
329		}
330
331		return ($allowSCMPXextended ? $GenreLookupSCMPX : $GenreLookup);
332	}
333
334	/**
335	 * @param string $genreid
336	 * @param bool   $allowSCMPXextended
337	 *
338	 * @return string|false
339	 */
340	public static function LookupGenreName($genreid, $allowSCMPXextended=true) {
341		switch ($genreid) {
342			case 'RX':
343			case 'CR':
344				break;
345			default:
346				if (!is_numeric($genreid)) {
347					return false;
348				}
349				$genreid = intval($genreid); // to handle 3 or '3' or '03'
350				break;
351		}
352		$GenreLookup = self::ArrayOfGenres($allowSCMPXextended);
353		return (isset($GenreLookup[$genreid]) ? $GenreLookup[$genreid] : false);
354	}
355
356	/**
357	 * @param string $genre
358	 * @param bool   $allowSCMPXextended
359	 *
360	 * @return string|false
361	 */
362	public static function LookupGenreID($genre, $allowSCMPXextended=false) {
363		$GenreLookup = self::ArrayOfGenres($allowSCMPXextended);
364		$LowerCaseNoSpaceSearchTerm = strtolower(str_replace(' ', '', $genre));
365		foreach ($GenreLookup as $key => $value) {
366			if (strtolower(str_replace(' ', '', $value)) == $LowerCaseNoSpaceSearchTerm) {
367				return $key;
368			}
369		}
370		return false;
371	}
372
373	/**
374	 * @param string $OriginalGenre
375	 *
376	 * @return string|false
377	 */
378	public static function StandardiseID3v1GenreName($OriginalGenre) {
379		if (($GenreID = self::LookupGenreID($OriginalGenre)) !== false) {
380			return self::LookupGenreName($GenreID);
381		}
382		return $OriginalGenre;
383	}
384
385	/**
386	 * @param string     $title
387	 * @param string     $artist
388	 * @param string     $album
389	 * @param string     $year
390	 * @param int        $genreid
391	 * @param string     $comment
392	 * @param int|string $track
393	 *
394	 * @return string
395	 */
396	public static function GenerateID3v1Tag($title, $artist, $album, $year, $genreid, $comment, $track='') {
397		$ID3v1Tag  = 'TAG';
398		$ID3v1Tag .= str_pad(trim(substr($title,  0, 30)), 30, "\x00", STR_PAD_RIGHT);
399		$ID3v1Tag .= str_pad(trim(substr($artist, 0, 30)), 30, "\x00", STR_PAD_RIGHT);
400		$ID3v1Tag .= str_pad(trim(substr($album,  0, 30)), 30, "\x00", STR_PAD_RIGHT);
401		$ID3v1Tag .= str_pad(trim(substr($year,   0,  4)),  4, "\x00", STR_PAD_LEFT);
402		if (!empty($track) && ($track > 0) && ($track <= 255)) {
403			$ID3v1Tag .= str_pad(trim(substr($comment, 0, 28)), 28, "\x00", STR_PAD_RIGHT);
404			$ID3v1Tag .= "\x00";
405			if (gettype($track) == 'string') {
406				$track = (int) $track;
407			}
408			$ID3v1Tag .= chr($track);
409		} else {
410			$ID3v1Tag .= str_pad(trim(substr($comment, 0, 30)), 30, "\x00", STR_PAD_RIGHT);
411		}
412		if (($genreid < 0) || ($genreid > 147)) {
413			$genreid = 255; // 'unknown' genre
414		}
415		switch (gettype($genreid)) {
416			case 'string':
417			case 'integer':
418				$ID3v1Tag .= chr(intval($genreid));
419				break;
420			default:
421				$ID3v1Tag .= chr(255); // 'unknown' genre
422				break;
423		}
424
425		return $ID3v1Tag;
426	}
427
428}
429