1<?php
2
3/////////////////////////////////////////////////////////////////
4/// getID3() by James Heinrich <info@getid3.org>               //
5//  available at https://github.com/JamesHeinrich/getID3       //
6//            or https://www.getid3.org                        //
7//            or http://getid3.sourceforge.net                 //
8//  see readme.txt for more details                            //
9/////////////////////////////////////////////////////////////////
10//                                                             //
11// module.tag.apetag.php                                       //
12// module for analyzing APE tags                               //
13// dependencies: NONE                                          //
14//                                                            ///
15/////////////////////////////////////////////////////////////////
16
if (defined('GETID3_INCLUDEPATH') === false) {
	// This module was requested directly (e.g. by URL on a public webserver) rather than
	// being loaded by getID3 itself: stop silently so that no path information is exposed.
	exit;
}
20
class getid3_apetag extends getid3_handler
{
	/**
	 * true: return full data for all attachments;
	 * false: return no data for all attachments;
	 * integer: return data for attachments <= than this;
	 * string: save as file to this directory.
	 *
	 * @var int|bool|string
	 */
	public $inline_attachments = true;

	/**
	 * Absolute file offset at which the APE tag (i.e. its 32-byte footer) is expected to end.
	 * When left at 0, Analyze() probes the end of the file instead, looking for the footer
	 * either directly at the end of the file or immediately before a 128-byte ID3v1 tag.
	 *
	 * @var int
	 */
	public $overrideendoffset  = 0;

	/**
	 * Locate the APE tag, parse its footer/header and every item, and store the result in
	 * $info['ape'] (items, comments, pictures) and $info['replay_gain'].
	 *
	 * Also sets $info['avdataend'] to the start of the tag and discards previously parsed ID3v1
	 * data that turns out to lie inside the APE tag.
	 *
	 * @return bool true when a tag was found and parsed; false when no tag is present or the
	 *              tag is malformed (a warning/error is recorded in the malformed cases)
	 */
	public function Analyze() {
		$info = &$this->getid3->info;

		if (!getid3_lib::intValueSupported($info['filesize'])) {
			$this->warning('Unable to check for APEtags because file is larger than '.round(PHP_INT_MAX / 1073741824).'GB');
			return false;
		}

		$id3v1tagsize     = 128;
		$apetagheadersize = 32;
		$lyrics3tagsize   = 10;

		if ($this->overrideendoffset == 0) {

			// read enough of the file tail to cover an APE footer followed by an optional ID3v1 tag
			$this->fseek(0 - $id3v1tagsize - $apetagheadersize - $lyrics3tagsize, SEEK_END);
			$APEfooterID3v1 = $this->fread($id3v1tagsize + $apetagheadersize + $lyrics3tagsize);

			if (substr($APEfooterID3v1, strlen($APEfooterID3v1) - $id3v1tagsize - $apetagheadersize, 8) == 'APETAGEX') {

				// APE tag found before ID3v1
				$info['ape']['tag_offset_end'] = $info['filesize'] - $id3v1tagsize;

			} elseif (substr($APEfooterID3v1, strlen($APEfooterID3v1) - $apetagheadersize, 8) == 'APETAGEX') {

				// APE tag found, no ID3v1
				$info['ape']['tag_offset_end'] = $info['filesize'];

			}

		} else {

			// caller told us where the tag ends: only check for a footer at that position
			$this->fseek($this->overrideendoffset - $apetagheadersize);
			if ($this->fread(8) == 'APETAGEX') {
				$info['ape']['tag_offset_end'] = $this->overrideendoffset;
			}

		}
		if (!isset($info['ape']['tag_offset_end'])) {

			// APE tag not found
			unset($info['ape']);
			return false;

		}

		// shortcut
		$thisfile_ape = &$info['ape'];

		$this->fseek($thisfile_ape['tag_offset_end'] - $apetagheadersize);
		$APEfooterData = $this->fread(32);
		if (!($thisfile_ape['footer'] = $this->parseAPEheaderFooter($APEfooterData))) {
			$this->error('Error parsing APE footer at offset '.$thisfile_ape['tag_offset_end']);
			return false;
		}

		// The tag size stored in the footer covers the items plus the footer; an optional header
		// adds another 32 bytes in front. The value comes straight from the file, so make sure it
		// does not point before the start of the file before seeking there.
		$has_header       = (isset($thisfile_ape['footer']['flags']['header']) && $thisfile_ape['footer']['flags']['header']);
		$tagsize          = $thisfile_ape['footer']['raw']['tagsize'];
		$tag_offset_start = $thisfile_ape['tag_offset_end'] - $tagsize - ($has_header ? $apetagheadersize : 0);
		if ($tag_offset_start < 0) {
			$this->error('APE footer at offset '.$thisfile_ape['tag_offset_end'].' declares a tag size ('.$tagsize.' bytes) that extends before the start of the file');
			return false;
		}

		$thisfile_ape['tag_offset_start'] = $tag_offset_start;
		$this->fseek($tag_offset_start);
		$APEtagData = $this->fread($tagsize + ($has_header ? $apetagheadersize : 0));
		$info['avdataend'] = $thisfile_ape['tag_offset_start'];

		if (isset($info['id3v1']['tag_offset_start']) && ($info['id3v1']['tag_offset_start'] < $thisfile_ape['tag_offset_end'])) {
			$this->warning('ID3v1 tag information ignored since it appears to be a false synch in APEtag data');
			unset($info['id3v1']);
			foreach ($info['warning'] as $key => $value) {
				if ($value == 'Some ID3v1 fields do not use NULL characters for padding') {
					unset($info['warning'][$key]);
					// NOTE(review): sort() renumbers the keys after the unset but also reorders the
					// remaining warnings alphabetically - confirm the reordering is intended
					sort($info['warning']);
					break;
				}
			}
		}

		$offset = 0;
		if ($has_header) {
			if ($thisfile_ape['header'] = $this->parseAPEheaderFooter(substr($APEtagData, 0, $apetagheadersize))) {
				$offset += $apetagheadersize;
			} else {
				$this->error('Error parsing APE header at offset '.$thisfile_ape['tag_offset_start']);
				return false;
			}
		}

		// shortcut
		$info['replay_gain'] = array();
		$thisfile_replaygain = &$info['replay_gain'];

		$APEtagDataLength = strlen($APEtagData);
		for ($i = 0; $i < $thisfile_ape['footer']['raw']['tag_items']; $i++) {
			// each item: 4 bytes value size, 4 bytes flags, null-terminated key, value
			$value_size = getid3_lib::LittleEndian2Int(substr($APEtagData, $offset, 4));
			$offset += 4;
			$item_flags = getid3_lib::LittleEndian2Int(substr($APEtagData, $offset, 4));
			$offset += 4;

			// search for the key terminator in place (no copy of the remaining tag data); the
			// bounds check keeps strpos() from being called with an offset outside the string
			$ItemKeyEnd = (($offset < $APEtagDataLength) ? strpos($APEtagData, "\x00", $offset) : false);
			if ($ItemKeyEnd === false) {
				$this->error('Cannot find null-byte (0x00) separator between ItemKey #'.$i.' and value. ItemKey starts '.$offset.' bytes into the APE tag, at file offset '.($thisfile_ape['tag_offset_start'] + $offset));
				return false;
			}
			$ItemKeyLength = $ItemKeyEnd - $offset;
			$item_key      = strtolower(substr($APEtagData, $offset, $ItemKeyLength));

			// shortcut
			$thisfile_ape['items'][$item_key] = array();
			$thisfile_ape_items_current = &$thisfile_ape['items'][$item_key];

			// file offset of the item key (not of the item value)
			$thisfile_ape_items_current['offset'] = $thisfile_ape['tag_offset_start'] + $offset;

			$offset += ($ItemKeyLength + 1); // skip 0x00 terminator
			$thisfile_ape_items_current['data'] = substr($APEtagData, $offset, $value_size);
			$offset += $value_size;

			$thisfile_ape_items_current['flags'] = $this->parseAPEtagFlags($item_flags);
			switch ($thisfile_ape_items_current['flags']['item_contents_raw']) {
				case 0: // UTF-8
				case 2: // Locator (URL, filename, etc), UTF-8 encoded
					// text items may hold several null-separated values
					$thisfile_ape_items_current['data'] = explode("\x00", $thisfile_ape_items_current['data']);
					break;

				case 1:  // binary data
				default:
					break;
			}

			// $item_key has already been lowercased above
			switch ($item_key) {
				// http://wiki.hydrogenaud.io/index.php?title=ReplayGain#MP3Gain
				case 'replaygain_track_gain':
					if (preg_match('#^([\\-\\+][0-9\\.,]{8})( dB)?$#', $thisfile_ape_items_current['data'][0], $matches)) {
						$thisfile_replaygain['track']['adjustment'] = (float) str_replace(',', '.', $matches[1]); // float casting will see "0,95" as zero!
						$thisfile_replaygain['track']['originator'] = 'unspecified';
					} else {
						$this->warning('MP3gainTrackGain value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'replaygain_track_peak':
					if (preg_match('#^([0-9\\.,]{8})$#', $thisfile_ape_items_current['data'][0], $matches)) {
						$thisfile_replaygain['track']['peak']       = (float) str_replace(',', '.', $matches[1]); // float casting will see "0,95" as zero!
						$thisfile_replaygain['track']['originator'] = 'unspecified';
						if ($thisfile_replaygain['track']['peak'] <= 0) {
							$this->warning('ReplayGain Track peak from APEtag appears invalid: '.$thisfile_replaygain['track']['peak'].' (original value = "'.$thisfile_ape_items_current['data'][0].'")');
						}
					} else {
						$this->warning('MP3gainTrackPeak value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'replaygain_album_gain':
					if (preg_match('#^([\\-\\+][0-9\\.,]{8})( dB)?$#', $thisfile_ape_items_current['data'][0], $matches)) {
						$thisfile_replaygain['album']['adjustment'] = (float) str_replace(',', '.', $matches[1]); // float casting will see "0,95" as zero!
						$thisfile_replaygain['album']['originator'] = 'unspecified';
					} else {
						$this->warning('MP3gainAlbumGain value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'replaygain_album_peak':
					if (preg_match('#^([0-9\\.,]{8})$#', $thisfile_ape_items_current['data'][0], $matches)) {
						$thisfile_replaygain['album']['peak']       = (float) str_replace(',', '.', $matches[1]); // float casting will see "0,95" as zero!
						$thisfile_replaygain['album']['originator'] = 'unspecified';
						if ($thisfile_replaygain['album']['peak'] <= 0) {
							$this->warning('ReplayGain Album peak from APEtag appears invalid: '.$thisfile_replaygain['album']['peak'].' (original value = "'.$thisfile_ape_items_current['data'][0].'")');
						}
					} else {
						$this->warning('MP3gainAlbumPeak value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'mp3gain_undo':
					if (preg_match('#^[\\-\\+][0-9]{3},[\\-\\+][0-9]{3},[NW]$#', $thisfile_ape_items_current['data'][0])) {
						list($mp3gain_undo_left, $mp3gain_undo_right, $mp3gain_undo_wrap) = explode(',', $thisfile_ape_items_current['data'][0]);
						$thisfile_replaygain['mp3gain']['undo_left']  = intval($mp3gain_undo_left);
						$thisfile_replaygain['mp3gain']['undo_right'] = intval($mp3gain_undo_right);
						// the pattern above only admits "N" or "W" as the third field, so "W" is the
						// value that marks wrapping (comparing against "Y" could never be true)
						$thisfile_replaygain['mp3gain']['undo_wrap']  = ($mp3gain_undo_wrap === 'W');
					} else {
						$this->warning('MP3gainUndo value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'mp3gain_minmax':
					if (preg_match('#^[0-9]{3},[0-9]{3}$#', $thisfile_ape_items_current['data'][0])) {
						list($mp3gain_globalgain_min, $mp3gain_globalgain_max) = explode(',', $thisfile_ape_items_current['data'][0]);
						$thisfile_replaygain['mp3gain']['globalgain_track_min'] = intval($mp3gain_globalgain_min);
						$thisfile_replaygain['mp3gain']['globalgain_track_max'] = intval($mp3gain_globalgain_max);
					} else {
						$this->warning('MP3gainMinMax value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'mp3gain_album_minmax':
					if (preg_match('#^[0-9]{3},[0-9]{3}$#', $thisfile_ape_items_current['data'][0])) {
						list($mp3gain_globalgain_album_min, $mp3gain_globalgain_album_max) = explode(',', $thisfile_ape_items_current['data'][0]);
						$thisfile_replaygain['mp3gain']['globalgain_album_min'] = intval($mp3gain_globalgain_album_min);
						$thisfile_replaygain['mp3gain']['globalgain_album_max'] = intval($mp3gain_globalgain_album_max);
					} else {
						$this->warning('MP3gainAlbumMinMax value in APEtag appears invalid: "'.$thisfile_ape_items_current['data'][0].'"');
					}
					break;

				case 'tracknumber':
					// stored under the "track_number" comment key rather than the raw item key
					if (is_array($thisfile_ape_items_current['data'])) {
						foreach ($thisfile_ape_items_current['data'] as $comment) {
							$thisfile_ape['comments']['track_number'][] = $comment;
						}
					}
					break;

				case 'cover art (artist)':
				case 'cover art (back)':
				case 'cover art (band logo)':
				case 'cover art (band)':
				case 'cover art (colored fish)':
				case 'cover art (composer)':
				case 'cover art (conductor)':
				case 'cover art (front)':
				case 'cover art (icon)':
				case 'cover art (illustration)':
				case 'cover art (lead)':
				case 'cover art (leaflet)':
				case 'cover art (lyricist)':
				case 'cover art (media)':
				case 'cover art (movie scene)':
				case 'cover art (other icon)':
				case 'cover art (other)':
				case 'cover art (performance)':
				case 'cover art (publisher logo)':
				case 'cover art (recording)':
				case 'cover art (studio)':
					// list of possible cover arts from http://taglib-sharp.sourcearchive.com/documentation/2.0.3.0-2/Ape_2Tag_8cs-source.html
					if (is_array($thisfile_ape_items_current['data'])) {
						$this->warning('APEtag "'.$item_key.'" should be flagged as Binary data, but was incorrectly flagged as UTF-8');
						$thisfile_ape_items_current['data'] = implode("\x00", $thisfile_ape_items_current['data']);
					}
					// value is "<filename>\x00<image data>"; pad the split result so that a value
					// without any separator yields empty image data instead of an undefined offset
					list($thisfile_ape_items_current['filename'], $thisfile_ape_items_current['data']) = array_pad(explode("\x00", $thisfile_ape_items_current['data'], 2), 2, '');
					// NOTE(review): 'offset' is the file offset of the item key, so this sum does not
					// account for the key and its terminator - confirm before using it as a file position
					$thisfile_ape_items_current['data_offset'] = $thisfile_ape_items_current['offset'] + strlen($thisfile_ape_items_current['filename']."\x00");
					$thisfile_ape_items_current['data_length'] = strlen($thisfile_ape_items_current['data']);

					// single-pass loop: "break" is used to bail out of the attachment handling early
					do {
						$thisfile_ape_items_current['image_mime'] = '';
						$imageinfo = array();
						$imagechunkcheck = getid3_lib::GetDataImageSize($thisfile_ape_items_current['data'], $imageinfo);
						if (($imagechunkcheck === false) || !isset($imagechunkcheck[2])) {
							$this->warning('APEtag "'.$item_key.'" contains invalid image data');
							break;
						}
						$thisfile_ape_items_current['image_mime'] = image_type_to_mime_type($imagechunkcheck[2]);

						if ($this->inline_attachments === false) {
							// skip entirely
							unset($thisfile_ape_items_current['data']);
							break;
						}
						if ($this->inline_attachments === true) {
							// great
						} elseif (is_int($this->inline_attachments)) {
							if ($this->inline_attachments < $thisfile_ape_items_current['data_length']) {
								// too big, skip
								$this->warning('attachment at '.$thisfile_ape_items_current['offset'].' is too large to process inline ('.number_format($thisfile_ape_items_current['data_length']).' bytes)');
								unset($thisfile_ape_items_current['data']);
								break;
							}
						} elseif (is_string($this->inline_attachments)) {
							// normalise directory separators and drop any trailing one
							$this->inline_attachments = rtrim(str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $this->inline_attachments), DIRECTORY_SEPARATOR);
							if (!is_dir($this->inline_attachments) || !getID3::is_writable($this->inline_attachments)) {
								// cannot write, skip
								$this->warning('attachment at '.$thisfile_ape_items_current['offset'].' cannot be saved to "'.$this->inline_attachments.'" (not writable)');
								unset($thisfile_ape_items_current['data']);
								break;
							}
						}
						// if we get this far, must be OK
						if (is_string($this->inline_attachments)) {
							$destination_filename = $this->inline_attachments.DIRECTORY_SEPARATOR.md5($info['filenamepath']).'_'.$thisfile_ape_items_current['data_offset'];
							if (!file_exists($destination_filename) || getID3::is_writable($destination_filename)) {
								file_put_contents($destination_filename, $thisfile_ape_items_current['data']);
							} else {
								$this->warning('attachment at '.$thisfile_ape_items_current['offset'].' cannot be saved to "'.$destination_filename.'" (not writable)');
							}
							$thisfile_ape_items_current['data_filename'] = $destination_filename;
							unset($thisfile_ape_items_current['data']);
						} else {
							if (!isset($info['ape']['comments']['picture'])) {
								$info['ape']['comments']['picture'] = array();
							}
							// copy whichever of these keys exist on the item; this method itself
							// only populates 'data' and 'image_mime' from this list
							$comments_picture_data = array();
							foreach (array('data', 'image_mime', 'image_width', 'image_height', 'imagetype', 'picturetype', 'description', 'datalength') as $picture_key) {
								if (isset($thisfile_ape_items_current[$picture_key])) {
									$comments_picture_data[$picture_key] = $thisfile_ape_items_current[$picture_key];
								}
							}
							$info['ape']['comments']['picture'][] = $comments_picture_data;
							unset($comments_picture_data);
						}
					} while (false);
					break;

				default:
					// only text items (already split into an array above) become comments
					if (is_array($thisfile_ape_items_current['data'])) {
						foreach ($thisfile_ape_items_current['data'] as $comment) {
							$thisfile_ape['comments'][$item_key][] = $comment;
						}
					}
					break;
			}

		}
		if (empty($thisfile_replaygain)) {
			unset($info['replay_gain']);
		}
		return true;
	}

	/**
	 * Decode a 32-byte APE tag header or footer structure.
	 *
	 * The result holds the undecoded fields under 'raw', the version divided by 1000 under
	 * 'tag_version' and, for versions >= 2, the decoded global flags under 'flags'.
	 *
	 * @param string $APEheaderFooterData at least 32 bytes, starting with "APETAGEX"
	 *
	 * @return array|false false when the data is too short or does not start with "APETAGEX"
	 */
	public function parseAPEheaderFooter($APEheaderFooterData) {
		// http://www.uni-jena.de/~pfk/mpp/sv8/apeheader.html

		// all fields up to byte 32 are read below, so a truncated block cannot be decoded
		if (!is_string($APEheaderFooterData) || (strlen($APEheaderFooterData) < 32)) {
			return false;
		}

		$raw = array();
		$raw['footer_tag']   =                  substr($APEheaderFooterData,  0, 8);
		if ($raw['footer_tag'] != 'APETAGEX') {
			return false;
		}
		$raw['version']      = getid3_lib::LittleEndian2Int(substr($APEheaderFooterData,  8, 4));
		$raw['tagsize']      = getid3_lib::LittleEndian2Int(substr($APEheaderFooterData, 12, 4));
		$raw['tag_items']    = getid3_lib::LittleEndian2Int(substr($APEheaderFooterData, 16, 4));
		$raw['global_flags'] = getid3_lib::LittleEndian2Int(substr($APEheaderFooterData, 20, 4));
		$raw['reserved']     =                              substr($APEheaderFooterData, 24, 8);

		$headerfooterinfo = array('raw' => $raw);
		$headerfooterinfo['tag_version'] = $raw['version'] / 1000;
		if ($headerfooterinfo['tag_version'] >= 2) {
			// flags are only decoded for version 2 and later
			$headerfooterinfo['flags'] = $this->parseAPEtagFlags($raw['global_flags']);
		}
		return $headerfooterinfo;
	}

	/**
	 * Split a 32-bit APE flags value (tag-global or per-item) into its individual fields.
	 *
	 * @param int $rawflagint
	 *
	 * @return array keys: header, footer, this_is_header, read_only (bool),
	 *               item_contents_raw (int 0-3) and item_contents (its textual name)
	 */
	public function parseAPEtagFlags($rawflagint) {
		// "Note: APE Tags 1.0 do not use any of the APE Tag flags.
		// All are set to zero on creation and ignored on reading."
		// http://wiki.hydrogenaud.io/index.php?title=Ape_Tags_Flags
		$flags['header']            = (bool) ($rawflagint & 0x80000000); // bit 31
		$flags['footer']            = (bool) ($rawflagint & 0x40000000); // bit 30
		$flags['this_is_header']    = (bool) ($rawflagint & 0x20000000); // bit 29
		$flags['item_contents_raw'] =        ($rawflagint & 0x00000006) >> 1; // bits 2-1
		$flags['read_only']         = (bool) ($rawflagint & 0x00000001); // bit 0

		$flags['item_contents']     = $this->APEcontentTypeFlagLookup($flags['item_contents_raw']);

		return $flags;
	}

	/**
	 * Translate the 2-bit item content type into its textual name.
	 *
	 * @param int $contenttypeid
	 *
	 * @return string 'utf-8', 'binary', 'external' or 'reserved'; 'invalid' for any other value
	 */
	public function APEcontentTypeFlagLookup($contenttypeid) {
		static $APEcontentTypeFlagLookup = array(
			0 => 'utf-8',
			1 => 'binary',
			2 => 'external',
			3 => 'reserved'
		);
		return (isset($APEcontentTypeFlagLookup[$contenttypeid]) ? $APEcontentTypeFlagLookup[$contenttypeid] : 'invalid');
	}

	/**
	 * Tell whether an item key is one of the keys listed below as UTF-8 text fields.
	 *
	 * @param string $itemkey item key; compared case-insensitively
	 *
	 * @return bool
	 */
	public function APEtagItemIsUTF8Lookup($itemkey) {
		static $APEtagItemIsUTF8Lookup = array(
			'title',
			'subtitle',
			'artist',
			'album',
			'debut album',
			'publisher',
			'conductor',
			'track',
			'composer',
			'comment',
			'copyright',
			'publicationright',
			'file',
			'year',
			'record date',
			'record location',
			'genre',
			'media',
			'related',
			'isrc',
			'abstract',
			'language',
			'bibliography'
		);
		// strtolower() always yields a string and the list holds only strings, so a strict match is sufficient
		return in_array(strtolower($itemkey), $APEtagItemIsUTF8Lookup, true);
	}

}
453