<?php

declare(strict_types=1);
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 *  * Neither the name of the SimplePie Team nor the names of its contributors may be used
 *    to endorse or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

namespace SimplePie\Content\Type;

/**
 * Content-type sniffing
 *
 * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
 *
 * This is used since we can't always trust Content-Type headers, and is based
 * upon the HTML5 parsing rules.
 *
 * The sniffer reads two members of the wrapped file object: `headers` (looked
 * up with the lower-case keys `content-type` and `content-encoding`) and
 * `body` (the raw response bytes).
 *
 * This class can be overloaded with {@see \SimplePie\SimplePie::set_content_type_sniffer_class()}
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class Sniffer
{
    /**
     * File object
     *
     * @var \SimplePie\File
     */
    public $file;

    /**
     * Create an instance of the class with the input file
     *
     * @param \SimplePie\File $file Input file
     */
    public function __construct($file)
    {
        $this->file = $file;
    }

    /**
     * Get the Content-Type of the specified file
     *
     * Without a `content-type` header the body alone decides, via
     * {@see Sniffer::unknown()}. With one, the declared type is normalised
     * (parameters stripped, lower-cased, trimmed) and then either trusted or
     * double-checked against the body, depending on its family.
     *
     * @return string Actual Content-Type
     */
    public function get_type()
    {
        if (!isset($this->file->headers['content-type'])) {
            return $this->unknown();
        }

        $header = $this->file->headers['content-type'];

        // These four header values are compared verbatim (case-sensitive,
        // parameters included). When one of them arrives without a
        // Content-Encoding header, the declared type is not trusted and the
        // body is checked for binary content instead.
        $suspect_plain_text = [
            'text/plain',
            'text/plain; charset=ISO-8859-1',
            'text/plain; charset=iso-8859-1',
            'text/plain; charset=UTF-8',
        ];
        if (!isset($this->file->headers['content-encoding'])
            && in_array($header, $suspect_plain_text, true)
        ) {
            return $this->text_or_binary();
        }

        // Reduce "type/subtype; param=value" to a normalised "type/subtype".
        $semicolon = strpos($header, ';');
        $official = $semicolon !== false ? substr($header, 0, $semicolon) : $header;
        $official = trim(strtolower($official));

        if ($official === 'unknown/unknown' || $official === 'application/unknown') {
            return $this->unknown();
        }

        // XML types are taken at face value.
        if (substr($official, -4) === '+xml'
            || $official === 'text/xml'
            || $official === 'application/xml'
        ) {
            return $official;
        }

        if (substr($official, 0, 6) === 'image/') {
            // Prefer the format identified by the magic number; keep the
            // declared image type when no known signature matches.
            $sniffed = $this->image();

            return $sniffed ? $sniffed : $official;
        }

        if ($official === 'text/html') {
            return $this->feed_or_html();
        }

        return $official;
    }

    /**
     * Sniff text or binary
     *
     * A leading byte-order mark (UTF-16 BE/LE, UTF-32 BE or UTF-8) settles the
     * question as text. Otherwise a single control byte from the ranges
     * 0x00-0x08, 0x0E-0x1A or 0x1C-0x1F anywhere in the body marks it binary.
     *
     * @return string Actual Content-Type ('text/plain' or 'application/octet-stream')
     */
    public function text_or_binary()
    {
        $body = $this->file->body;

        foreach (["\xFE\xFF", "\xFF\xFE", "\x00\x00\xFE\xFF", "\xEF\xBB\xBF"] as $bom) {
            if (self::starts_with($body, $bom)) {
                return 'text/plain';
            }
        }

        if (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $body)) {
            return 'application/octet-stream';
        }

        return 'text/plain';
    }

    /**
     * Sniff unknown
     *
     * Tries, in order: an HTML opening (after leading whitespace), the PDF and
     * PostScript headers, the known image signatures, and finally the
     * text-versus-binary heuristic of {@see Sniffer::text_or_binary()}.
     *
     * @return string Actual Content-Type
     */
    public function unknown()
    {
        $body = $this->file->body;

        // HTML markers are matched case-insensitively after skipping leading
        // tab, LF, VT, FF, CR and space. 14 bytes is the longest marker.
        $ws = strspn($body, "\x09\x0A\x0B\x0C\x0D\x20");
        $lead = strtolower(substr($body, $ws, 14));
        if ($lead === '<!doctype html'
            || self::starts_with($lead, '<html')
            || self::starts_with($lead, '<script')
        ) {
            return 'text/html';
        }

        // The remaining signatures must sit at the very first byte.
        if (self::starts_with($body, '%PDF-')) {
            return 'application/pdf';
        }

        if (self::starts_with($body, '%!PS-Adobe-')) {
            return 'application/postscript';
        }

        $image = $this->sniff_image_signature();
        if ($image !== false) {
            return $image;
        }

        return $this->text_or_binary();
    }

    /**
     * Sniff images
     *
     * @return string|false Content-Type matching the body's magic number
     *                      (GIF, PNG, JPEG, BMP or ICO), or false when the
     *                      body starts with none of the known signatures
     */
    public function image()
    {
        return $this->sniff_image_signature();
    }

    /**
     * Sniff HTML
     *
     * Decides whether a document served as text/html is really a feed. Leading
     * whitespace, comments (`<!-- -->`), other `<!...>` declarations and
     * processing instructions (`<?...?>`) are skipped; the first real tag
     * then decides. Anything unexpected or unterminated means HTML.
     *
     * @return string Actual Content-Type ('application/rss+xml',
     *                'application/atom+xml' or 'text/html')
     */
    public function feed_or_html()
    {
        $body = $this->file->body;
        $len = strlen($body);

        // The initial skip also swallows the three UTF-8 BOM bytes.
        $pos = strspn($body, "\x09\x0A\x0D\x20\xEF\xBB\xBF");

        while ($pos < $len) {
            switch ($body[$pos]) {
                case "\x09":
                case "\x0A":
                case "\x0D":
                case "\x20":
                    $pos += strspn($body, "\x09\x0A\x0D\x20", $pos);
                    continue 2;

                case '<':
                    $pos++;
                    break;

                default:
                    // Text before any tag: not a feed.
                    return 'text/html';
            }

            // $pos now points just past a '<'.
            if (substr($body, $pos, 3) === '!--') {
                // Comment: resume after the closing "-->". The bounds check
                // keeps the search offset inside the string.
                $pos += 3;
                $end = $pos < $len ? strpos($body, '-->', $pos) : false;
                if ($end === false) {
                    return 'text/html';
                }
                $pos = $end + 3;
            } elseif (substr($body, $pos, 1) === '!') {
                // Declaration such as a DOCTYPE: resume after the next '>'.
                $end = strpos($body, '>', $pos);
                if ($end === false) {
                    return 'text/html';
                }
                $pos = $end + 1;
            } elseif (substr($body, $pos, 1) === '?') {
                // Processing instruction: resume after the closing "?>".
                $end = strpos($body, '?>', $pos);
                if ($end === false) {
                    return 'text/html';
                }
                $pos = $end + 2;
            } elseif (substr($body, $pos, 3) === 'rss'
                || substr($body, $pos, 7) === 'rdf:RDF'
            ) {
                return 'application/rss+xml';
            } elseif (substr($body, $pos, 4) === 'feed') {
                return 'application/atom+xml';
            } else {
                return 'text/html';
            }
        }

        return 'text/html';
    }

    /**
     * Match the start of the body against the known image magic numbers.
     *
     * Shared by {@see Sniffer::image()} and {@see Sniffer::unknown()} so the
     * signature table exists in one place only.
     *
     * @return string|false Image Content-Type, or false when nothing matches
     */
    private function sniff_image_signature()
    {
        $signatures = [
            'GIF87a' => 'image/gif',
            'GIF89a' => 'image/gif',
            "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" => 'image/png',
            "\xFF\xD8\xFF" => 'image/jpeg',
            "\x42\x4D" => 'image/bmp',
            "\x00\x00\x01\x00" => 'image/vnd.microsoft.icon',
        ];

        foreach ($signatures as $magic => $type) {
            if (self::starts_with($this->file->body, $magic)) {
                return $type;
            }
        }

        return false;
    }

    /**
     * Binary-safe test for whether $haystack begins with $prefix.
     *
     * @param string $haystack Bytes to inspect
     * @param string $prefix Non-empty byte sequence expected at offset 0
     * @return bool
     */
    private static function starts_with($haystack, $prefix)
    {
        return strncmp($haystack, $prefix, strlen($prefix)) === 0;
    }
}

class_alias('SimplePie\Content\Type\Sniffer', 'SimplePie_Content_Type_Sniffer');