<?php

namespace splitbrain\JSStrip;

/**
 * Strip comments and whitespaces from given JavaScript Code
 *
 * This is a port of Nick Galbreath's python tool jsstrip.py which is
 * released under BSD license. See link for original code.
 *
 * The compressor is a single-pass scanner, not a JavaScript parser: it relies
 * on simple heuristics to tell regular expression literals from divisions and
 * it never inserts semicolons, so the input should terminate its statements
 * explicitly.
 *
 * @author Nick Galbreath <nickg@modp.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 * @link http://code.google.com/p/jsstrip/
 */
class JSStrip
{
    /** tokens after which a '/' is assumed to start a regular expression literal instead of a division */
    const REGEX_STARTERS = [
        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
        'return', 'yield', 'else', 'throw', 'await'
    ];

    /** characters skipped when looking backwards for the token in front of a '/' */
    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];

    /** items that don't need spaces next to them */
    const CHARS = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";

    /**
     * items which need a space if the sign before and after whitespace is equal.
     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
     */
    const OPS = "+-/";

    /** @var string the code currently being compressed (left-trimmed, with a trailing newline sentinel) */
    protected $source;

    /** @var int current byte offset into $source, bound by reference to the scanner's position */
    protected $idx = 0;

    /** @var int current line of the original input, used for error messages only */
    protected $line = 0;

    /**
     * Compress the given code
     *
     * Removes comments and superfluous whitespace. String, template and regular
     * expression literals are copied verbatim (except for backslash-newline line
     * continuations inside strings, which are joined), as is everything between
     * BEGIN NOCOMPRESS and END NOCOMPRESS marker comments.
     *
     * @param string $source The JavaScript code to compress
     * @return string
     * @throws Exception if parsing fails
     */
    public function compress($source)
    {
        $source = (string) $source;
        $stripped = ltrim($source); // strip all initial whitespace
        // the stripped prefix may contain newlines; count them so reported line numbers stay correct
        $leading = (string) substr($source, 0, strlen($source) - strlen($stripped));

        // the trailing newline acts as a sentinel: every look-ahead below can rely on it
        $source = $stripped . "\n";
        $idx = 0; // char index for input string

        // track these as member variables so fatal() can report the position
        $this->source = $source;
        $this->line = 1 + substr_count($leading, "\n");
        $this->idx = &$idx;

        $slen = strlen($source); // size of input string
        $result = ''; // we store the final result here

        while ($idx < $slen) {
            // copy all "boring" characters in one go. This is either a
            // reserved word (e.g. "for", "else", "if") or a
            // variable/object/method (e.g. "foo.color")
            $run = strcspn($source, self::CHARS, $idx);
            if ($run > 0) {
                $result .= substr($source, $idx, $run);
                $idx += $run;
            }

            // always in range: the sentinel newline is part of CHARS and stops the run above
            $ch = $source[$idx];

            if ($ch === '/') {
                // a slash is never the last character because of the sentinel
                $next = $source[$idx + 1];

                // multiline comments (keeping IE conditionals, which start with /*@)
                if ($next === '*' && $source[$idx + 2] !== '@') {
                    $endC = strpos($source, '*/', $idx + 2);
                    if ($endC === false) {
                        $this->fatal('Found invalid /*..*/ comment');
                    }

                    $start = $idx;
                    if (substr($source, $idx, $endC + 2 - $idx) === '/* BEGIN NOCOMPRESS */') {
                        $endNC = $this->findNoCompressEnd($source, $endC);
                        // verbatim copy contents, trimming but putting it on its own line
                        $result .= "\n" . trim(substr($source, $idx + 22, $endNC - ($idx + 22))) . "\n"; // BEGIN comment = 22 chars
                        $idx = $endNC + 20; // END comment = 20 chars
                    } else {
                        $idx = $endC + 2;
                    }
                    // newlines swallowed here never reach the whitespace branch, so count them now
                    $this->line += substr_count($source, "\n", $start, $idx - $start);
                    continue;
                }

                // singleline comments; the newline itself is left for the whitespace handling
                if ($next === '/') {
                    $endC = strpos($source, "\n", $idx + 2);
                    if ($endC === false) {
                        $this->fatal('Invalid comment'); // cannot happen thanks to the sentinel
                    }
                    $idx = $endC;
                    continue;
                }

                // tricky. might be an RE
                if ($this->startsRegex($source, $idx)) {
                    $reLen = $this->regexLength($source, $idx, $slen);
                    if ($reLen > 0) {
                        $result .= substr($source, $idx, $reLen);
                        $idx += $reLen;
                        continue;
                    }
                    // no terminator on this line: it is a plain operator after all
                }
            }

            // double quote, single quote and backtick strings
            if ($ch === '"' || $ch === "'" || $ch === '`') {
                $len = $this->stringLength($source, $idx, $slen, $ch);
                $literal = (string) substr($source, $idx, $len);
                $this->line += substr_count($literal, "\n");
                $result .= $this->stripLineContinuations($literal);
                $idx += $len;
                continue;
            }

            // whitespaces
            if ($ch === ' ' || $ch === "\r" || $ch === "\n" || $ch === "\t") {
                $prev = (string) substr($result, -1); // last char added, '' if none yet
                if ($ch === "\n") {
                    $this->line++;
                }

                if ($idx + 1 < $slen) {
                    $next = $source[$idx + 1];
                    // Whitespace between two equal operators which may not follow
                    // each other directly (e.g. '+ +') has to be kept.
                    $mustKeep = $prev === $next && strpos(self::OPS, $next) !== false;

                    if (!$mustKeep) {
                        // leading spaces: the following char needs no space in front of it
                        if (strpos(self::CHARS, $next) !== false) {
                            $idx++;
                            continue;
                        }
                        // trailing spaces: the last char written needs no space after it
                        if ($prev !== '' && strpos(self::CHARS, $prev) !== false) {
                            $idx++;
                            continue;
                        }
                    }
                }

                // else after all of this convert the "whitespace" to
                // a single space. It will get appended below
                $ch = ' ';
            }

            // other chars
            $result .= $ch;
            $idx++;
        }

        return trim($result);
    }

    /**
     * Find the END NOCOMPRESS marker matching an already located BEGIN marker
     *
     * Nested BEGIN/END pairs are taken into account.
     *
     * @param string $source the code being compressed
     * @param int $from offset of the closing comment delimiter of the BEGIN marker
     * @return int offset of the matching END NOCOMPRESS comment
     * @throws Exception if no matching END marker exists
     */
    private function findNoCompressEnd($source, $from)
    {
        $depth = 0;
        $cursor = $from;
        do {
            $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $cursor + 2);
            $endNC = strpos($source, '/* END NOCOMPRESS */', $cursor + 2);

            if ($endNC === false) {
                $this->fatal('Found invalid NOCOMPRESS comment');
            }
            if ($beginNC !== false && $beginNC < $endNC) {
                // another block opens before the next one closes
                $depth++;
                $cursor = $beginNC;
            } else {
                $depth--;
                $cursor = $endNC;
            }
        } while ($depth >= 0);

        return $endNC;
    }

    /**
     * Check if the slash at the given position is in a place where a regular expression may start
     *
     * Looks at the first non-whitespace token in front of the slash. The very
     * start of the input counts as a regex position too, since no valid script
     * begins with a division operator.
     *
     * @param string $source the code being compressed
     * @param int $idx offset of the slash
     * @return bool
     */
    private function startsRegex($source, $idx)
    {
        // rewind, skip white space - but never run off the front of the string
        $pos = $idx - 1;
        while ($pos >= 0 && in_array($source[$pos], self::WHITESPACE_CHARS, true)) {
            $pos--;
        }
        if ($pos < 0) {
            return true;
        }

        foreach (self::REGEX_STARTERS as $starter) {
            $len = strlen($starter);
            $start = $pos + 1 - $len;
            if ($start >= 0 && substr($source, $start, $len) === $starter) {
                return true;
            }
        }
        return false;
    }

    /**
     * Measure the regular expression literal starting at the given slash
     *
     * A slash inside a character class ([...]) does not terminate the
     * expression and escaped characters are skipped. Regular expression
     * literals cannot span lines, so hitting a newline first means the slash
     * was not the start of a regex.
     *
     * @param string $source the code being compressed
     * @param int $idx offset of the opening slash
     * @param int $slen length of $source
     * @return int length including both slashes (flags excluded), 0 if this is no regex
     */
    private function regexLength($source, $idx, $slen)
    {
        $inClass = false; // inside a character class definition?
        $j = 1;
        while ($idx + $j < $slen) {
            $c = $source[$idx + $j];
            if ($c === "\n") {
                return 0;
            }
            if ($c === '\\') {
                // a backslash is never the last char because of the trailing newline
                if ($source[$idx + $j + 1] === "\n") {
                    return 0;
                }
                $j += 2;
                continue;
            }
            if ($c === '/' && !$inClass) {
                return $j + 1;
            }
            if ($c === '[') {
                $inClass = true;
            } elseif ($c === ']') {
                $inClass = false;
            }
            $j++;
        }
        return 0;
    }

    /**
     * Measure the string literal starting at the given quote
     *
     * An unterminated literal extends to the end of the input.
     *
     * @param string $source the code being compressed
     * @param int $idx offset of the opening quote
     * @param int $slen length of $source
     * @param string $quote the quote character that opened the literal
     * @return int length of the literal including both quotes
     */
    private function stringLength($source, $idx, $slen, $quote)
    {
        $j = 1;
        while ($idx + $j < $slen && $source[$idx + $j] !== $quote) {
            // step over escaped quotes and escaped backslashes as a pair
            $escaped = $source[$idx + $j] === '\\'
                && ($source[$idx + $j + 1] === $quote || $source[$idx + $j + 1] === '\\');
            $j += $escaped ? 2 : 1;
        }
        return $j + 1;
    }

    /**
     * Remove multiline markers (backslash + newline) from a string literal
     *
     * Escape sequences are consumed pairwise, so an escaped backslash that
     * happens to be followed by a real newline is left untouched.
     *
     * @param string $literal the literal including its quotes
     * @return string
     */
    private function stripLineContinuations($literal)
    {
        if (strpos($literal, "\\\n") === false) {
            return $literal; // nothing to do for the vast majority of strings
        }

        $out = '';
        $len = strlen($literal);
        for ($i = 0; $i < $len; $i++) {
            if ($literal[$i] === '\\' && $i + 1 < $len) {
                if ($literal[$i + 1] !== "\n") {
                    $out .= $literal[$i] . $literal[$i + 1];
                }
                $i++; // the escaped character has been handled
                continue;
            }
            $out .= $literal[$i];
        }
        return $out;
    }

    /**
     * Helper to throw a fatal error
     *
     * Tries to give some context to locate the error: up to 15 bytes before
     * and after the current position are included in the message.
     *
     * @param string $msg
     * @throws Exception
     */
    protected function fatal($msg)
    {
        $start = max(0, $this->idx - 15);
        // third argument of substr() is a length, not an end offset
        $before = substr($this->source, $start, $this->idx - $start);
        $after = substr($this->source, $this->idx, 15);

        $msg = "$msg on line {$this->line}: '{$before}◎{$after}'";
        throw new Exception($msg);
    }
}