1<?php
2
3namespace splitbrain\JSStrip;
4
5/**
6 * Strip comments and whitespaces from given JavaScript Code
7 *
8 * This is a port of Nick Galbreath's python tool jsstrip.py which is
9 * released under BSD license. See link for original code.
10 *
11 * @author Nick Galbreath <nickg@modp.com>
12 * @author Andreas Gohr <andi@splitbrain.org>
13 * @link   http://code.google.com/p/jsstrip/
14 */
class JSStrip
{

    /**
     * Tokens after which a '/' starts a regular expression literal instead of a division.
     *
     * Word tokens only count when they form a whole word, not the tail of a longer identifier.
     */
    const REGEX_STARTERS = [
        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
        'return', 'yield', 'else', 'throw', 'await'
    ];

    /** characters skipped when looking back for the token in front of a '/' */
    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];

    /** items that don't need spaces next to them */
    const CHARS = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";

    /**
     * items which need a space if the sign before and after whitespace is equal.
     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
     */
    const OPS = "+-/";

    /** @var string the code currently being compressed (left-trimmed, with a trailing newline) */
    protected $source;
    /** @var int current read position in $source, bound by reference to the index used in compress() */
    protected $idx = 0;
    /** @var int line number of the current position; refreshed whenever an error is reported */
    protected $line = 0;
    /** @var int number of line breaks removed from the start of the input by the initial trim */
    private $lineOffset = 0;

    /**
     * Compress the given code
     *
     * Removes comments and superfluous whitespace. String literals, template literals,
     * regular expression literals and NOCOMPRESS sections are copied verbatim
     * (line continuations inside string literals are joined).
     *
     * @param string $source The JavaScript code to compress
     * @return string
     * @throws Exception if parsing fails
     */
    public function compress($source)
    {
        $source = (string)$source;
        $trimmed = ltrim($source);     // strip all initial whitespace
        // remember how many lines were stripped so errors can report lines of the original input
        $this->lineOffset = substr_count(substr($source, 0, strlen($source) - strlen($trimmed)), "\n");
        // the trailing newline guarantees that every scan below hits a terminator
        $source = $trimmed . "\n";
        $idx = 0;             // char index for input string

        // track these as member variables
        $this->source = $source;
        $this->line = 1 + $this->lineOffset;
        $this->idx = &$idx;

        $slen = strlen($source); // size of input string
        $result = '';       // we store the final result here

        while ($idx < $slen) {
            // copy all "boring" characters in one go.  This is either
            // reserved word (e.g. "for", "else", "if") or a
            // variable/object/method (e.g. "foo.color")
            $run = strcspn($source, self::CHARS, $idx);
            if ($run > 0) {
                $result .= substr($source, $idx, $run);
                $idx += $run;
            }

            // the final newline is part of CHARS, so $idx is still inside the string here
            $ch = $source[$idx];
            $next = ($idx + 1 < $slen) ? $source[$idx + 1] : '';

            // multiline comments (keeping IE conditionals)
            if ($ch === '/' && $next === '*' && $source[$idx + 2] !== '@') {
                $endC = strpos($source, '*/', $idx + 2);
                if ($endC === false) $this->fatal('Found invalid /*..*/ comment');

                // check if this is a NOCOMPRESS comment
                if (substr($source, $idx, $endC + 2 - $idx) == '/* BEGIN NOCOMPRESS */') {
                    // take nested NOCOMPRESS comments into account
                    $depth = 0;
                    $nextNC = $endC;
                    do {
                        $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
                        $endNC = strpos($source, '/* END NOCOMPRESS */', $nextNC + 2);

                        if ($endNC === false) $this->fatal('Found invalid NOCOMPRESS comment');
                        if ($beginNC !== false && $beginNC < $endNC) {
                            $depth++;
                            $nextNC = $beginNC;
                        } else {
                            $depth--;
                            $nextNC = $endNC;
                        }
                    } while ($depth >= 0);

                    // verbatim copy contents, trimming but putting it on its own line
                    $result .= "\n" . trim(substr($source, $idx + 22, $endNC - ($idx + 22))) . "\n"; // BEGIN comment = 22 chars
                    $idx = $endNC + 20; // END comment = 20 chars
                } else {
                    $idx = $endC + 2;
                }
                continue;
            }

            // singleline
            if ($ch === '/' && $next === '/') {
                $endC = strpos($source, "\n", $idx + 2);
                if ($endC === false) $this->fatal('Invalid comment'); // not sure this can happen
                // stay on the newline so the whitespace handling below still sees it
                $idx = $endC;
                continue;
            }

            // tricky.  might be an RE
            if ($ch === '/' && $this->isRegexStart($source, $idx)) {
                // yes, this is an re
                // now move forward and find the end of it
                $j = 1;
                // set while inside a character class [...] where '/' does not terminate the re
                $inClass = false;
                while (true) {
                    if ($idx + $j >= $slen) {
                        // never run past the input; an unterminated literal is a parse error
                        $this->fatal('Found unterminated regular expression');
                    }
                    $c = $source[$idx + $j];
                    if ($c === '/' && !$inClass) break;
                    if ($c === '\\') {
                        // escaped character, skip it together with the backslash
                        $j += 2;
                        continue;
                    }
                    if ($c === '[') $inClass = true;
                    elseif ($c === ']') $inClass = false;
                    $j++;
                }
                $result .= substr($source, $idx, $j + 1);
                $idx += $j + 1;
                continue;
            }

            // double quote, single quote and backtick strings
            if ($ch === '"' || $ch === "'" || $ch === '`') {
                $j = 1;
                while ($idx + $j < $slen && $source[$idx + $j] !== $ch) {
                    // a backslash always escapes the character following it
                    $j += ($source[$idx + $j] === '\\') ? 2 : 1;
                }
                $result .= $this->stripLineContinuations(substr($source, $idx, $j + 1));
                $idx += $j + 1;
                continue;
            }

            // whitespaces
            if ($ch === ' ' || $ch === "\r" || $ch === "\n" || $ch === "\t") {
                $lch = (string)substr($result, -1); // last char added

                // Only consider deleting whitespace if the signs before and after
                // are not equal and are not an operator which may not follow itself.
                if ($next !== '' && ($lch === '' || $next === ' '
                        || $lch !== $next
                        || strpos(self::OPS, $next) === false)) {
                    // leading spaces
                    if (strpos(self::CHARS, $next) !== false) {
                        $idx++;
                        continue;
                    }
                    // trailing spaces
                    //  if this ch is space AND the last char processed
                    //  is special, then skip the space
                    if ($lch !== '' && strpos(self::CHARS, $lch) !== false) {
                        $idx++;
                        continue;
                    }
                }

                // else after all of this convert the "whitespace" to
                // a single space.  It will get appended below
                $ch = ' ';
            }

            // other chars
            $result .= $ch;
            $idx++;
        }

        return trim($result);
    }

    /**
     * Decide whether the '/' at the given position starts a regular expression literal
     *
     * Looks back over whitespace to the previous significant token and compares it
     * against REGEX_STARTERS.
     *
     * @param string $source the code being compressed
     * @param int $idx position of the '/' in $source
     * @return bool
     */
    private function isRegexStart($source, $idx)
    {
        // rewind, skip white space - but never past the start of the input
        $pos = $idx - 1;
        while ($pos >= 0 && in_array($source[$pos], self::WHITESPACE_CHARS, true)) {
            $pos--;
        }

        if ($pos < 0) {
            // nothing in front of the slash: a statement can not start with a division.
            // '/*' is excluded, it is a (conditional) comment and never a regex
            return substr($source, $idx + 1, 1) !== '*';
        }

        foreach (self::REGEX_STARTERS as $starter) {
            $len = strlen($starter);
            $start = $pos + 1 - $len;
            if ($start < 0 || substr($source, $start, $len) !== $starter) continue;

            // punctuation always counts
            if (!preg_match('/^[A-Za-z]/', $starter)) return true;

            // keywords only count as a whole word, not as the tail of an identifier
            if ($start === 0 || !preg_match('/[A-Za-z0-9_$\x80-\xff]/', $source[$start - 1])) return true;
        }

        return false;
    }

    /**
     * Remove line continuations (backslash followed by a line break) from a string literal
     *
     * Escape sequences are consumed pairwise, so an escaped backslash in front of a
     * real line break is left untouched.
     *
     * @param string $literal the literal including its quotes
     * @return string
     */
    private function stripLineContinuations($literal)
    {
        // fast path: most literals contain no continuation at all
        if (strpos($literal, "\\\n") === false && strpos($literal, "\\\r\n") === false) {
            return $literal;
        }

        $stripped = preg_replace_callback(
            '/\\\\(\r\n|.)/s',
            static function ($match) {
                return ($match[1] === "\n" || $match[1] === "\r\n") ? '' : $match[0];
            },
            $literal
        );

        // preg_replace_callback returns null on failure; keep the literal unchanged then
        return $stripped === null ? $literal : $stripped;
    }

    /**
     * Helper to throw a fatal error
     *
     * Tries to give some context to locate the error: the line number and up to
     * 15 characters on either side of the current position.
     *
     * @param string $msg
     * @throws Exception
     */
    protected function fatal($msg)
    {
        $start = max(0, $this->idx - 15);
        // third argument of substr() is a length, not an end position
        $before = substr($this->source, $start, $this->idx - $start);
        $after = substr($this->source, $this->idx, 15);

        // count every line break in front of the current position, including those
        // inside comments and strings, plus the lines stripped from the start of the input
        $consumed = (string)substr($this->source, 0, $this->idx);
        $this->line = 1 + $this->lineOffset + substr_count($consumed, "\n");

        $msg = "$msg on line {$this->line}: '{$before}{$after}'";
        throw new Exception($msg);
    }
}
255