<?php
// xref: /dokuwiki/vendor/splitbrain/php-jsstrip/src/JSStrip.php (revision af28745a55598899ff91b5048e6acff9cd2ed1d8)

namespace splitbrain\JSStrip;

/**
 * Strip comments and whitespaces from given JavaScript Code
 *
 * This is a port of Nick Galbreath's python tool jsstrip.py which is
 * released under BSD license. See link for original code.
 *
 * The compressor is a single pass, character based scanner. It does not parse
 * JavaScript; it only recognizes comments, string/template literals and regular
 * expression literals well enough to leave them alone, and removes whitespace
 * around punctuation everywhere else.
 *
 * @author Nick Galbreath <nickg@modp.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 * @link   http://code.google.com/p/jsstrip/
 */
class JSStrip
{

    /**
     * Tokens after which a '/' starts a regular expression literal instead of
     * being the division operator.
     */
    const REGEX_STARTERS = [
        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
        'return', 'yield', 'else', 'throw', 'await'
    ];

    /** characters skipped when looking backwards for the token in front of a '/' */
    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];

    /**
     * items that don't need spaces next to them
     *
     * This is a plain character list used with strpos()/strcspn(), not a regular
     * expression, so nothing in it must be escaped with a backslash (a backslash
     * in here would make the backslash itself a member of the set).
     */
    const CHARS = "^&|!+-*/%=?:;,{}()<> \t\n\r'\"`[]~";

    /**
     * items which need a space if the sign before and after whitespace is equal.
     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
     */
    const OPS = "+-/";

    /** @var string the code currently being compressed (left trimmed, with a trailing newline) */
    protected $source;
    /** @var int byte offset in $source of the character currently being looked at */
    protected $idx = 0;
    /** @var int line number (1-based, in the original input) of the first character of $source */
    protected $line = 0;

    /**
     * Compress the given code
     *
     * @param string $source The JavaScript code to compress
     * @return string
     * @throws Exception if parsing fails
     */
    public function compress($source)
    {
        // strip all initial whitespace, but remember how many lines that removed
        // so error messages can still refer to lines of the original input
        $trimmed = ltrim($source);
        $skipped = (string) substr($source, 0, strlen($source) - strlen($trimmed));

        // the trailing newline guarantees that every lookahead below has at least
        // one more character available and that single line comments terminate
        $source = $trimmed . "\n";
        $slen = strlen($source); // size of input string
        $idx = 0;                // char index for input string
        $result = '';            // we store the final result here

        // track these as member variables, fatal() uses them for its context
        $this->source = $source;
        $this->idx = 0;
        $this->line = 1 + substr_count($skipped, "\n");

        while ($idx < $slen) {
            // copy all "boring" characters in one go. This is either a
            // reserved word (e.g. "for", "else", "if") or a
            // variable/object/method (e.g. "foo.color")
            $boring = strcspn($source, self::CHARS, $idx);
            if ($boring > 0) {
                $result .= substr($source, $idx, $boring);
                $idx += $boring;
            }
            // cannot happen as long as the appended newline is part of CHARS
            if ($idx >= $slen) break;

            $this->idx = $idx;
            $ch = $source[$idx];

            // multiline comments (keeping IE conditionals)
            if ($ch === '/' && $source[$idx + 1] === '*' && $source[$idx + 2] !== '@') {
                $endC = strpos($source, '*/', $idx + 2);
                if ($endC === false) $this->fatal('Found invalid /*..*/ comment');

                // check if this is a NOCOMPRESS comment
                if (substr($source, $idx, $endC + 2 - $idx) === '/* BEGIN NOCOMPRESS */') {
                    // take nested NOCOMPRESS comments into account: walk from marker
                    // to marker until the END belonging to our BEGIN was found
                    $depth = 0;
                    $nextNC = $endC;
                    do {
                        $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
                        $endNC = strpos($source, '/* END NOCOMPRESS */', $nextNC + 2);

                        if ($endNC === false) $this->fatal('Found invalid NOCOMPRESS comment');
                        if ($beginNC !== false && $beginNC < $endNC) {
                            $depth++;
                            $nextNC = $beginNC;
                        } else {
                            $depth--;
                            $nextNC = $endNC;
                        }
                    } while ($depth >= 0);

                    // verbatim copy contents, trimming but putting it on its own line
                    $result .= "\n" . trim(substr($source, $idx + 22, $endNC - ($idx + 22))) . "\n"; // BEGIN comment = 22 chars
                    $idx = $endNC + 20; // END comment = 20 chars
                } else {
                    $idx = $endC + 2;
                }
                continue;
            }

            // singleline comments: continue at the newline, which is then handled as whitespace
            if ($ch === '/' && $source[$idx + 1] === '/') {
                $endC = strpos($source, "\n", $idx + 2);
                if ($endC === false) $this->fatal('Invalid comment'); // not sure this can happen
                $idx = $endC;
                continue;
            }

            // tricky. might be an RE
            if ($ch === '/' && $this->isRegexPosition($source, $idx)) {
                $endR = $this->findRegexEnd($source, $idx, $slen);
                if ($endR !== -1) {
                    // yes, this is an re: copy it verbatim including both slashes
                    $result .= substr($source, $idx, $endR - $idx + 1);
                    $idx = $endR + 1;
                    continue;
                }
                // no closing slash on this line: it was a division after all,
                // the slash is handled like any other character below
            }

            // double quote, single quote and backtick strings
            if ($ch === '"' || $ch === "'" || $ch === '`') {
                $endS = $this->findStringEnd($source, $idx, $slen, $ch);
                // copy verbatim but remove multiline markers (backslash + newline)
                $result .= str_replace("\\\n", '', substr($source, $idx, $endS - $idx));
                $idx = $endS;
                continue;
            }

            // whitespaces
            if ($ch === ' ' || $ch === "\r" || $ch === "\n" || $ch === "\t") {
                $lch = (string) substr($result, -1); // last char added
                $hasNext = $idx + 1 < $slen;
                $next = $hasNext ? $source[$idx + 1] : '';

                // Only consider deleting whitespace if the signs before and after
                // are not equal and are not an operator which may not follow itself.
                if ($hasNext && (
                        $lch === ''
                        || $next === ' '
                        || $lch !== $next
                        || strpos(self::OPS, $next) === false
                    )) {
                    // leading spaces: whatever follows does not need a space in front of it
                    if (strpos(self::CHARS, $next) !== false) {
                        $idx++;
                        continue;
                    }
                    // trailing spaces
                    //  if this ch is space AND the last char processed
                    //  is special, then skip the space
                    if ($lch !== '' && strpos(self::CHARS, $lch) !== false) {
                        $idx++;
                        continue;
                    }
                }

                // else after all of this convert the "whitespace" to
                // a single space.  It will get appended below
                $ch = ' ';
            }

            // other chars
            $result .= $ch;
            $idx++;
        }

        return trim($result);
    }

    /**
     * Decide whether the slash at the given position starts a regular expression
     *
     * Looks at the last non-whitespace token in front of the slash. A slash that
     * has nothing but whitespace in front of it starts a statement and thus can
     * only be a regular expression.
     *
     * @param string $source the code being compressed
     * @param int $idx position of the slash
     * @return bool
     */
    private function isRegexPosition($source, $idx)
    {
        // rewind, skip white space - never look before the start of the string,
        // negative offsets would silently read from the end of it
        $prev = $idx - 1;
        while ($prev >= 0 && in_array($source[$prev], self::WHITESPACE_CHARS, true)) {
            $prev--;
        }
        if ($prev < 0) return true;

        foreach (self::REGEX_STARTERS as $starter) {
            $len = strlen($starter);
            $start = $prev + 1 - $len;
            if ($start >= 0 && substr($source, $start, $len) === $starter) return true;
        }
        return false;
    }

    /**
     * Find the slash that terminates the regular expression starting at $idx
     *
     * Escaped characters are skipped and a slash inside a character class
     * definition (enclosed by brackets []) does not terminate the expression.
     * Regular expression literals can not span lines, so the search gives up at
     * the first line break.
     *
     * @param string $source the code being compressed
     * @param int $idx position of the opening slash
     * @param int $slen length of $source
     * @return int position of the closing slash, -1 if there is none on this line
     */
    private function findRegexEnd($source, $idx, $slen)
    {
        $inClass = false;
        $pos = $idx + 1;
        while ($pos < $slen) {
            $c = $source[$pos];
            if ($c === '\\') {
                // skip the escaped character, unless the "escape" is a line break
                $escaped = ($pos + 1 < $slen) ? $source[$pos + 1] : "\n";
                if ($escaped === "\n" || $escaped === "\r") return -1;
                $pos += 2;
                continue;
            }
            if ($c === "\n" || $c === "\r") return -1;
            if ($c === '/' && !$inClass) return $pos;

            // check if we entered/exited a character class definition
            if ($c === '[') {
                $inClass = true;
            } elseif ($c === ']') {
                $inClass = false;
            }
            $pos++;
        }
        return -1;
    }

    /**
     * Find the end of the string literal starting at $idx
     *
     * A backslash always protects the character following it, so escaped quotes
     * do not end the string. An unterminated string extends to the end of the input.
     *
     * @param string $source the code being compressed
     * @param int $idx position of the opening quote
     * @param int $slen length of $source
     * @param string $quote the quote character that opened the string
     * @return int position of the first character after the closing quote
     */
    private function findStringEnd($source, $idx, $slen, $quote)
    {
        $pos = $idx + 1;
        while ($pos < $slen && $source[$pos] !== $quote) {
            $pos += ($source[$pos] === '\\') ? 2 : 1;
        }
        return $pos + 1;
    }

    /**
     * Helper to throw a fatal error
     *
     * Tries to give some context to locate the error: the line number within the
     * original input and up to 15 characters on either side of the current position.
     *
     * @param string $msg
     * @throws Exception
     */
    protected function fatal($msg)
    {
        // third argument of substr() is a length, not an end position
        $start = max(0, $this->idx - 15);
        $before = substr($this->source, $start, $this->idx - $start);
        $after = substr($this->source, $this->idx, 15);

        // count all line breaks in front of the current position, this includes
        // the ones inside of comments and strings that were skipped as a whole
        $line = $this->line + substr_count((string) substr($this->source, 0, $this->idx), "\n");

        $msg = "$msg on line {$line}: '{$before}{$after}'";
        throw new Exception($msg);
    }
}