1 <?php
2 
3 namespace Elastica;
4 
5 /**
6  * Elastica tools.
7  *
8  * @author Nicolas Ruflin <spam@ruflin.com>
9  * @author Thibault Duplessis <thibault.duplessis@gmail.com>
10  * @author Oleg Zinchenko <olegz@default-value.com>
11  * @author Roberto Nygaard <roberto@nygaard.es>
12  */
class Util
{
    /**
     * Characters that are percent-encoded by {@see Util::escapeDateMath()}.
     *
     * Index-aligned with $escapedDateMathSymbols: the n-th symbol is replaced by the n-th escape sequence.
     *
     * @var list<string>
     */
    protected static $dateMathSymbols = ['<', '>', '/', '{', '}', '|', '+', ':', ','];

    /**
     * Percent-encoded counterparts of $dateMathSymbols, in the same order.
     *
     * @var list<string>
     */
    protected static $escapedDateMathSymbols = ['%3C', '%3E', '%2F', '%7B', '%7D', '%7C', '%2B', '%3A', '%2C'];

    /**
     * Checks if date math is already escaped within request URI.
     *
     * The check is a heuristic: the URI counts as escaped as soon as it contains
     * an encoded slash ("%2F", matched case-insensitively) anywhere.
     *
     * @param string $requestUri
     *
     * @return bool
     */
    public static function isDateMathEscaped($requestUri)
    {
        // In practice, the only symbol that really needs to be escaped in URI is '/' => '%2F'
        return false !== \stripos($requestUri, '%2F');
    }

    /**
     * Escapes date math symbols within request URI.
     *
     * Everything from the start of the URI up to the first '/' that follows the
     * last '>' is percent-encoded using the date math symbol map; the rest of the
     * URI is left untouched. A URI without any '>' is returned unchanged.
     *
     * Example: "<log-{now/d}>,log-2011.12.01/log/_refresh"
     *       => "%3Clog-%7Bnow%2Fd%7D%3E%2Clog-2011.12.01/log/_refresh"
     *
     * Note that every '/' inside the escaped segment is encoded, including a
     * leading one, so the URI is expected to start with the index expression.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.x/date-math-index-names.html
     *
     * @param string $requestUri
     *
     * @return string
     */
    public static function escapeDateMath($requestUri)
    {
        if (empty($requestUri)) {
            return $requestUri;
        }

        // Date math expressions are wrapped in '<...>'. Without a closing '>' there is nothing to escape.
        $lastClosingBracket = \strrpos($requestUri, '>');
        if (false === $lastClosingBracket) {
            return $requestUri;
        }

        // The segment to escape ends at the next '/' after the last '>', or at the end of the URI.
        $segmentEnd = \strpos($requestUri, '/', $lastClosingBracket);
        if (false === $segmentEnd) {
            $segmentEnd = \strlen($requestUri);
        }

        $segment = \substr($requestUri, 0, $segmentEnd);
        $escapedSegment = \str_replace(static::$dateMathSymbols, static::$escapedDateMathSymbols, $segment);

        // Braces preceded by two backslashes are meant literally: restore them after the blanket replacement.
        if (false !== \strpos($segment, '\\\\')) {
            $escapedSegment = \str_replace(['\\\\%7B', '\\\\%7D'], ['\\\\{', '\\\\}'], $escapedSegment);
        }

        return \substr_replace($requestUri, $escapedSegment, 0, $segmentEnd);
    }

    /**
     * Replace known reserved words (e.g. AND OR NOT)
     * and
     * escape known special characters (e.g. + - && || ! ( ) { } [ ] ^ " ~ * ? : etc.).
     *
     * The boolean words are replaced first, so the operators they turn into
     * are escaped by the second step as well.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_boolean_operators
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_reserved_characters
     *
     * @param string $term Query term to replace and escape
     *
     * @return string Replaced and escaped query term
     */
    public static function replaceBooleanWordsAndEscapeTerm($term)
    {
        return self::escapeTerm(self::replaceBooleanWords($term));
    }

    /**
     * Escapes the following terms (because part of the query language)
     * + - && || ! ( ) { } [ ] ^ " ~ * ? : \ /
     * by prefixing each occurrence with a backslash, and removes < and >.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
     *
     * @param string $term Query term to escape
     *
     * @return string Escaped query term
     */
    public static function escapeTerm($term)
    {
        // \ escaping has to be first, otherwise the backslashes added for the other characters get escaped again
        $escapableChars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/'];

        $escaped = $term;
        foreach ($escapableChars as $char) {
            $escaped = \str_replace($char, '\\'.$char, $escaped);
        }

        // < and > cannot be escaped, so they are removed
        // @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
        return \str_replace(['<', '>'], '', $escaped);
    }

    /**
     * Replace the following reserved words (because part of the query language)
     * AND OR NOT.
     *
     * Only upper-case words surrounded by single spaces are replaced:
     * " AND " => " && ", " OR " => " || ", " NOT " => " !".
     *
     * @see http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Boolean%20operators
     *
     * @param string $term Query term to replace
     *
     * @return string Replaced query term
     */
    public static function replaceBooleanWords($term)
    {
        return \strtr($term, [
            ' AND ' => ' && ',
            ' OR ' => ' || ',
            ' NOT ' => ' !',
        ]);
    }

    /**
     * Converts a snake_case string to CamelCase.
     *
     * For example: hello_world to HelloWorld
     *
     * Spaces already present in the input are removed as well, because
     * underscores are turned into spaces before the words are capitalised.
     *
     * @param string $string snake_case string
     *
     * @return string CamelCase string
     */
    public static function toCamelCase($string)
    {
        $words = \ucwords(\str_replace('_', ' ', $string));

        return \str_replace(' ', '', $words);
    }

    /**
     * Converts a CamelCase string to snake_case.
     *
     * For Example HelloWorld to hello_world
     *
     * Only the ASCII letters A-Z are treated as word boundaries.
     *
     * @param string $string CamelCase String to Convert
     *
     * @return string SnakeCase string
     */
    public static function toSnakeCase($string)
    {
        // Lower-case the first letter up front so the result does not start with an underscore.
        $underscored = \preg_replace('/[A-Z]/', '_\\0', \lcfirst($string));

        return \strtolower($underscored);
    }

    /**
     * Converts given time to format: 1995-12-31T23:59:59Z.
     *
     * This is the lucene date format
     *
     * Integers are used as Unix timestamps, everything else is parsed with strtotime().
     * The trailing "Z" is a literal: date() formats in the script's default timezone
     * and no conversion to UTC takes place here.
     * NOTE(review): a string strtotime() cannot parse yields false, which date()
     * appears to treat as timestamp 0 - confirm whether callers rely on that.
     *
     * @param int|string $date Date input (could be string etc.) -> must be supported by strtotime
     *
     * @return string Converted date string
     */
    public static function convertDate($date)
    {
        $timestamp = \is_int($date) ? $date : \strtotime($date);

        return \date('Y-m-d\TH:i:s\Z', $timestamp);
    }

    /**
     * Convert a date-time object to format 1995-12-31T23:59:59+02:00,
     * or 1995-12-31T23:59:59Z when the timezone offset is not included.
     *
     * Accepts any \DateTimeInterface, i.e. both \DateTime and \DateTimeImmutable.
     * The "Z" suffix is a literal: the value is formatted in the object's own
     * timezone and is not converted to UTC.
     *
     * @param \DateTimeInterface $dateTime        Date-time to format
     * @param bool               $includeTimezone Append the UTC offset (e.g. +02:00) instead of a literal "Z"
     *
     * @return string
     */
    public static function convertDateTimeObject(\DateTimeInterface $dateTime, bool $includeTimezone = true)
    {
        $suffix = true === $includeTimezone ? 'P' : '\Z';

        return $dateTime->format('Y-m-d\TH:i:s'.$suffix);
    }

    /**
     * Tries to guess the name of the param, based on its class
     * Example: \Elastica\Query\MatchAll => match_all.
     *
     * The namespace is dropped, a trailing "Query" is removed from the short
     * class name and the remainder is converted to snake_case.
     *
     * Deprecated: every call triggers a deprecation notice.
     *
     * @param object|string $class Object or class name
     *
     * @return string parameter name
     */
    public static function getParamName($class)
    {
        \trigger_deprecation('ruflin/elastica', '7.1.0', 'The "%s()" method is deprecated. It will be removed in 8.0.', __METHOD__);

        if (\is_object($class)) {
            $class = \get_class($class);
        }

        $namespaceParts = \explode('\\', $class);
        $shortName = \array_pop($namespaceParts);
        $shortName = \preg_replace('/Query$/', '', $shortName); // for BoolQuery and MatchQuery

        return self::toSnakeCase($shortName);
    }

    /**
     * Converts Request to Curl console command.
     *
     * The command has the form
     *   curl -X<METHOD> 'http://<host>:<port>/<path>[?<query>]' [-d '<json body>']
     * The scheme is always "http". Single quotes inside the URL or the body are
     * escaped so the command can be pasted into a POSIX shell as is.
     *
     * @return string
     */
    public static function convertRequestToCurlCommand(Request $request)
    {
        $message = 'curl -X'.\strtoupper($request->getMethod()).' ';

        $url = 'http://'.$request->getConnection()->getHost().':'.$request->getConnection()->getPort().'/';
        $url .= $request->getPath();

        $query = $request->getQuery();
        if (!empty($query)) {
            $url .= '?'.\http_build_query($query);
        }

        $message .= self::quoteShellArgument($url);

        $data = $request->getData();
        if (!empty($data)) {
            $message .= ' -d '.self::quoteShellArgument(JSON::stringify($data));
        }

        return $message;
    }

    /**
     * Wraps a value in single quotes, rewriting every embedded single quote as '\''
     * so that the result stays a single shell argument.
     *
     * escapeshellarg() is deliberately not used: its output depends on the platform,
     * whereas the generated command keeps the same single-quoted form everywhere.
     */
    private static function quoteShellArgument(string $value): string
    {
        return '\''.\str_replace('\'', '\'\\\'\'', $value).'\'';
    }
}
254