<?php

namespace Elastica;

/**
 * Elastica tools.
 *
 * Stateless static helpers for URI date-math escaping, query_string term
 * escaping, case conversion, date formatting and request debugging.
 *
 * @author Nicolas Ruflin <spam@ruflin.com>
 * @author Thibault Duplessis <thibault.duplessis@gmail.com>
 * @author Oleg Zinchenko <olegz@default-value.com>
 * @author Roberto Nygaard <roberto@nygaard.es>
 */
class Util
{
    /**
     * Characters that are percent-encoded by escapeDateMath().
     * Index-aligned with $escapedDateMathSymbols.
     *
     * @var list<string>
     */
    protected static $dateMathSymbols = ['<', '>', '/', '{', '}', '|', '+', ':', ','];

    /**
     * Percent-encoded counterparts of $dateMathSymbols (same order).
     *
     * @var list<string>
     */
    protected static $escapedDateMathSymbols = ['%3C', '%3E', '%2F', '%7B', '%7D', '%7C', '%2B', '%3A', '%2C'];

    /**
     * Checks if date math is already escaped within request URI.
     *
     * The check is a case-insensitive search for an encoded slash ('%2F').
     *
     * @param string $requestUri
     *
     * @return bool
     */
    public static function isDateMathEscaped($requestUri)
    {
        // In practice, the only symbol that really needs to be escaped in URI is '/' => '%2F'
        return false !== \strpos(\strtoupper($requestUri), '%2F');
    }

    /**
     * Escapes date math symbols within request URI.
     *
     * Everything from the start of the string up to the first '/' that follows
     * the last '>' is escaped; the remainder is left untouched. A URI without
     * any '>' is returned unchanged.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.x/date-math-index-names.html
     *
     * @param string $requestUri
     *
     * @return string
     */
    public static function escapeDateMath($requestUri)
    {
        if (empty($requestUri)) {
            return $requestUri;
        }

        // Check if date math is used at all. Find last '>'. E.g. <log-{now/d}>,log-2011.12.01/log/_refresh
        $pos1 = \strrpos($requestUri, '>');
        if (false === $pos1) {
            return $requestUri;
        }

        // Find the position up to which we should escape.
        // Should be next slash '/' after last '>' E.g. <log-{now/d}>,log-2011.12.01/log/_refresh
        // When there is no such slash the whole URI is escaped.
        $pos2 = \strpos($requestUri, '/', $pos1);
        $pos2 = false !== $pos2 ? $pos2 : \strlen($requestUri);

        // Cut out the bit we need to escape: <log-{now/d}>,log-2011.12.01
        // The segment starts at offset 0, so a leading '/' would be escaped as well.
        $uriSegment = \substr($requestUri, 0, $pos2);

        // Escape using character map
        $escapedUriSegment = \str_replace(static::$dateMathSymbols, static::$escapedDateMathSymbols, $uriSegment);

        // '\\{' and '\\}' should not be escaped
        if (false !== \strpos($uriSegment, '\\\\')) {
            $escapedUriSegment = \str_replace(['\\\\%7B', '\\\\%7D'], ['\\\\{', '\\\\}'], $escapedUriSegment);
        }

        // Replace part of the string. E.g. %3Clog-%7Bnow%2Fd%7D%3E%2Clog-2011.12.01/log/_refresh
        return \substr_replace($requestUri, $escapedUriSegment, 0, $pos2);
    }

    /**
     * Replace known reserved words (e.g. AND OR NOT)
     * and
     * escape known special characters (e.g. + - && || ! ( ) { } [ ] ^ " ~ * ? : etc.).
     *
     * The words are replaced first, so the operators they turn into are escaped too.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_boolean_operators
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_reserved_characters
     *
     * @param string $term Query term to replace and escape
     *
     * @return string Replaced and escaped query term
     */
    public static function replaceBooleanWordsAndEscapeTerm($term)
    {
        $result = $term;
        $result = self::replaceBooleanWords($result);

        return self::escapeTerm($result);
    }

    /**
     * Escapes the following terms (because part of the query language)
     * \ + - && || ! ( ) { } [ ] ^ " ~ * ? : /
     * by prefixing them with a backslash, and removes < and > entirely.
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
     *
     * @param string $term Query term to escape
     *
     * @return string Escaped query term
     */
    public static function escapeTerm($term)
    {
        $result = $term;
        // \ escaping has to be first, otherwise escaped later once again
        $escapableChars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/'];

        foreach ($escapableChars as $char) {
            $result = \str_replace($char, '\\'.$char, $result);
        }

        // < and > cannot be escaped, so they should be removed
        // @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
        $nonEscapableChars = ['<', '>'];

        foreach ($nonEscapableChars as $char) {
            $result = \str_replace($char, '', $result);
        }

        return $result;
    }

    /**
     * Replace the following reserved words (because part of the query language)
     * AND OR NOT.
     *
     * Only upper-case words surrounded by single spaces are replaced:
     * ' AND ' becomes ' && ', ' OR ' becomes ' || ' and ' NOT ' becomes ' !'.
     *
     * @see http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Boolean%20operators
     *
     * @param string $term Query term to replace
     *
     * @return string Replaced query term
     */
    public static function replaceBooleanWords($term)
    {
        $replacementMap = [' AND ' => ' && ', ' OR ' => ' || ', ' NOT ' => ' !'];

        return \strtr($term, $replacementMap);
    }

    /**
     * Converts a snake_case string to CamelCase.
     *
     * For example: hello_world to HelloWorld
     *
     * @param string $string snake_case string
     *
     * @return string CamelCase string
     */
    public static function toCamelCase($string)
    {
        // Underscores become spaces so ucwords() can capitalise each word; the spaces are then dropped.
        return \str_replace(' ', '', \ucwords(\str_replace('_', ' ', $string)));
    }

    /**
     * Converts a CamelCase string to snake_case.
     *
     * For Example HelloWorld to hello_world
     *
     * @param string $string CamelCase String to Convert
     *
     * @return string SnakeCase string
     */
    public static function toSnakeCase($string)
    {
        // lcfirst() keeps the first letter from getting a leading underscore.
        return \strtolower(\preg_replace('/[A-Z]/', '_\\0', \lcfirst($string)));
    }

    /**
     * Converts given time to format: 1995-12-31T23:59:59Z.
     *
     * This is the lucene date format. Integers are used as Unix timestamps,
     * anything else is parsed with strtotime(). The result of strtotime() is
     * not validated, and the value is formatted with date(), i.e. in PHP's
     * default timezone, with a literal 'Z' appended.
     *
     * @param int|string $date Date input (could be string etc.) -> must be supported by strtotime
     *
     * @return string Converted date string
     */
    public static function convertDate($date)
    {
        if (\is_int($date)) {
            $timestamp = $date;
        } else {
            $timestamp = \strtotime($date);
        }

        return \date('Y-m-d\TH:i:s\Z', $timestamp);
    }

    /**
     * Convert a \DateTime object to format: 1995-12-31T23:59:59+02:00.
     *
     * Converts it to the lucene format, including the appropriate TimeZone.
     * When $includeTimezone is false a literal 'Z' replaces the offset
     * (1995-12-31T23:59:59Z); the time itself is not converted.
     *
     * @param \DateTime $dateTime        Date to format
     * @param bool      $includeTimezone Whether to append the UTC offset instead of a literal 'Z'
     *
     * @return string
     */
    public static function convertDateTimeObject(\DateTime $dateTime, bool $includeTimezone = true)
    {
        $formatString = 'Y-m-d\TH:i:s'.(true === $includeTimezone ? 'P' : '\Z');

        return $dateTime->format($formatString);
    }

    /**
     * Tries to guess the name of the param, based on its class
     * Example: \Elastica\Query\MatchAll => match_all.
     *
     * Deprecated: every call triggers a deprecation notice.
     *
     * @param object|string $class Object or class name
     *
     * @return string parameter name
     */
    public static function getParamName($class)
    {
        \trigger_deprecation('ruflin/elastica', '7.1.0', 'The "%s()" method is deprecated. It will be removed in 8.0.', __METHOD__);

        if (\is_object($class)) {
            $class = \get_class($class);
        }

        // Keep only the short class name (last namespace segment).
        $parts = \explode('\\', $class);
        $last = \array_pop($parts);
        $last = \preg_replace('/Query$/', '', $last); // for BoolQuery and MatchQuery

        return self::toSnakeCase($last);
    }

    /**
     * Converts Request to Curl console command.
     *
     * The URL and the JSON body are emitted as single-quoted shell arguments.
     * Single quotes contained in either of them are rewritten as '\'' so the
     * command stays one well-formed argument per value when pasted into a
     * POSIX shell.
     *
     * @param Request $request Request to render
     *
     * @return string
     */
    public static function convertRequestToCurlCommand(Request $request)
    {
        // Inside a single-quoted shell string the quote itself is the only special
        // character: close the string, emit an escaped quote, reopen the string.
        // escapeshellarg() is avoided on purpose because its output is platform dependent.
        $escapeSingleQuotes = static function ($value) {
            return \str_replace('\'', '\'\\\'\'', $value);
        };

        $message = 'curl -X'.\strtoupper($request->getMethod()).' ';

        $connection = $request->getConnection();
        $url = 'http://'.$connection->getHost().':'.$connection->getPort().'/';
        $url .= $request->getPath();

        $query = $request->getQuery();
        if (!empty($query)) {
            $url .= '?'.\http_build_query($query);
        }

        $message .= '\''.$escapeSingleQuotes($url).'\'';

        $data = $request->getData();
        if (!empty($data)) {
            $message .= ' -d \''.$escapeSingleQuotes(JSON::stringify($data)).'\'';
        }

        return $message;
    }
}