/*
 * Hunt - A refined core library for D programming language.
 *
 * Copyright (C) 2018-2019 HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.text.QuoteUtil;

import hunt.text.StringBuilder;
import hunt.Exceptions;

import std.ascii;
import std.conv;
import std.range;
import std.string;


/**
 * Provides consistent quoting support for Http header values and Extension configuration parameters.
 * <p>
 * While QuotedStringTokenizer exists in the utils, and works great with http header values, using it in
 * websocket-api is undesired.
 * <ul>
 * <li>Using QuotedStringTokenizer would introduce a dependency to the utils that would need to be exposed via the
 * WebAppContext classloader</li>
 * <li>The ABNF defined extension parameter parsing requirements of RFC-6455 (WebSocket) are slightly different from
 * the ABNF parsing defined in RFC-2616 (HTTP/1.1).</li>
 * <li>Future HTTPbis ABNF changes for parsing will impact QuotedStringTokenizer</li>
 * </ul>
 * It was decided to keep this implementation separate for the above reasons.
 */
class QuoteUtil {
    /**
     * Input range that walks a string, yielding one token per delimiter-separated part.
     * <p>
     * Delimiters that appear inside single or double quotes do not split the token. Inside quotes a backslash is
     * dropped and the character following it is taken literally. Each token is stripped of surrounding whitespace
     * and passed through <code>QuoteUtil.dequote</code> before it is returned by <code>front()</code>.
     */
    private static class DeQuotingStringIterator : InputRange!string {
        private enum State {
            START,
            TOKEN,
            QUOTE_SINGLE,
            QUOTE_DOUBLE
        }

        private string input;
        private string delims;
        private StringBuilder token;
        private bool hasToken = false;
        // Index of the next unread character of input.
        private size_t i = 0;

        /**
         * @param input the string to split
         * @param delims the characters that separate tokens
         */
        this(string input, string delims) {
            this.input = input;
            this.delims = delims;
            size_t len = input.length;
            token = new StringBuilder(len > 1024 ? 512 : len / 2);

            // Prime the range so that front()/empty() are valid immediately.
            popFront();
        }

        /**
         * Appends a character to the current token, skipping whitespace until the token has started.
         */
        private void appendToken(char c) {
            if (!hasToken) {
                if (isWhite(c)) {
                    return; // skip whitespace at start of token.
                }
                hasToken = true;
            }
            token.append(c);
        }

        /**
         * @return true when the last call to popFront() did not produce a token
         */
        bool empty() {
            return !hasToken;
        }

        /**
         * @return the current token, stripped and de-quoted
         * @throws NoSuchElementException when the range is empty
         */
        string front() @property {
            if (!hasToken) {
                throw new NoSuchElementException();
            }
            string ret = token.toString();
            return QuoteUtil.dequote(ret.strip());
        }

        /**
         * Discards the current token and scans the input for the next one.
         * <p>
         * Scanning stops at the first unquoted delimiter (which is consumed) or at the end of the input.
         */
        void popFront() {
            token.setLength(0);
            hasToken = false;

            State state = State.START;
            bool escape = false;
            immutable size_t inputLen = input.length;

            while (i < inputLen) {
                immutable char c = input[i++];

                final switch (state) {
                    case State.START: {
                        // The first character is always kept, even if it is a delimiter.
                        if (c == '\'') {
                            state = State.QUOTE_SINGLE;
                        } else if (c == '"') {
                            state = State.QUOTE_DOUBLE;
                        } else {
                            state = State.TOKEN;
                        }
                        appendToken(c);
                        break;
                    }
                    case State.TOKEN: {
                        if (delims.indexOf(c) >= 0) {
                            // Unquoted delimiter: the token (if any) is complete.
                            return;
                        }
                        if (c == '\'') {
                            state = State.QUOTE_SINGLE;
                        } else if (c == '"') {
                            state = State.QUOTE_DOUBLE;
                        }
                        appendToken(c);
                        break;
                    }
                    case State.QUOTE_SINGLE:
                    case State.QUOTE_DOUBLE: {
                        immutable char closing = (state == State.QUOTE_SINGLE) ? '\'' : '"';
                        if (escape) {
                            // Character after a backslash is taken literally.
                            escape = false;
                            appendToken(c);
                        } else if (c == closing) {
                            appendToken(c);
                            state = State.TOKEN;
                        } else if (c == '\\') {
                            // The backslash itself is not stored in the token.
                            escape = true;
                        } else {
                            appendToken(c);
                        }
                        break;
                    }
                }
            }
        }

        /**
         * Calls dg with each remaining token until the range is exhausted or dg returns non-zero.
         */
        int opApply(scope int delegate(string) dg) {
            if (dg is null) {
                throw new NullPointerException("");
            }
            int result = 0;
            while (hasToken && result == 0) {
                result = dg(front());
                popFront();
            }
            return result;
        }

        /**
         * Calls dg with a zero-based counter and each remaining token until the range is exhausted or dg returns
         * non-zero.
         */
        int opApply(scope int delegate(size_t, string) dg) {
            if (dg is null) {
                throw new NullPointerException("");
            }
            int result = 0;
            size_t index = 0;
            while (hasToken && result == 0) {
                result = dg(index++, front());
                popFront();
            }
            return result;
        }

        /**
         * Not supported by this range.
         *
         * @throws UnsupportedOperationException always
         */
        string moveFront() {
            throw new UnsupportedOperationException("Remove not supported with this iterator");
        }
    }

    /**
     * ABNF from RFC 2616, RFC 822, and RFC 6455 specified characters requiring quoting.
     */
    enum string ABNF_REQUIRED_QUOTING = "\"'\\\n\r\t\f\b%+ ;=";

    // Marker stored in the escapes table for control characters that have no short escape form.
    private enum char UNICODE_TAG = cast(char)0xFF;
    // Indexed by control character (0..31): the letter of its short escape, or UNICODE_TAG.
    private __gshared char[] escapes;

    shared static this() {
        escapes = new char[32];
        escapes[] = UNICODE_TAG;
        // non-unicode
        escapes['\b'] = 'b';
        escapes['\t'] = 't';
        escapes['\n'] = 'n';
        escapes['\f'] = 'f';
        escapes['\r'] = 'r';
    }

    /**
     * Converts one ASCII hexadecimal digit to its numeric value.
     *
     * @param b the digit ('0'-'9', 'a'-'f' or 'A'-'F')
     * @return the value 0..15
     * @throws IllegalArgumentException when b is not a hexadecimal digit
     */
    private static int dehex(byte b) {
        if ((b >= '0') && (b <= '9')) {
            return cast(byte) (b - '0');
        }
        if ((b >= 'a') && (b <= 'f')) {
            return cast(byte) ((b - 'a') + 10);
        }
        if ((b >= 'A') && (b <= 'F')) {
            return cast(byte) ((b - 'A') + 10);
        }
        throw new IllegalArgumentException("!hex:" ~ to!string(0xff & b, 16));
    }

    /**
     * Decodes the four hexadecimal digits of a <code>\uXXXX</code> escape into UTF-8.
     * <p>
     * Values that are not valid Unicode scalar values (lone surrogates) are replaced by U+FFFD.
     *
     * @param hex exactly the four digits following the 'u'
     * @return the UTF-8 encoding of the code point
     * @throws IllegalArgumentException when a character is not a hexadecimal digit
     */
    private static string decodeUnicodeEscape(string hex) {
        import std.utf : encode, isValidDchar;

        uint value = 0;
        foreach (char digit; hex) {
            value = (value << 4) | cast(uint) dehex(cast(byte) digit);
        }

        dchar codePoint = cast(dchar) value;
        if (!isValidDchar(codePoint)) {
            codePoint = '\uFFFD';
        }

        char[4] utf8;
        immutable size_t count = encode(utf8, codePoint);
        return utf8[0 .. count].idup;
    }

    /**
     * Remove quotes from a string, only if the input string starts with and ends with the same quote character.
     * <p>
     * Strings shorter than two characters (including the empty string and a lone quote character) cannot be
     * quoted and are returned unchanged.
     *
     * @param str the string to remove surrounding quotes from
     * @return the de-quoted string
     */
    static string dequote(string str) {
        if (str.length < 2) {
            // A single character cannot be both the opening and the closing quote.
            return str;
        }
        immutable char start = str[0];
        immutable bool isQuote = (start == '\'') || (start == '"');
        if (isQuote && str[$ - 1] == start) {
            return str[1 .. $ - 1];
        }
        return str;
    }

    /**
     * Appends str to buf with double quotes, backslashes and control characters escaped.
     * <p>
     * Control characters (below 32) use their short form (<code>\b \t \n \f \r</code>) when one exists and
     * <code>\u00XX</code> otherwise.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to escape
     */
    static void escape(StringBuilder buf, string str) {
        foreach (char c; str) {
            if (c >= 32) {
                // non special character
                if ((c == '"') || (c == '\\')) {
                    buf.append('\\');
                }
                buf.append(c);
                continue;
            }

            // special characters, requiring escaping
            immutable char shortForm = escapes[c];
            if (shortForm != UNICODE_TAG) {
                // normal escape
                buf.append('\\').append(shortForm);
                continue;
            }

            // unicode escape, zero-padded to four hex digits
            buf.append("\\u00");
            if (c < 0x10) {
                buf.append('0');
            }
            buf.append(to!string(cast(int) c, 16)); // hex
        }
    }

    /**
     * Simple quote of a string, escaping where needed.
     *
     * @param buf the StringBuilder to append to
     * @param str the string to quote
     */
    static void quote(StringBuilder buf, string str) {
        buf.append('"');
        escape(buf, str);
        buf.append('"');
    }

    /**
     * Append into buf the provided string, adding quotes if needed.
     * <p>
     * Quoting is applied if any of the characters in the <code>delim</code> are found in the input
     * <code>str</code>. Nothing is appended when <code>str</code> is null or empty.
     *
     * @param buf the buffer to append to
     * @param str the string to possibly quote
     * @param delim the delimiter characters that will trigger automatic quoting
     */
    static void quoteIfNeeded(StringBuilder buf, string str, string delim) {
        if (str is null) {
            return;
        }
        if (str.length == 0) {
            return;
        }

        // check for delimiters in input string
        foreach (char ch; str) {
            if (delim.indexOf(ch) >= 0) {
                // found a delimiter character. we need to quote it.
                quote(buf, str);
                return;
            }
        }

        // no special delimiters used, no quote needed.
        buf.append(str);
    }

    /**
     * Create an iterator of the input string, breaking apart the string at the provided delimiters, removing quotes
     * and trimming the parts of the string as needed.
     *
     * @param str the input string to split apart
     * @param delims the delimiter characters to split the string on
     * @return the iterator of the parts of the string, trimmed, with quotes around the string part removed
     */
    static InputRange!string splitAt(string str, string delims) {
        return new DeQuotingStringIterator(str.strip(), delims);
    }

    /**
     * Resolves backslash escapes in a string.
     * <p>
     * Recognised escapes are <code>\n \r \t \f \b \\ \/ \"</code> and <code>\uXXXX</code> (four hexadecimal
     * digits, appended as UTF-8). Any other escaped character is appended as-is. A trailing lone backslash is
     * dropped.
     *
     * @param str the string to unescape, may be null
     * @return the unescaped string, or null when str is null
     * @throws IllegalArgumentException when a <code>\u</code> escape is truncated or contains a non-hex digit
     */
    static string unescape(string str) {
        if (str is null) {
            // nothing there
            return null;
        }

        immutable size_t len = str.length;
        if (len <= 1) {
            // impossible to be escaped
            return str;
        }

        StringBuilder ret = new StringBuilder(len - 2);
        bool escaped = false;
        for (size_t i = 0; i < len; i++) {
            immutable char c = str[i];
            if (!escaped) {
                if (c == '\\') {
                    escaped = true;
                } else {
                    ret.append(c);
                }
                continue;
            }

            escaped = false;
            switch (c) {
                case 'n':
                    ret.append('\n');
                    break;
                case 'r':
                    ret.append('\r');
                    break;
                case 't':
                    ret.append('\t');
                    break;
                case 'f':
                    ret.append('\f');
                    break;
                case 'b':
                    ret.append('\b');
                    break;
                case '\\':
                    ret.append('\\');
                    break;
                case '/':
                    ret.append('/');
                    break;
                case '"':
                    ret.append('"');
                    break;
                case 'u':
                    // str[i] is the 'u' itself; the four hex digits are str[i + 1 .. i + 5].
                    if (len - i <= 4) {
                        throw new IllegalArgumentException("Truncated \\u escape at index " ~ to!string(i));
                    }
                    ret.append(decodeUnicodeEscape(str[i + 1 .. i + 5]));
                    i += 4; // skip the digits; the loop increment then moves past them
                    break;
                default:
                    ret.append(c);
                    break;
            }
        }
        return ret.toString();
    }

    // Not yet ported from the Java original:
    //
    // static string join(Object[] objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     int len = objs.length;
    //     for (int i = 0; i < len; i++) {
    //         if (i > 0) {
    //             ret.append(delim);
    //         }
    //         if (objs[i] instanceof string) {
    //             ret.append('"').append(objs[i]).append('"');
    //         } else {
    //             ret.append(objs[i]);
    //         }
    //     }
    //     return ret.toString();
    // }

    // static string join(Collection<?> objs, string delim) {
    //     if (objs is null) {
    //         return "";
    //     }
    //     StringBuilder ret = new StringBuilder();
    //     bool needDelim = false;
    //     foreach (Object obj ; objs) {
    //         if (needDelim) {
    //             ret.append(delim);
    //         }
    //         if (obj instanceof string) {
    //             ret.append('"').append(obj).append('"');
    //         } else {
    //             ret.append(obj);
    //         }
    //         needDelim = true;
    //     }
    //     return ret.toString();
    // }
}