1 /*
2  * Hunt - A refined core library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.text.QuoteUtil;
13 
14 import hunt.text.StringBuilder;
15 import hunt.Exceptions;
16 
17 import std.ascii;
18 import std.conv;
19 import std.range;
20 import std.string;
21 
22 
23 /**
24  * Provide some consistent Http header value and Extension configuration parameter quoting support.
25  * <p>
26  * While QuotedStringTokenizer exists in the utils, and works great with http header values, using it in websocket-api is undesired.
27  * <ul>
28  * <li>Using QuotedStringTokenizer would introduce a dependency to the utils that would need to be exposed via the WebAppContext classloader</li>
29  * <li>ABNF defined extension parameter parsing requirements of RFC-6455 (WebSocket) ABNF, is slightly different than the ABNF parsing defined in RFC-2616
30  * (HTTP/1.1).</li>
31  * <li>Future HTTPbis ABNF changes for parsing will impact QuotedStringTokenizer</li>
32  * </ul>
33  * It was decided to keep this implementation separate for the above reasons.
34  */
35 class QuoteUtil {
36     private static class DeQuotingStringIterator : InputRange!string { 
        // Scanner states used by popFront() while walking the input.
        private enum State {
            START,          // first character of a new token has not been consumed yet
            TOKEN,          // inside the unquoted part of a token
            QUOTE_SINGLE,   // inside a section opened by a single quote
            QUOTE_DOUBLE    // inside a section opened by a double quote
        }

        private string input;           // text being split
        private string delims;          // characters that end a token when seen in the TOKEN state
        private StringBuilder token;    // characters collected for the current token
        private bool hasToken = false;  // set once a non-whitespace character has been stored in token
        private int i = 0;              // index of the next unread character of input
49 
50         this(string input, string delims) {
51             this.input = input;
52             this.delims = delims;
53             size_t len = input.length;
54             token = new StringBuilder(len > 1024 ? 512 : len / 2);
55 
56             popFront();
57         }
58 
59         private void appendToken(char c) {
60             if (hasToken) {
61                 token.append(c);
62             } else {
63                 if (isWhite(c)) {
64                     return; // skip whitespace at start of token.
65                 } else {
66                     token.append(c);
67                     hasToken = true;
68                 }
69             }
70         }
71 
72         bool empty() {
73             return !hasToken;
74         }
75 
76         string front() @property { 
77             if (!hasToken) {
78                 throw new NoSuchElementException();
79             }
80             string ret = token.toString();
81             return QuoteUtil.dequote(ret.strip());
82          }
83 
84         void popFront() {
85             token.setLength(0);
86             hasToken = false;
87 
88             State state = State.START;
89             bool escape = false;
90             size_t inputLen = input.length;
91 
92             while (i < inputLen) {
93                 char c = input[i++];
94 
95                 switch (state) {
96                     case State.START: {
97                         if (c == '\'') {
98                             state = State.QUOTE_SINGLE;
99                             appendToken(c);
100                         } else if (c == '\"') {
101                             state = State.QUOTE_DOUBLE;
102                             appendToken(c);
103                         } else {
104                             appendToken(c);
105                             state = State.TOKEN;
106                         }
107                         break;
108                     }
109                     case State.TOKEN: {
110                         if (delims.indexOf(c) >= 0) {
111                             // System.out.printf("hasNext/t: %b [%s]%n",hasToken,token);
112                             // return hasToken;
113                             return;
114                         } else if (c == '\'') {
115                             state = State.QUOTE_SINGLE;
116                         } else if (c == '\"') {
117                             state = State.QUOTE_DOUBLE;
118                         }
119                         appendToken(c);
120                         break;
121                     }
122                     case State.QUOTE_SINGLE: {
123                         if (escape) {
124                             escape = false;
125                             appendToken(c);
126                         } else if (c == '\'') {
127                             appendToken(c);
128                             state = State.TOKEN;
129                         } else if (c == '\\') {
130                             escape = true;
131                         } else {
132                             appendToken(c);
133                         }
134                         break;
135                     }
136                     case State.QUOTE_DOUBLE: {
137                         if (escape) {
138                             escape = false;
139                             appendToken(c);
140                         } else if (c == '\"') {
141                             appendToken(c);
142                             state = State.TOKEN;
143                         } else if (c == '\\') {
144                             escape = true;
145                         } else {
146                             appendToken(c);
147                         }
148                         break;
149                     }
150 
151                     default: break;
152                 }
153                 // System.out.printf("%s <%s> : [%s]%n",state,c,token);
154             }
155         }
156 
157 
158         int opApply(scope int delegate(string) dg) {
159             if(dg is null)
160                 throw new NullPointerException("");
161             int result = 0;
162             while(hasToken && result == 0) {
163                 result = dg(front());
164                 popFront();
165             }
166             return result;
167         }
168 
169         int opApply(scope int delegate(size_t, string) dg) {
170             if(dg is null)
171                 throw new NullPointerException("");
172             int result = 0;          
173             size_t index = 0;
174             while(hasToken && result == 0) {
175                 result = dg(index++, front());
176                 popFront();
177             }
178             return result;
179         }
180 
181         string moveFront() {
182             throw new UnsupportedOperationException("Remove not supported with this iterator");
183         }
184 
185 /++
186         // override
187         bool hasNext() {
188             // already found a token
189             if (hasToken) {
190                 return true;
191             }
192 
193             State state = State.START;
194             bool escape = false;
195             size_t inputLen = input.length;
196 
197             while (i < inputLen) {
198                 char c = input.charAt(i++);
199 
200                 switch (state) {
201                     case State.START: {
202                         if (c == '\'') {
203                             state = State.QUOTE_SINGLE;
204                             appendToken(c);
205                         } else if (c == '\"') {
206                             state = State.QUOTE_DOUBLE;
207                             appendToken(c);
208                         } else {
209                             appendToken(c);
210                             state = State.TOKEN;
211                         }
212                         break;
213                     }
214                     case State.TOKEN: {
215                         if (delims.indexOf(c) >= 0) {
216                             // System.out.printf("hasNext/t: %b [%s]%n",hasToken,token);
217                             return hasToken;
218                         } else if (c == '\'') {
219                             state = State.QUOTE_SINGLE;
220                         } else if (c == '\"') {
221                             state = State.QUOTE_DOUBLE;
222                         }
223                         appendToken(c);
224                         break;
225                     }
226                     case State.QUOTE_SINGLE: {
227                         if (escape) {
228                             escape = false;
229                             appendToken(c);
230                         } else if (c == '\'') {
231                             appendToken(c);
232                             state = State.TOKEN;
233                         } else if (c == '\\') {
234                             escape = true;
235                         } else {
236                             appendToken(c);
237                         }
238                         break;
239                     }
240                     case State.QUOTE_DOUBLE: {
241                         if (escape) {
242                             escape = false;
243                             appendToken(c);
244                         } else if (c == '\"') {
245                             appendToken(c);
246                             state = State.TOKEN;
247                         } else if (c == '\\') {
248                             escape = true;
249                         } else {
250                             appendToken(c);
251                         }
252                         break;
253                     }
254 
255                     default: break;
256                 }
257                 // System.out.printf("%s <%s> : [%s]%n",state,c,token);
258             }
259             // System.out.printf("hasNext/e: %b [%s]%n",hasToken,token);
260             return hasToken;
261         }
262 
263         // override
264         string next() {
265             if (!hasNext()) {
266                 throw new NoSuchElementException();
267             }
268             string ret = token.toString();
269             token.setLength(0);
270             hasToken = false;
271             return QuoteUtil.dequote(ret.strip());
272         }
273 ++/
274     }
275 
    /**
     * Characters that require a value to be quoted, per the ABNF of RFC 2616, RFC 822 and RFC 6455.
     */
    enum string ABNF_REQUIRED_QUOTING = "\"'\\\n\r\t\f\b%+ ;=";

    // Marker stored in `escapes` for characters that have no short backslash form;
    // escape() writes those as a "\u00XX" sequence instead.
    private enum char UNICODE_TAG = cast(char)0xFF;
    // Lookup table indexed by character code for characters below 32; filled in by the
    // static constructor and read by escape().
    private __gshared char[] escapes;
283 
284     shared static this() {
285         escapes = new char[32];
286         escapes[] = UNICODE_TAG;
287         // non-unicode
288         escapes['\b'] = 'b';
289         escapes['\t'] = 't';
290         escapes['\n'] = 'n';
291         escapes['\f'] = 'f';
292         escapes['\r'] = 'r';
293     }
294 
295     private static int dehex(byte b) {
296         if ((b >= '0') && (b <= '9')) {
297             return cast(byte) (b - '0');
298         }
299         if ((b >= 'a') && (b <= 'f')) {
300             return cast(byte) ((b - 'a') + 10);
301         }
302         if ((b >= 'A') && (b <= 'F')) {
303             return cast(byte) ((b - 'A') + 10);
304         }
305         throw new IllegalArgumentException("!hex:" ~ to!string(0xff & b, 16));
306     }
307 
308     /**
309      * Remove quotes from a string, only if the input string start with and end with the same quote character.
310      *
311      * @param str the string to remove surrounding quotes from
312      * @return the de-quoted string
313      */
314     static string dequote(string str) {
315         char start = str[0];
316         if ((start == '\'') || (start == '\"')) {
317             // possibly quoted
318             char end = str[$ - 1];
319             if (start == end) {
320                 // dequote
321                 return str[1 .. $-1];
322             }
323         }
324         return str;
325     }
326 
327     static void escape(StringBuilder buf, string str) {
328         foreach (char c ; str) {
329             if (c >= 32) {
330                 // non special character
331                 if ((c == '"') || (c == '\\')) {
332                     buf.append('\\');
333                 }
334                 buf.append(c);
335             } else {
336                 // special characters, requiring escaping
337                 char escaped = escapes[c];
338 
339                 // is this a unicode escape?
340                 if (escaped == UNICODE_TAG) {
341                     buf.append("\\u00");
342                     if (c < 0x10) {
343                         buf.append('0');
344                     }
345                     buf.append(to!string(cast(int)c, 16)); // hex
346                 } else {
347                     // normal escape
348                     buf.append('\\').append(escaped);
349                 }
350             }
351         }
352     }
353 
354     /**
355      * Simple quote of a string, escaping where needed.
356      *
357      * @param buf the StringBuilder to append to
358      * @param str the string to quote
359      */
360     static void quote(StringBuilder buf, string str) {
361         buf.append('"');
362         escape(buf, str);
363         buf.append('"');
364     }
365 
366     /**
367      * Append into buf the provided string, adding quotes if needed.
368      * <p>
369      * Quoting is determined if any of the characters in the <code>delim</code> are found in the input <code>str</code>.
370      *
371      * @param buf   the buffer to append to
372      * @param str   the string to possibly quote
373      * @param delim the delimiter characters that will trigger automatic quoting
374      */
375     static void quoteIfNeeded(StringBuilder buf, string str, string delim) {
376         if (str is null) {
377             return;
378         }
379         // check for delimiters in input string
380         size_t len = str.length;
381         if (len == 0) {
382             return;
383         }
384         int ch;
385         for (size_t i = 0; i < len; i++) {
386             // ch = str.codePointAt(i);
387             ch = str[i];
388             if (delim.indexOf(ch) >= 0) {
389                 // found a delimiter codepoint. we need to quote it.
390                 quote(buf, str);
391                 return;
392             }
393         }
394 
395         // no special delimiters used, no quote needed.
396         buf.append(str);
397     }
398 
399     /**
400      * Create an iterator of the input string, breaking apart the string at the provided delimiters, removing quotes and triming the parts of the string as
401      * needed.
402      *
403      * @param str    the input string to split apart
404      * @param delims the delimiter characters to split the string on
405      * @return the iterator of the parts of the string, trimmed, with quotes around the string part removed, and unescaped
406      */
407     static InputRange!string splitAt(string str, string delims) {
408         return new DeQuotingStringIterator(str.strip(), delims);
409     }
410 
411     static string unescape(string str) {
412         if (str is null) {
413             // nothing there
414             return null;
415         }
416 
417         size_t len = str.length;
418         if (len <= 1) {
419             // impossible to be escaped
420             return str;
421         }
422 
423         StringBuilder ret = new StringBuilder(len - 2);
424         bool escaped = false;
425         char c;
426         for (size_t i = 0; i < len; i++) {
427             c = str[i];
428             if (escaped) {
429                 escaped = false;
430                 switch (c) {
431                     case 'n':
432                         ret.append('\n');
433                         break;
434                     case 'r':
435                         ret.append('\r');
436                         break;
437                     case 't':
438                         ret.append('\t');
439                         break;
440                     case 'f':
441                         ret.append('\f');
442                         break;
443                     case 'b':
444                         ret.append('\b');
445                         break;
446                     case '\\':
447                         ret.append('\\');
448                         break;
449                     case '/':
450                         ret.append('/');
451                         break;
452                     case '"':
453                         ret.append('"');
454                         break;
455                     case 'u':
456                         ret.append(cast(char) ((dehex(cast(byte) str[i++]) << 24) + 
457                             (dehex(cast(byte) str[i++]) << 16) + 
458                             (dehex(cast(byte) str[i++]) << 8) + 
459                             (dehex(cast(byte) str[i++]))));
460                         break;
461                     default:
462                         ret.append(c);
463                 }
464             } else if (c == '\\') {
465                 escaped = true;
466             } else {
467                 ret.append(c);
468             }
469         }
470         return ret.toString();
471     }
472 
473     // static string join(Object[] objs, string delim) {
474     //     if (objs is null) {
475     //         return "";
476     //     }
477     //     StringBuilder ret = new StringBuilder();
478     //     int len = objs.length;
479     //     for (int i = 0; i < len; i++) {
480     //         if (i > 0) {
481     //             ret.append(delim);
482     //         }
483     //         if (objs[i] instanceof string) {
484     //             ret.append('"').append(objs[i]).append('"');
485     //         } else {
486     //             ret.append(objs[i]);
487     //         }
488     //     }
489     //     return ret.toString();
490     // }
491 
492     // static string join(Collection<?> objs, string delim) {
493     //     if (objs is null) {
494     //         return "";
495     //     }
496     //     StringBuilder ret = new StringBuilder();
497     //     bool needDelim = false;
498     //     foreach (Object obj ; objs) {
499     //         if (needDelim) {
500     //             ret.append(delim);
501     //         }
502     //         if (obj instanceof string) {
503     //             ret.append('"').append(obj).append('"');
504     //         } else {
505     //             ret.append(obj);
506     //         }
507     //         needDelim = true;
508     //     }
509     //     return ret.toString();
510     // }
511 }