/* vim:syntax=ragel */

/*
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * Characters up to '9' are treated as decimal digits, characters below
 * 'G' as upper case hex letters and everything else as lower case hex
 * letters. No validation is done here; the state machine below only
 * invokes the macro on characters matched by xdigit.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define HEX2DEC(x) ((x) <= '9' ? (x) - '0' : ((x) < 'G') ? (x) - 'A' + 10 : (x) - 'a' + 10)

%%{
    machine JSON_string;
    alphtype int;
    include JSOND "json_defaults.rl";
    getkey ((int)INDEX_PCHARP(str, fpc));

    # The actions below use these variables of the enclosing C function:
    #   hexchr0, hexchr1  accumulators for the first and second \u escape
    #   mark              index where the current literal run begins
    #   s                 string builder receiving the decoded string
    #   validate          nonzero when the decoded string is to be built

    # First hex digit of a \u escape.
    action hex0beg {
        hexchr0 = HEX2DEC(fc);
    }

    # Each of the three remaining hex digits of a \u escape.
    action hex0mid {
        hexchr0 *= 16;
        hexchr0 += HEX2DEC(fc);
    }

    # Last hex digit of a \u escape has been consumed.
    action hex0end {
        if (IS_HIGH_SURROGATE (hexchr0)) {
            /* Chars outside the BMP can be expressed as two hex
             * escapes that code a surrogate pair, so see if we can
             * read a second one. */
            fnext hex1;
        } else {
            if (IS_NUNICODE(hexchr0)) {
                /* Rejected code point. fbreak advances the position
                 * by one, so step back first to stop on the current
                 * character. */
                fpc--; fbreak;
            }
            if (validate) {
                string_builder_putchar(&s, hexchr0);
            }
        }
    }

    # First hex digit of the second escape of a surrogate pair.
    action hex1beg {
        hexchr1 = HEX2DEC(fc);
    }

    # Each of the three remaining hex digits of the second escape.
    action hex1mid {
        hexchr1 *= 16;
        hexchr1 += HEX2DEC(fc);
    }

    # Last hex digit of the second escape has been consumed.
    action hex1end {
        if (!IS_LOW_SURROGATE (hexchr1)) {
            /* A high surrogate must be followed by a low surrogate. */
            fpc--; fbreak;
        }
        if (validate) {
            /* Combine the surrogate pair into one code point. */
            int cp = (((hexchr0 - 0xd800) << 10) | (hexchr1 - 0xdc00)) + 0x10000;
            string_builder_putchar(&s, cp);
        }
    }

    # Single character escape following a backslash.
    action add_unquote {
        if (validate) {
            switch(fc) {
                case '\'':
                case '"':
                case '/':
                case '\\': string_builder_putchar(&s, fc); break;
                case 'b': string_builder_putchar(&s, '\b'); break;
                case 'f': string_builder_putchar(&s, '\f'); break;
                case 'n': string_builder_putchar(&s, '\n'); break;
                case 'r': string_builder_putchar(&s, '\r'); break;
                case 't': string_builder_putchar(&s, '\t'); break;
                default: break;
            }
        }
    }

    # Start a literal run at the current character.
    action mark {
        mark = fpc;
    }

    # Start a literal run at the character after the current one.
    action mark_next {
        mark = fpc + 1;
    }

    # Flush the literal run between mark and the current character.
    action string_append {
        if (fpc - mark > 0) {
            if (validate)
                string_builder_append(&s, ADD_PCHARP(str, mark), fpc - mark);
        }
    }

    # A string is an opening quote followed by three labelled states:
    #   start    plain characters, a closing quote or a backslash
    #   unquote  the escape following a backslash
    #   hex1     the mandatory second \u escape after a high surrogate
    # The final from-state action stops the machine before the character
    # following the closing quote is processed.
    main := '"' . (
                   start: (
                       '"' >string_append -> final |
                       '\\' >string_append -> unquote |
                       (unicode - [\\"]) -> start
                   ),
                   unquote: (
                       ['"\\/bfnrt] >add_unquote -> start |
                       'u' . xdigit >hex0beg . (xdigit{3} $hex0mid) @hex0end -> start
                   ) @mark_next,
                   hex1: (
                       '\\u' . xdigit >hex1beg . (xdigit{3} $hex1mid) @hex1end -> start
                   ) @mark_next
                  ) >mark %*{ fpc--; fbreak; };
}%%
/*
 * Parse one JSON string literal by running the JSON_string machine over
 * str, starting at index p and ending at index pe.
 *
 * Unless JSON_VALIDATE is set in state->flags, the decoded string is
 * accumulated in a string builder and pushed with push_string() on
 * success. With JSON_VALIDATE set, the input is only checked and
 * nothing is built or pushed.
 *
 * Returns the value of p after the machine has run. On failure
 * JSON_ERROR is set in state->flags; if the input ran out (p == pe),
 * err_msg is set to "Unterminated string" and the original start index
 * is returned instead.
 *
 * NOTE(review): err_msg is not declared in this function; presumably it
 * is defined at file level by the including source - confirm.
 */
static ptrdiff_t _parse_JSON_string(PCHARP str, ptrdiff_t p, ptrdiff_t pe, struct parser_state *state) {
    /* Only assigned from within the generated action code; initialised
     * so that no path can read an indeterminate value. */
    int hexchr0 = 0, hexchr1 = 0;
    ptrdiff_t start = p, mark = 0;
    struct string_builder s;
    int cs;
    ONERROR handle;
    /* Despite its name, this is nonzero when JSON_VALIDATE is NOT set,
     * i.e. when the decoded string has to be built. */
    const int validate = !(state->flags&JSON_VALIDATE);

    %% write data;

    if (validate) {
        init_string_builder(&s, 0);
        /* Make sure the builder is released if an error is thrown. */
        SET_ONERROR (handle, free_string_builder, &s);
    }

    %% write init;
    %% write exec;

    if (cs < JSON_string_first_final) {
        /* The machine did not reach a final state: parse error. */
        if (validate) {
            UNSET_ONERROR(handle);
            free_string_builder(&s);
        }

        state->flags |= JSON_ERROR;
        if (p == pe) {
            err_msg = "Unterminated string";
            return start;
        }
        return p;
    }

    if (validate) {
        push_string(finish_string_builder(&s));
        UNSET_ONERROR(handle);
    }

    return p;
}

#undef HEX2DEC