Branch: Tag:

2016-03-02

2016-03-02 15:16:39 by Henrik Grubbström (Grubba) <grubba@grubba.org>

Roxen: Added decoding of surrogates to http_decode_string().

Some JavaScript code sends %u-encoded surrogate pairs.

377:   static void f_http_decode_string(INT32 args)   /*! @decl string http_decode_string(string encoded)    *! -  *! Decodes an http transport-encoded string. Knows about %XX and -  *! %uXXXX syntax. Treats %UXXXX as %uXXXX. It will treat '+' as '+' -  *! and not ' ', so form decoding needs to replace that in a prior -  *! step. +  *! Decodes an http transport-encoded string. Knows about @tt{%XX@} and +  *! @tt{%uXXXX@} syntax. Treats @tt{%UXXXX@} as @tt{%uXXXX@}. It will +  *! treat '+' as '+' and not ' ', so form decoding needs to replace that +  *! in a second step.    *! -  +  *! It also knows about UTF-16 surrogate pairs when decoding @tt{%UXXXX@} +  *! sequences. +  *!    *! @note    *! Performs a best-effort decoding. Invalid and truncated escapes    *! will still be decoded.
390:    int proc = 0;    int trunc = 0;    int size_shift; +  int got_surrogates = 0;    PCHARP foo, end;    struct string_builder newstr;   
457:    c |= ((hex<'A')?hex:(hex + 9)) & 15;    }    INC_PCHARP(foo, 5); +  if ((c & 0xf800) == 0xd800) { +  got_surrogates = 1; +  }    } else {    c = 0;    if (SUBTRACT_PCHARP(end, foo) > 2) {
472:    }       pop_n_elems(args); +  +  if (got_surrogates) { +  /* Convert the result string to a byte string. */ +  newstr.s->size_shift = 0; +  newstr.known_shift = 0; +  newstr.s->len <<= 1; +  +  /* Then run unicode_to_string() in native byte-order. */    push_string(finish_string_builder(&newstr)); -  +  push_int(2); +  f_unicode_to_string(2); +  } else { +  push_string(finish_string_builder(&newstr));    } -  + }      static void f_html_encode_string( INT32 args )   /*! @decl string html_encode_string(mixed in)