Branch: Tag:

2014-10-08

2014-10-08 16:30:32 by Henrik Grubbström (Grubba) <grubba@grubba.org>

_Roxen: http_decode_string() now supports wide strings.

Wide strings may show up in URLs, e.g. in tag attributes.

Fixes [bug 7318 (#7318)].

383:    *! step.    */   { -  int proc; -  int size_shift = 0; -  int adjust_len = 0; -  p_wchar0 *foo, *bar, *end; -  struct pike_string *newstr; +  int proc = 0; +  int size_shift; +  PCHARP foo, end; +  struct string_builder newstr;    -  if (!args || TYPEOF(Pike_sp[-args]) != PIKE_T_STRING || -  Pike_sp[-args].u.string->size_shift) -  Pike_error("Invalid argument to http_decode_string(string(8bit));\n"); +  if (!args || Pike_sp[-args].type != PIKE_T_STRING) +  Pike_error("Invalid argument to http_decode_string(string).\n");    -  foo = bar = STR0(Pike_sp[-args].u.string); -  end = foo + Pike_sp[-args].u.string->len; +  foo = MKPCHARP_STR(Pike_sp[-args].u.string); +  end = ADD_PCHARP(foo, Pike_sp[-args].u.string->len);    -  /* count '%' and wide characters */ -  for (proc=0; foo<end; foo++) { -  if (*foo=='%') { -  proc++; -  if (foo[1] == 'u' || foo[1] == 'U') { +  size_shift = Pike_sp[-args].u.string->size_shift; +  +  /* Count '%' and wide characters. +  * +  * proc counts the number of characters that are to be removed. 
+  */ +  for (; COMPARE_PCHARP(foo, <, end); INC_PCHARP(foo, 1)) { +  p_wchar2 c = INDEX_PCHARP(foo, 0); +  if (c == '%') { +  c = INDEX_PCHARP(foo, 1); +  if (c == 'u' || c == 'U') {    /* %uXXXX */ -  if (foo[2] != '0' || foo[3] != '0') { -  size_shift = 1; +  if (INDEX_PCHARP(foo, 2) != '0' || INDEX_PCHARP(foo, 3) != '0') { +  if (!size_shift) size_shift = 1;    } -  foo += 5; -  if (foo < end) { -  adjust_len += 5; +  proc += 5; +  INC_PCHARP(foo, 5);    } else { -  adjust_len += end - (foo - 4); +  proc += 2; +  INC_PCHARP(foo, 2);    } -  } else { -  foo += 2; -  if (foo < end) { -  adjust_len += 2; -  } else { -  adjust_len += end - (foo - 1); +     }    } -  } -  } +        if (!proc) { pop_n_elems(args-1); return; }    -  newstr = begin_wide_shared_string(Pike_sp[-args].u.string->len - adjust_len, +  init_string_builder_alloc(&newstr, Pike_sp[-args].u.string->len - proc,    size_shift); -  if (size_shift) { -  p_wchar1 *dest = STR1(newstr); +     -  for (proc=0; bar<end; dest++) -  if (*bar=='%') { -  if (bar[1] == 'u' || bar[1] == 'U') { -  if (bar<end-5) -  *dest = (((bar[2]<'A')?(bar[2]&15):((bar[2]+9)&15))<<12)| -  (((bar[3]<'A')?(bar[3]&15):((bar[3]+9)&15))<<8)| -  (((bar[4]<'A')?(bar[4]&15):((bar[4]+9)&15))<<4)| -  ((bar[5]<'A')?(bar[5]&15):((bar[5]+9)&15)); -  else -  *dest=0; -  bar+=6; -  } else { -  if (bar<end-2) -  *dest=(((bar[1]<'A')?(bar[1]&15):((bar[1]+9)&15))<<4)| -  ((bar[2]<'A')?(bar[2]&15):((bar[2]+9)&15)); -  else -  *dest=0; -  bar+=3; +  foo = MKPCHARP_STR(Pike_sp[-args].u.string); +  +  for (; COMPARE_PCHARP(foo, <, end); INC_PCHARP(foo, 1)) { +  p_wchar2 c = INDEX_PCHARP(foo, 0); +  if (c == '%') { +  c = INDEX_PCHARP(foo, 1); +  if (c == 'u' || c == 'U') { +  c = 0; +  if (SUBTRACT_PCHARP(end, foo) > 5) { +  p_wchar2 hex = INDEX_PCHARP(foo, 2); +  c = (((hex<'A')?hex:(hex + 9)) & 15)<<12; +  hex = INDEX_PCHARP(foo, 3); +  c |= (((hex<'A')?hex:(hex + 9)) & 15)<<8; +  hex = INDEX_PCHARP(foo, 4); +  c |= (((hex<'A')?hex:(hex + 9)) & 
15)<<4; +  hex = INDEX_PCHARP(foo, 5); +  c |= ((hex<'A')?hex:(hex + 9)) & 15;    } -  +  INC_PCHARP(foo, 5);    } else { -  *dest=*(bar++); +  c = 0; +  if (SUBTRACT_PCHARP(end, foo) > 2) { +  p_wchar2 hex = INDEX_PCHARP(foo, 1); +  c = (((hex<'A')?hex:(hex + 9)) & 15)<<4; +  hex = INDEX_PCHARP(foo, 2); +  c |= ((hex<'A')?hex:(hex + 9)) & 15;    } -  } else { -  foo = STR0(newstr); -  for (proc=0; bar<end; foo++) -  if (*bar=='%') { -  if (bar[1] == 'u' || bar[1] == 'U') { -  /* We know that the following two characters are zeros. */ -  bar+=3; +  INC_PCHARP(foo, 2);    } -  if (bar<end-2) -  *foo=(((bar[1]<'A')?(bar[1]&15):((bar[1]+9)&15))<<4)| -  ((bar[2]<'A')?(bar[2]&15):((bar[2]+9)&15)); -  else -  *foo=0; -  bar+=3; -  } else { -  *foo=*(bar++); +     } -  +  string_builder_putchar(&newstr, c);    } -  +     pop_n_elems(args); -  push_string(end_shared_string(newstr)); +  push_string(finish_string_builder(&newstr));   }      static void f_html_encode_string( INT32 args )