pike.git / src / stralloc.c

version» Context lines:

pike.git/src/stralloc.c:3138:       /* process the single byte tail */       elen = (size_t)inlen % sizeof(poptype);       if (elen) {    poptype a = 0;       in = (const unsigned char*)in8;    -  switch (7-elen) { -  case 0: a |= in[0] & 0x80; /* FALLTHRU */ -  case 1: a |= in[1] & 0x80; /* FALLTHRU */ -  case 2: a |= in[2] & 0x80; /* FALLTHRU */ -  case 3: a |= in[3] & 0x80; /* FALLTHRU */ -  case 4: a |= in[4] & 0x80; /* FALLTHRU */ -  case 5: a |= in[5] & 0x80; /* FALLTHRU */ -  case 6: a |= in[6] & 0x80; break; -  default: UNREACHABLE(break); -  } +  for (size_t i = 0; i < elen; i++) +  a |= in[i] & 0x80;       if (UNLIKELY(a)) {    goto not_7bit;    }    len += elen;    }       *_shift = 0;    return len;   not_7bit: -  +     return pike_string_utf8_decode_length_slowpath(len, in, end, args,    extended, _shift);   }      PMOD_EXPORT size_t pike_string_utf8_length(const struct pike_string *s, INT32 args, int extended) {    size_t len = s->len;    size_t elen = s->len;       /* This 8bit version counts the number of high bits in each byte.    * The loop is unrolled 4 times and starts with a Duff's device style
pike.git/src/stralloc.c:3253:    if (extended & 1) continue;    if (UNLIKELY(c >= 0xd800 && c <= 0xdfff)) goto surrogate_error;    }    } else {    unsigned INT32 *in = (unsigned INT32*)STR2(s);       for (i = 0; i < len; i++) {    c = in[i];    if (c <= 0x7f) continue;    elen += div5_8bit(fls32(c) - 2); -  if (extended & 2 && c < 0x10ffff) +  if (extended & 2 && c <= 0x10ffff)    {    /* Encode with a surrogate pair. */    elen += 2;    continue;    }    if (extended & 1) continue;    if (UNLIKELY(c >= 0xd800 && c <= 0xdfff)) goto surrogate_error;    if (UNLIKELY(c > 0x10ffff)) goto extended_error;    }    }
pike.git/src/stralloc.c:3280:    c, i);   extended_error:    bad_arg_error ("string_to_utf8", args, 1, NULL, Pike_sp - args,    "Character 0x%08x at index %"PRINTPTRDIFFT"d is "    "outside the allowed range.\n",    c, i);    }    UNREACHABLE(return 0);   }    - PMOD_EXPORT struct pike_string *pike_string_utf8_decode(const p_wchar0 *in_str, -  enum size_shift shift, ptrdiff_t len) { + PMOD_EXPORT struct pike_string *pike_string_utf8_decode(const p_wchar0 *src, ptrdiff_t in_len, +  enum size_shift shift, ptrdiff_t out_len, +  INT_TYPE extended) {    -  struct pike_string *out = begin_wide_shared_string(len, shift); +  struct pike_string *out = begin_wide_shared_string(out_len, shift); +  const p_wchar0 *src_end = src + in_len;       switch (shift) {    case eightbit: {    p_wchar0 *out_str = STR0 (out);    -  for(ptrdiff_t j=0; j < len; j++) { -  unsigned int c = *(in_str++); +  for(ptrdiff_t j=0; j < out_len; j++) { +  unsigned int c = *(src++);    /* NOTE: No tests here since we've already tested the string above. */    if (c & 0x80) {    /* 11bit */ -  unsigned int c2 = *(in_str++) & 0x3f; +  unsigned int c2 = *(src++) & 0x3f;    c &= 0x1f;    c = (c << 6) | c2;    }    out_str[j] = c;    }    break;    }       case sixteenbit: {    p_wchar1 *out_str = STR1 (out);    -  for(ptrdiff_t j=0; j < len; j++) { -  unsigned int c = *(in_str++); +  for(ptrdiff_t j=0; j < out_len; j++) { +  unsigned int c = *(src++);    /* NOTE: No tests here since we've already tested the string above. */    if (c & 0x80) {    if ((c & 0xe0) == 0xc0) {    /* 11bit */ -  unsigned int c2 = *(in_str++) & 0x3f; +  unsigned int c2 = *(src++) & 0x3f;    c &= 0x1f;    c = (c << 6) | c2;    } else {    /* 16bit */ -  unsigned int c2 = *(in_str++) & 0x3f; -  unsigned int c3 = *(in_str++) & 0x3f; +  unsigned int c2 = *(src++) & 0x3f; +  unsigned int c3 = *(src++) & 0x3f;    c &= 0x0f;    c = (c << 12) | (c2 << 6) | c3;    }    }    out_str[j] = c;    }    break;    }       case thirtytwobit: {    p_wchar2 *out_str = STR2 (out);    -  for(ptrdiff_t j=0; j < len; j++) { -  unsigned int c = *(in_str++); +  for(ptrdiff_t j=0; src < src_end; j++) { +  unsigned int c = *(src++);    /* NOTE: No tests here since we've already tested the string above. */    if (c & 0x80) {    int cont = 0;    if ((c & 0xe0) == 0xc0) {    /* 11bit */    cont = 1;    c &= 0x1f;    } else if ((c & 0xf0) == 0xe0) {    /* 16bit */    cont = 2;
pike.git/src/stralloc.c:3362:    } else if ((c & 0xfe) == 0xfc) {    /* 31bit */    cont = 5;    c &= 0x01;    } else {    /* 36bit */    cont = 6;    c = 0;    }    while(cont--) { -  unsigned int c2 = *(in_str++) & 0x3f; +  unsigned int c2 = *(src++) & 0x3f;    c = (c << 6) | c2;    } -  if (/*(extended & 2) && */(c & 0xfc00) == 0xdc00) { +  if ((extended & 2) && (c & 0xfc00) == 0xdc00) {    /* Low surrogate */    c &= 0x3ff;    c |= ((out_str[--j] & 0x3ff)<<10) + 0x10000;    }    }    out_str[j] = c;    }    break;    }    default: UNREACHABLE(break);    }    -  + #ifdef PIKE_DEBUG +  if (src_end != src) +  Pike_fatal("utf8_to_string(): Length mismatch after decode. Did not use all input.\n"); + #endif +     out = low_end_shared_string(out);      #ifdef PIKE_DEBUG    check_string (out);   #endif       return out;   }      PMOD_EXPORT unsigned char *pike_string_utf8_encode(unsigned char *dst, const struct pike_string *s,
pike.git/src/stralloc.c:3443:    {    const unsigned INT32 *in = (unsigned INT32*)STR2(s);       for (size_t i = 0; i < len; i++) {    unsigned INT32 bytes, shift, first;    unsigned INT32 c = in[i];       if (c <= 0x7f) {    *dst++ = c;    continue; -  } else if (UNLIKELY(extended & 2 && c >= 0xffff && c < 0x10ffff)) { +  } else if (UNLIKELY(extended & 2 && c > 0xffff && c <= 0x10ffff)) {    /* Encode with surrogates. */    c -= 0x10000;    /* 0xd800 | (c>>10)    * 0b1101 10cccc cccccc    * UTF8: 11101101 1010cccc 10cccccc    */    *dst++ = 0xed;    *dst++ = 0xa0 | (c >> 16);    *dst++ = 0x80 | ((c >> 10) & 0x3f);    /* 0xdc00 | (c & 0x3ff)    * 0b1101 11cccc cccccc    * UTF8: 11101101 1011cccc 10cccccc    */    *dst++ = 0xed;    *dst++ = 0xb0 | ((c >> 6) & 0x3f);    *dst++ = 0x80 | (c & 0x3f); -  +  continue;    }       bytes = 1 + div5_8bit(fls32(c) - 2);    shift = 6 * (bytes - 1);    first = -0x40 >> (bytes - 2);       /* the > 31bit case */    if (UNLIKELY(bytes >= 7)) {    bytes = 7;    shift = 32;