pike.git / src / builtin_functions.c

version» Context lines:

pike.git/src/builtin_functions.c:1:   /*   || This file is part of Pike. For copyright information see COPYRIGHT.   || Pike is distributed under GPL, LGPL and MPL. See the file COPYING   || for more information. - || $Id: builtin_functions.c,v 1.594 2005/04/02 11:43:03 mast Exp $ + || $Id: builtin_functions.c,v 1.595 2005/04/02 12:24:43 mast Exp $   */      #include "global.h"   #include "interpret.h"   #include "svalue.h"   #include "pike_macros.h"   #include "object.h"   #include "program.h"   #include "array.h"   #include "pike_error.h"
pike.git/src/builtin_functions.c:1923:    struct pike_string *in;    struct pike_string *out;    ptrdiff_t len = 0;    int shift = 0;    ptrdiff_t i,j;    INT_TYPE extended = 0;       get_all_args("utf8_to_string", args, "%S.%i", &in, &extended);       for(i=0; i < in->len; i++) { -  unsigned int c = ((unsigned char *)in->str)[i]; +  unsigned int c = STR0(in)[i];    len++;    if (c & 0x80) {    int cont = 0;       /* From table 3-6 in the Unicode standard 4.0: Well-Formed UTF-8    * Byte Sequences    *    * Code Points 1st Byte 2nd Byte 3rd Byte 4th Byte    * 000000-00007f 00-7f    * 000080-0007ff c2-df 80-bf
pike.git/src/builtin_functions.c:1957:    "at index %"PRINTPTRDIFFT"d.\n",    c, i);    }      #define GET_CONT_CHAR(in, i, c) do { \    i++; \    if (i >= in->len) \    bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \    NULL, Pike_sp - args, \    "Truncated UTF-8 sequence at end of string.\n"); \ -  c = ((unsigned char *)(in->str))[i]; \ +  c = STR0 (in)[i]; \    if ((c & 0xc0) != 0x80) \    bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \    NULL, Pike_sp - args, \    "Expected continuation character at index %d, " \    "got 0x%02x.\n", \    i, c); \    } while (0)      #define UTF8_SEQ_ERROR(prefix, c, i, problem) do { \    bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \
pike.git/src/builtin_functions.c:2009:    cont = 1;    }    else    cont = 2;    if (shift < 1) {    shift = 1;    }    }       else { -  if (shift < 2) -  shift = 2; -  +     if ((c & 0xf8) == 0xf0) {    /* 21bit */    if (c == 0xf0) {    GET_CONT_CHAR (in, i, c);    if (!(c & 0x30))    UTF8_SEQ_ERROR ("0xf0 ", c, i - 1, "is a non-shortest form");    cont = 2;    }    else if (!extended) {    if (c > 0xf4)
pike.git/src/builtin_functions.c:2080:    /* 36bit */    GET_CONT_CHAR (in, i, c);    if (!(c & 0x3e))    UTF8_SEQ_ERROR ("0xfe ", c, i - 1, "is a non-shortest form");    else if (c & 0x3c)    UTF8_SEQ_ERROR ("0xfe ", c, i - 1, "would decode to "    "a too large character value");    cont = 5;    }    } +  +  if (shift < 2) +  shift = 2;    }       while(cont--)    GET_CONT_CHAR (in, i, c); -  +  + #undef GET_CONT_CHAR + #undef UTF8_SEQ_ERROR    }    }    if (len == in->len) {    /* 7bit in == 7bit out */    pop_n_elems(args-1);    return;    }       out = begin_wide_shared_string(len, shift);    -  for(j=i=0; i < in->len; i++) { -  unsigned int c = ((unsigned char *)in->str)[i]; +  switch (shift) { +  case 0: { +  p_wchar0 *out_str = (p_wchar0 *) out->str; +  for(j=i=0; i < in->len;) { +  unsigned int c = STR0(in)[i++]; +  /* NOTE: No tests here since we've already tested the string above. */ +  if (c & 0x80) { +  /* 11bit */ +  unsigned int c2 = STR0(in)[i++] & 0x3f; +  c &= 0x1f; +  c = (c << 6) | c2; +  } +  out_str[j++] = c; +  } +  break; +  }    -  +  case 1: { +  p_wchar1 *out_str = (p_wchar1 *) out->str; +  for(j=i=0; i < in->len;) { +  unsigned int c = STR0(in)[i++]; +  /* NOTE: No tests here since we've already tested the string above. */    if (c & 0x80) { -  int cont = 0; +  if ((c & 0xe0) == 0xc0) { +  /* 11bit */ +  unsigned int c2 = STR0(in)[i++] & 0x3f; +  c &= 0x1f; +  c = (c << 6) | c2; +  } else { +  /* 16bit */ +  unsigned int c2 = STR0(in)[i++] & 0x3f; +  unsigned int c3 = STR0(in)[i++] & 0x3f; +  c &= 0x0f; +  c = (c << 12) | (c2 << 6) | c3; +  } +  } +  out_str[j++] = c; +  } +  break; +  }    -  /* NOTE: No tests here since we've already tested the string -  * above. -  */ +  case 2: { +  p_wchar2 *out_str = (p_wchar2 *) out->str; +  for(j=i=0; i < in->len;) { +  unsigned int c = STR0(in)[i++]; +  /* NOTE: No tests here since we've already tested the string above. */ +  if (c & 0x80) { +  int cont = 0;    if ((c & 0xe0) == 0xc0) {    /* 11bit */    cont = 1;    c &= 0x1f;    } else if ((c & 0xf0) == 0xe0) {    /* 16bit */    cont = 2;    c &= 0x0f;    } else if ((c & 0xf8) == 0xf0) {    /* 21bit */
pike.git/src/builtin_functions.c:2129:    } else if ((c & 0xfe) == 0xfc) {    /* 31bit */    cont = 5;    c &= 0x01;    } else {    /* 36bit */    cont = 6;    c = 0;    }    while(cont--) { -  unsigned INT32 c2 = ((unsigned char *)(in->str))[++i] & 0x3f; +  unsigned int c2 = STR0(in)[i++] & 0x3f;    c = (c << 6) | c2;    }    } -  low_set_index(out, j++, c); +  out_str[j++] = c;    } -  +  break; +  } +  } +    #ifdef PIKE_DEBUG    if (j != len) {    Pike_fatal("utf8_to_string(): Calculated and actual lengths differ: %d != %d\n",    len, j);    }   #endif /* PIKE_DEBUG */ -  out = end_shared_string(out); +  out = low_end_shared_string(out);    pop_n_elems(args);    push_string(out);   }      /*! @decl string __parse_pike_type(string t)    */   static void f_parse_pike_type( INT32 args )   {    struct pike_type *t;