pike.git / src / builtin_functions.c

version» Context lines:

pike.git/src/builtin_functions.c:1688:    emit0(F_DESTRUCTEDP);    else    return 0;    return 1;   }      /*    * Some wide-strings related functions    */    - /*! @decl string(0..255) string_to_unicode(string s) + /*! @decl string(0..255) string_to_unicode(string s, int(0..2)|void byteorder)    *!    *! Converts a string into an UTF16 compliant byte-stream.    *! -  +  *! @param s +  *! String to convert to UTF16. +  *! +  *! @param byteorder +  *! Byte-order for the output. One of: +  *! @int +  *! @value 0 +  *! Network (aka big-endian) byte-order (default). +  *! @value 1 +  *! Little-endian byte-order. +  *! @value 2 +  *! Native byte-order. +  *! @endint +  *!    *! @note    *! Throws an error if characters not legal in an UTF16 stream are    *! encountered. Valid characters are in the range 0x00000 - 0x10ffff,    *! except for characters 0xfffe and 0xffff.    *!    *! Characters in range 0x010000 - 0x10ffff are encoded using surrogates.    *!    *! @seealso    *! @[Charset.decoder()], @[string_to_utf8()], @[unicode_to_string()],    *! @[utf8_to_string()]    */   PMOD_EXPORT void f_string_to_unicode(INT32 args)   {    struct pike_string *in;    struct pike_string *out = NULL;    ptrdiff_t len;    ptrdiff_t i; -  +  unsigned INT_TYPE byteorder = 0;    -  get_all_args("string_to_unicode", args, "%W", &in); +  get_all_args("string_to_unicode", args, "%W.%i", &in, &byteorder);    -  +  if (byteorder >= 2) { +  if (byteorder == 2) { + #if PIKE_BYTEORDER == 1234 +  /* Little endian. */ +  byteorder = 1; + #else +  byteorder = 0; + #endif +  } else { +  SIMPLE_BAD_ARG_ERROR("string_to_unicode", 2, "int(0..2)|void"); +  } +  } +     switch(in->size_shift) {    case 0:    /* Just 8bit characters */    len = in->len * 2;    out = begin_shared_string(len);    if (len) {    memset(out->str, 0, len); /* Clear the upper (and lower) byte */ - #ifdef PIKE_DEBUG -  if (d_flag) { -  for(i = len; i--;) { -  if (out->str[i]) { -  Pike_fatal("memset didn't clear byte %ld of %ld\n", -  PTRDIFF_T_TO_LONG(i+1), -  PTRDIFF_T_TO_LONG(len)); -  } -  } -  } - #endif /* PIKE_DEBUG */ +     for(i = in->len; i--;) { -  out->str[i * 2 + 1] = in->str[i]; +  out->str[i * 2 + 1 - byteorder] = in->str[i];    }    }    out = end_shared_string(out);    break;    case 1:    /* 16 bit characters */    /* FIXME: Should we check for 0xfffe & 0xffff here too? */    len = in->len * 2;    out = begin_shared_string(len); -  +  if (byteorder ==   #if (PIKE_BYTEORDER == 4321) -  /* Big endian -- We don't need to do much... -  * -  * FIXME: Future optimization: Check if refcount is == 1, -  * and perform sufficient magic to be able to convert in place. -  */ -  memcpy(out->str, in->str, len); +  1 /* Little endian. */   #else -  +  0 /* Big endian. */ + #endif +  ) {    /* Other endianness, may need to do byte-order conversion also. */ -  { +     p_wchar1 *str1 = STR1(in);    for(i = in->len; i--;) {    unsigned INT32 c = str1[i]; -  out->str[i * 2 + 1] = c & 0xff; -  out->str[i * 2] = c >> 8; +  out->str[i * 2 + 1 - byteorder] = c & 0xff; +  out->str[i * 2 + byteorder] = c >> 8;    } -  +  } else { +  /* Native byte order -- We don't need to do much... +  * +  * FIXME: Future optimization: Check if refcount is == 1, +  * and perform sufficient magic to be able to convert in place. +  */ +  memcpy(out->str, in->str, len);    } - #endif +     out = end_shared_string(out);    break;    case 2:    /* 32 bit characters -- Is someone writing in Klingon? */    {    p_wchar2 *str2 = STR2(in);    ptrdiff_t j;    len = in->len * 2;    /* Check how many extra wide characters there are. */    for(i = in->len; i--;) {
pike.git/src/builtin_functions.c:1783:    */    Pike_error("Illegal character 0x%04x (index %ld) "    "is not a Unicode character.",    str2[i], PTRDIFF_T_TO_LONG(i));    }    if (str2[i] > 0x10ffff) {    Pike_error("Character 0x%08x (index %ld) "    "is out of range (0x00000000..0x0010ffff).",    str2[i], PTRDIFF_T_TO_LONG(i));    } -  /* Extra wide characters take two unicode characters in space. -  * ie One unicode character extra. +  /* Extra wide characters take two UTF16 characters in space. +  * ie One UTF16 character extra.    */    len += 2;    }    }    out = begin_shared_string(len);    j = len;    for(i = in->len; i--;) {    unsigned INT32 c = str2[i];       j -= 2;       if (c > 0xffff) {    /* Use surrogates */    c -= 0x10000;    -  out->str[j + 1] = c & 0xff; -  out->str[j] = 0xdc | ((c >> 8) & 0x03); +  out->str[j + 1 - byteorder] = c & 0xff; +  out->str[j + byteorder] = 0xdc | ((c >> 8) & 0x03);    j -= 2;    c >>= 10;    c |= 0xd800;    } -  out->str[j + 1] = c & 0xff; -  out->str[j] = c >> 8; +  out->str[j + 1 - byteorder] = c & 0xff; +  out->str[j + byteorder] = c >> 8;    }   #ifdef PIKE_DEBUG    if (j) {    Pike_fatal("string_to_unicode(): Indexing error: len:%ld, j:%ld.\n",    PTRDIFF_T_TO_LONG(len), PTRDIFF_T_TO_LONG(j));    }   #endif /* PIKE_DEBUG */    out = end_shared_string(out);    }    break;
pike.git/src/builtin_functions.c:9427:    ADD_EFUN("array_sscanf", f_sscanf,    tFunc(tStr tAttr("sscanf_format", tStr),    tArr(tAttr("sscanf_args", tMix))), OPT_TRY_OPTIMIZE);       ADD_EFUN("__handle_sscanf_format", f___handle_sscanf_format,    tFunc(tStr tStr tType(tMix) tType(tMix), tType(tMix)),    0);       /* Some Wide-string stuff */    -  /* function(string:string(0..255)) */ +  /* function(string,int(0..2)|void:string(0..255)) */    ADD_EFUN("string_to_unicode", f_string_to_unicode, -  tFunc(tStr,tStr8), OPT_TRY_OPTIMIZE); +  tFunc(tStr tOr(tInt02,tVoid),tStr8), OPT_TRY_OPTIMIZE);       /* function(string(0..255):string) */    ADD_EFUN("unicode_to_string", f_unicode_to_string,    tFunc(tStr8,tStr), OPT_TRY_OPTIMIZE);       /* function(string,int|void:string(0..255)) */    ADD_EFUN("string_to_utf8", f_string_to_utf8,    tFunc(tStr tOr(tInt,tVoid),tStr8), OPT_TRY_OPTIMIZE);       ADD_EFUN("string_filter_non_unicode", f_string_filter_non_unicode,