Branch: Tag:

2015-06-23

2015-06-23 12:54:41 by Henrik Grubbström (Grubba) <grubba@grubba.org>

string_to_unicode: Support outputting UTF8LE.

Adds an optional second argument to string_to_unicode() to
specify the wanted byte order.

1663:    * Some wide-strings related functions    */    - /*! @decl string(0..255) string_to_unicode(string s) + /*! @decl string(0..255) string_to_unicode(string s, int(0..2)|void byteorder)    *!    *! Converts a string into an UTF16 compliant byte-stream.    *! -  +  *! @param s +  *! String to convert to UTF16. +  *! +  *! @param byteorder +  *! Byte-order for the output. One of: +  *! @int +  *! @value 0 +  *! Network (aka big-endian) byte-order (default). +  *! @value 1 +  *! Little-endian byte-order. +  *! @value 2 +  *! Native byte-order. +  *! @endint +  *!    *! @note    *! Throws an error if characters not legal in an UTF16 stream are    *! encountered. Valid characters are in the range 0x00000 - 0x10ffff,
1684:    struct pike_string *out = NULL;    ptrdiff_t len;    ptrdiff_t i; +  unsigned INT_TYPE byteorder = 0;    -  get_all_args("string_to_unicode", args, "%W", &in); +  get_all_args("string_to_unicode", args, "%W.%i", &in, &byteorder);    -  +  if (byteorder >= 2) { +  if (byteorder == 2) { + #if PIKE_BYTEORDER == 1234 +  /* Little endian. */ +  byteorder = 1; + #else +  byteorder = 0; + #endif +  } else { +  SIMPLE_BAD_ARG_ERROR("string_to_unicode", 2, "int(0..2)|void"); +  } +  } +     switch(in->size_shift) {    case 0:    /* Just 8bit characters */
1694:    out = begin_shared_string(len);    if (len) {    memset(out->str, 0, len); /* Clear the upper (and lower) byte */ - #ifdef PIKE_DEBUG -  if (d_flag) { -  for(i = len; i--;) { -  if (out->str[i]) { -  Pike_fatal("memset didn't clear byte %ld of %ld\n", -  PTRDIFF_T_TO_LONG(i+1), -  PTRDIFF_T_TO_LONG(len)); -  } -  } -  } - #endif /* PIKE_DEBUG */ +     for(i = in->len; i--;) { -  out->str[i * 2 + 1] = in->str[i]; +  out->str[i * 2 + 1 - byteorder] = in->str[i];    }    }    out = end_shared_string(out);
1716:    /* FIXME: Should we check for 0xfffe & 0xffff here too? */    len = in->len * 2;    out = begin_shared_string(len); +  if (byteorder ==   #if (PIKE_BYTEORDER == 4321) -  /* Big endian -- We don't need to do much... -  * -  * FIXME: Future optimization: Check if refcount is == 1, -  * and perform sufficient magic to be able to convert in place. -  */ -  memcpy(out->str, in->str, len); +  1 /* Little endian. */   #else -  +  0 /* Big endian. */ + #endif +  ) {    /* Other endianness, may need to do byte-order conversion also. */ -  { +     p_wchar1 *str1 = STR1(in);    for(i = in->len; i--;) {    unsigned INT32 c = str1[i]; -  out->str[i * 2 + 1] = c & 0xff; -  out->str[i * 2] = c >> 8; +  out->str[i * 2 + 1 - byteorder] = c & 0xff; +  out->str[i * 2 + byteorder] = c >> 8;    } -  +  } else { +  /* Native byte order -- We don't need to do much... +  * +  * FIXME: Future optimization: Check if refcount is == 1, +  * and perform sufficient magic to be able to convert in place. +  */ +  memcpy(out->str, in->str, len);    } - #endif +     out = end_shared_string(out);    break;    case 2:
1758:    "is out of range (0x00000000..0x0010ffff).",    str2[i], PTRDIFF_T_TO_LONG(i));    } -  /* Extra wide characters take two unicode characters in space. -  * ie One unicode character extra. +  /* Extra wide characters take two UTF16 characters in space. +  * ie One UTF16 character extra.    */    len += 2;    }
1775:    /* Use surrogates */    c -= 0x10000;    -  out->str[j + 1] = c & 0xff; -  out->str[j] = 0xdc | ((c >> 8) & 0x03); +  out->str[j + 1 - byteorder] = c & 0xff; +  out->str[j + byteorder] = 0xdc | ((c >> 8) & 0x03);    j -= 2;    c >>= 10;    c |= 0xd800;    } -  out->str[j + 1] = c & 0xff; -  out->str[j] = c >> 8; +  out->str[j + 1 - byteorder] = c & 0xff; +  out->str[j + byteorder] = c >> 8;    }   #ifdef PIKE_DEBUG    if (j) {
10024:       /* Some Wide-string stuff */    - /* function(string:string(0..255)) */ +  /* function(string,int(0..2)|void:string(0..255)) */    ADD_EFUN("string_to_unicode", f_string_to_unicode, -  tFunc(tStr,tStr8), OPT_TRY_OPTIMIZE); +  tFunc(tStr tOr(tInt02,tVoid),tStr8), OPT_TRY_OPTIMIZE);      /* function(string(0..255):string) */    ADD_EFUN("unicode_to_string", f_unicode_to_string,