Branch: Tag:

1998-10-09

1998-10-09 22:24:49 by Henrik Grubbström (Grubba) <grubba@grubba.org>

Added string_to_unicode() and unicode_to_string().

Rev: src/builtin_functions.c:1.123

4:   ||| See the files COPYING and DISCLAIMER for more information.   \*/   #include "global.h" - RCSID("$Id: builtin_functions.c,v 1.122 1998/09/19 13:32:24 grubba Exp $"); + RCSID("$Id: builtin_functions.c,v 1.123 1998/10/09 22:24:49 grubba Exp $");   #include "interpret.h"   #include "svalue.h"   #include "pike_macros.h"
597:    }   }    + /* +  * Some wide-strings related functions +  */ +  + void f_string_to_unicode(INT32 args) + { +  struct pike_string *in; +  struct pike_string *out = NULL; +  INT32 len; +  int i; +  +  get_all_args("string_to_unicode", args, "%S", &in); +  +  switch(in->size_shift) { +  case 0: +  /* Just 8bit characters */ +  len = in->len * 2; +  out = begin_shared_string(len); +  MEMSET(out->str, 0, len); /* Clear the upper (and lower) byte */ +  for(i = in->len; i--;) { +  out->str[i * 2] = in->str[i]; +  } +  out = end_shared_string(out); +  break; +  case 1: +  /* 16 bit characters */ +  /* FIXME: Should we check for 0xfffe & 0xffff here too? */ +  len = in->len * 2; +  out = begin_shared_string(len); + #if (BYTEORDER == 4321) +  /* Big endian -- We don't need to do much... +  * +  * FIXME: Future optimization: Check if refcount is == 1, +  * and perform sufficient magic to be able to convert in place. +  */ +  MEMCPY(out->str, in->str, len); + #else +  /* Other endianness, may need to do byte-order conversion also. */ +  { +  p_wchar1 *str1 = STR1(in); +  for(i = in->len; i--;) { +  unsigned INT32 c = str1[i]; +  out->str[i * 2] = c & 0xff; +  out->str[i * 2 + 1] = c >> 8; +  } +  } + #endif +  out = end_shared_string(out); +  break; +  case 2: +  /* 32 bit characters -- Is someone writing in Klingon? */ +  { +  p_wchar2 *str2 = STR2(in); +  int j = (len = in->len * 2); +  /* Check how many extra wide characters there are. */ +  for(i = in->len; i--;) { +  if (str2[i] > 0xfffd) { +  if (str2[i] < 0x10000) { +  /* 0xfffe: Byte-order detection illegal character. +  * 0xffff: Illegal character. +  */ +  error("string_to_unicode(): Illegal character 0x%04x (index %d) " +  "is not a Unicode character.", str2[i], i); +  } +  if (str2[i] > 0x10ffff) { +  error("string_to_unicode(): Character 0x%08x (index %d) " +  "is out of range (0x00000000 - 0x0010ffff).", str2[i], i); +  } +  /* Extra wide characters take two unicode characters in space. +  * ie One unicode character extra. +  */ +  len += 2; +  } +  } +  out = begin_shared_string(len); +  /* j is initialized above. */ +  for(i = in->len; i--;) { +  unsigned INT32 c = str2[i]; +  +  j -= 2; +  +  if (c > 0xffff) { +  /* Use surrogates */ +  c -= 0x10000; +  +  out->str[j + 1] = c & 0xff; +  out->str[j] = 0xdc | ((c >> 8) & 0x03); +  j -= 2; +  c >>= 10; +  c |= 0xd800; +  } +  out->str[j + 1] = c & 0xff; +  out->str[j] = c >> 8; +  } + #ifdef DEBUG +  if (j) { +  fatal("string_to_unicode(): Indexing error: len:%d, j:%d.\n", len, j); +  } + #endif /* DEBUG */ +  out = end_shared_string(out); +  } +  break; +  default: +  error("string_to_unicode(): Bad string shift: %d!\n", in->size_shift); +  break; +  } +  pop_n_elems(args); +  push_string(out); + } +  + void f_unicode_to_string(INT32 args) + { +  struct pike_string *in; +  struct pike_string *out = NULL; +  INT32 len; +  +  get_all_args("unicode_to_string", args, "%S", &in); +  +  if (in->size_shift) { +  error("unicode_to_string(): Argument in not an 8bit string.\n"); +  } +  +  if (in->len & 1) { +  error("unicode_to_string(): String length is odd.\n"); +  } +  +  /* FIXME: In the future add support for decoding of surrogates. */ +  +  len = in->len / 2; +  +  out = begin_shared_string(in->len + 1); +  out->size_shift = 1; +  out->len = len; + #if (BYTEORDER == 4321) +  /* Big endian +  * +  * FIXME: Future optimization: Perform sufficient magic +  * to do the conversion in place if the ref-count is == 1. +  */ +  /* NOTE: We copy the zero-termination byte too */ +  MEMCPY(out->str, in->str, in->len + 1); + #else +  /* Little endian */ +  { +  int i; +  p_wchar1 *str1 = STR1(out); +  +  str1[len] = 0; /* Force proper zero termination */ +  for (i = len; i--;) { +  str1[i] = in->str[i*2]<<8 + in->str[i*2 + 1]; +  } +  } + #endif /* BYTEORDER == 4321 */ +  out = end_shared_string(out); +  pop_n_elems(args); +  push_string(out); + } +    void f_all_constants(INT32 args)   {    pop_n_elems(args);
3092:    add_efun("zero_type",f_zero_type,"function(mixed:int)",0);    add_efun("array_sscanf",f_sscanf,"function(string,string:array)",0);    +  /* Some Wide-string stuff */ +  add_efun("string_to_unicode", f_string_to_unicode, "function(string:string)", OPT_TRY_OPTIMIZE); +  add_efun("unicode_to_string", f_unicode_to_string, "function(string:string)", OPT_TRY_OPTIMIZE); +    #ifdef HAVE_LOCALTIME    add_efun("localtime",f_localtime,"function(int:mapping(string:int))",OPT_EXTERNAL_DEPEND);   #endif