50f0872017-04-08Martin Nilsson /* charsetmod.cmod -*- c -*- */
e576bb2002-10-11Martin Nilsson /* || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. */
f886061998-10-15Marcus Comstedt #include "config.h"
4197c82002-09-09Marcus Comstedt #include "global.h"
f886061998-10-15Marcus Comstedt #include "program.h" #include "interpret.h" #include "stralloc.h" #include "object.h" #include "module_support.h"
b2d3e42000-12-01Fredrik Hübinette (Hubbe) #include "pike_error.h"
c0d5e42008-06-29Martin Stjernholm #include "builtin_functions.h"
f886061998-10-15Marcus Comstedt 
c0d5e42008-06-29Martin Stjernholm #include "charsetmod.h"
f886061998-10-15Marcus Comstedt  #ifdef __CHAR_UNSIGNED__ #define SIGNED signed #else #define SIGNED #endif
7df2d12017-04-06Henrik Grubbström (Grubba) #define DEFAULT_CMOD_STORAGE static
0c415f2017-06-16Henrik Grubbström (Grubba) /*! @module _Charset *! *! Low-level tables and code for the @[Charset] module. *! *! This is probably not the module you want; try the @[Charset] module. *! *! @seealso *! @[Charset] */
f3201e2017-07-16Henrik Grubbström (Grubba) /* Remap old symbols */ #define std_cs_stor cq__Charset_Std_CS_struct #define std_cs_program cq__Charset_Std_CS_program
/* Program handles used by this module.  Every handle starts out as
 * NULL; where they get assigned is outside this part of the file. */

/* UTF coders. */
static struct program *utf1_program = NULL;
static struct program *utf1e_program = NULL;
static struct program *utf7_program = NULL;
static struct program *utf8_program = NULL;
static struct program *utf7e_program = NULL;
static struct program *utf8e_program = NULL;
static struct program *utf_ebcdic_program = NULL;
static struct program *utf_ebcdice_program = NULL;
static struct program *utf7_5_program = NULL;
static struct program *utf7_5e_program = NULL;

/* Multibyte coders. */
static struct program *euc_program = NULL;
static struct program *sjis_program = NULL;
static struct program *gbke_program = NULL;
static struct program *multichar_program = NULL;
static struct program *gb18030e_program = NULL;

static struct program *rfc_base_program = NULL;
/* The following inherit rfc_base_program. */
static struct program *std_rfc_program = NULL;
static struct program *euce_program = NULL;
static struct program *sjise_program = NULL;
static struct program *std_94_program = NULL;
static struct program *std_96_program = NULL;
static struct program *std_9494_program = NULL;
static struct program *std_9696_program = NULL;
static struct program *std_big5_program = NULL;
static struct program *std_8bit_program = NULL;
static struct program *std_8bite_program = NULL;
static struct program *std_16bite_program = NULL;

static size_t rfc_charset_name_offs = 0;

/* Arrays handed to f_replace() by f_drain_rfc1345() when double
 * combiners were seen in the output. */
static struct array *double_custom_chars = NULL;
static struct array *double_combiner_chars = NULL;
f3201e2017-07-16Henrik Grubbström (Grubba) DECLARATIONS;
f886061998-10-15Marcus Comstedt  struct std_rfc_stor {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table;
f886061998-10-15Marcus Comstedt };
f5a2cf2000-08-09Henrik Grubbström (Grubba) static size_t std_rfc_stor_offs = 0;
f886061998-10-15Marcus Comstedt 
9db11a1998-11-05Marcus Comstedt struct std_misc_stor { int lo, hi; };
f5a2cf2000-08-09Henrik Grubbström (Grubba) static size_t std_misc_stor_offs = 0;
9db11a1998-11-05Marcus Comstedt 
f886061998-10-15Marcus Comstedt struct utf7_stor { INT32 dat, surro; int shift, datbit; };
f5a2cf2000-08-09Henrik Grubbström (Grubba) static size_t utf7_stor_offs = 0;
f886061998-10-15Marcus Comstedt 
eeccd82001-06-05Marcus Comstedt struct euc_stor {
cb43502005-12-07Marcus Comstedt  UNICHAR const *table, *table2, *table3;
c0d5e42008-06-29Martin Stjernholm  struct pike_string *name;
eeccd82001-06-05Marcus Comstedt }; static size_t euc_stor_offs = 0;
3337b22004-08-17Martin Nilsson struct multichar_stor { const struct multichar_table *table;
aeba852006-01-13Henrik Grubbström (Grubba)  int is_gb18030;
c0d5e42008-06-29Martin Stjernholm  struct pike_string *name;
3337b22004-08-17Martin Nilsson }; static size_t multichar_stor_offs = 0;
0e4d631998-11-16Marcus Comstedt struct std8e_stor { p_wchar0 *revtab;
d134da2008-06-29Martin Stjernholm  unsigned lowtrans; int lo, hi;
e82da92008-07-03Henrik Grubbström (Grubba)  p_wchar2 zero_char; /* Character at code point 0x00 */
0e4d631998-11-16Marcus Comstedt };
f5a2cf2000-08-09Henrik Grubbström (Grubba) static size_t std8e_stor_offs = 0;
0e4d631998-11-16Marcus Comstedt 
fecae61999-01-05Marcus Comstedt struct std16e_stor { p_wchar1 *revtab;
d134da2008-06-29Martin Stjernholm  unsigned lowtrans; int lo, hi;
1ec5352005-12-08Marcus Comstedt  int sshift;
fecae61999-01-05Marcus Comstedt };
f5a2cf2000-08-09Henrik Grubbström (Grubba) static size_t std16e_stor_offs = 0;
fecae61999-01-05Marcus Comstedt 
f886061998-10-15Marcus Comstedt static SIGNED char rev64t['z'-'+'+1];
5453451998-11-16Marcus Comstedt static char fwd64t[64]= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
f886061998-10-15Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba) /* Start std_cs */ PIKECLASS Std_CS flags ID_PROTECTED;
3c39ab1998-11-16Marcus Comstedt {
f3201e2017-07-16Henrik Grubbström (Grubba)  CVAR struct string_builder strbuild; CVAR struct pike_string *retain, *replace; PIKEVAR mixed _repcb flags ID_PROTECTED;
3c39ab1998-11-16Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba)  DECLARE_STORAGE;
3c39ab1998-11-16Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba) PIKEFUN void create(string|void replace, function(string:string)|void repcb) {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
f3201e2017-07-16Henrik Grubbström (Grubba)  if(replace) {
3c39ab1998-11-16Marcus Comstedt  if(s->replace != NULL) free_string(s->replace);
f3201e2017-07-16Henrik Grubbström (Grubba)  add_ref(s->replace = replace);
3c39ab1998-11-16Marcus Comstedt  }
f3201e2017-07-16Henrik Grubbström (Grubba)  if(repcb && TYPEOF(*repcb) == T_FUNCTION) assign_svalue(&s->_repcb, repcb);
65810c2017-12-28Martin Nilsson  pop_n_elems(args);
f3201e2017-07-16Henrik Grubbström (Grubba) } PIKEFUN void set_replacement_callback(function(string:string) repcb) {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
f3201e2017-07-16Henrik Grubbström (Grubba)  if(repcb) assign_svalue(&s->_repcb, repcb); push_object(this_object());
3c39ab1998-11-16Marcus Comstedt }
f3201e2017-07-16Henrik Grubbström (Grubba) PIKEFUN string drain()
7a492f1999-04-27Marcus Comstedt {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
7a492f1999-04-27Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba)  pop_n_elems(args); push_string(finish_string_builder(&s->strbuild)); init_string_builder(&s->strbuild, 0); }
7a492f1999-04-27Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba) PIKEFUN object clear() {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
f3201e2017-07-16Henrik Grubbström (Grubba)  pop_n_elems(args); if(s->retain != NULL) { free_string(s->retain); s->retain = NULL; } reset_string_builder(&s->strbuild);
a6b38d2016-12-07Martin Nilsson  push_object(this_object());
7a492f1999-04-27Marcus Comstedt }
f3201e2017-07-16Henrik Grubbström (Grubba) INIT {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
f3201e2017-07-16Henrik Grubbström (Grubba)  s->retain = NULL; s->replace = NULL; init_string_builder(&s->strbuild,0); } EXIT {
abe2c52018-02-07Henrik Grubbström (Grubba)  struct std_cs_stor *s = THIS;
f3201e2017-07-16Henrik Grubbström (Grubba)  if(s->retain != NULL) free_string(s->retain); if(s->replace != NULL) free_string(s->replace); free_string_builder(&s->strbuild); } } /* End std_cs */
48656a1999-07-27Marcus Comstedt static int call_repcb(struct svalue *repcb, p_wchar2 ch) { push_string(make_shared_binary_string2(&ch, 1)); apply_svalue(repcb, 1);
50f0872017-04-08Martin Nilsson  if(TYPEOF(Pike_sp[-1]) == T_STRING)
48656a1999-07-27Marcus Comstedt  return 1; pop_stack(); return 0; }
c0d5e42008-06-29Martin Stjernholm static struct svalue decode_err_prog = SVALUE_INIT_INT (0); static struct svalue encode_err_prog = SVALUE_INIT_INT (0); static void DECLSPEC(noreturn) transcode_error_va ( struct pike_string *str, ptrdiff_t pos, struct pike_string *charset, int encode, const char *reason, va_list args) ATTRIBUTE((noreturn)); static void DECLSPEC(noreturn) transcode_error_va ( struct pike_string *str, ptrdiff_t pos, struct pike_string *charset, int encode, const char *reason, va_list args) /* Note: Consumes a ref to charset. */ { struct svalue *err_prog; if (encode) {
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(encode_err_prog) == T_INT) {
5e9fc02015-08-18Per Hedbor  push_static_text ("Charset.EncodeError");
c0d5e42008-06-29Martin Stjernholm  SAFE_APPLY_MASTER ("resolv", 1);
50f0872017-04-08Martin Nilsson  if (TYPEOF(Pike_sp[-1]) != T_PROGRAM && TYPEOF(Pike_sp[-1]) != T_FUNCTION)
0b8d2f2013-06-17Martin Nilsson  Pike_error ("Failed to resolve Charset.EncodeError "
c0d5e42008-06-29Martin Stjernholm  "to a program - unable to throw an encode error.\n");
50f0872017-04-08Martin Nilsson  move_svalue (&encode_err_prog, --Pike_sp);
c0d5e42008-06-29Martin Stjernholm  } err_prog = &encode_err_prog; } else {
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(decode_err_prog) == T_INT) {
5e9fc02015-08-18Per Hedbor  push_static_text ("Charset.DecodeError");
c0d5e42008-06-29Martin Stjernholm  SAFE_APPLY_MASTER ("resolv", 1);
50f0872017-04-08Martin Nilsson  if (TYPEOF(Pike_sp[-1]) != T_PROGRAM && TYPEOF(Pike_sp[-1]) != T_FUNCTION)
0b8d2f2013-06-17Martin Nilsson  Pike_error ("Failed to resolve Charset.DecodeError "
c0d5e42008-06-29Martin Stjernholm  "to a program - unable to throw an decode error.\n");
50f0872017-04-08Martin Nilsson  move_svalue (&decode_err_prog, --Pike_sp);
c0d5e42008-06-29Martin Stjernholm  } err_prog = &decode_err_prog; } ref_push_string (str); push_int (pos);
6bdfee2017-08-01Henrik Grubbström (Grubba)  ref_push_string (charset);
c0d5e42008-06-29Martin Stjernholm  if (reason) { struct string_builder s; init_string_builder (&s, 0); string_builder_vsprintf (&s, reason, args); push_string (finish_string_builder (&s)); } else push_int (0); f_backtrace (0); apply_svalue (err_prog, 5); f_throw (1); } void DECLSPEC(noreturn) transcode_error ( struct pike_string *str, ptrdiff_t pos, struct pike_string *charset, int encode, const char *reason, ...) { va_list args; va_start (args, reason); transcode_error_va (str, pos, charset, encode, reason, args); va_end (args); } void DECLSPEC(noreturn) transcoder_error ( struct pike_string *str, ptrdiff_t pos, int encode, const char *reason, ...) {
6bdfee2017-08-01Henrik Grubbström (Grubba)  struct svalue charset_str;
c0d5e42008-06-29Martin Stjernholm  va_list args; va_start (args, reason);
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL_SUBTYPE(charset_str, 0);
c0d5e42008-06-29Martin Stjernholm  MAKE_CONST_STRING (charset_str.u.string, "charset");
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL_TYPE(charset_str, T_STRING);
6bdfee2017-08-01Henrik Grubbström (Grubba)  object_index_no_free (Pike_sp, Pike_fp->current_object, 0, &charset_str); Pike_sp++; if (TYPEOF(Pike_sp[-1]) == T_STRING) { transcode_error_va (str, pos, Pike_sp[-1].u.string, encode, reason, args); } else { transcode_error_va (str, pos, MK_STRING("UNDEFINED"), encode, reason, args); }
c0d5e42008-06-29Martin Stjernholm  va_end (args); } #define REPLACE_CHAR(ch, func, ctx, str, pos) do { \
690e292005-04-03Martin Stjernholm  if(repcb != NULL && call_repcb(repcb, ch)) { \
50f0872017-04-08Martin Nilsson  func(ctx, sb, Pike_sp[-1].u.string, rep, NULL); \
690e292005-04-03Martin Stjernholm  pop_stack(); \ } else if(rep != NULL) \ func(ctx, sb, rep, NULL, NULL); \ else \
a898bb2013-12-04Martin Nilsson  transcoder_error (str, pos, 1, "Unsupported character %d.\n",ch); \
690e292005-04-03Martin Stjernholm  } while (0)
48656a1999-07-27Marcus Comstedt 
017b572011-10-28Henrik Grubbström (Grubba) #define MKREPCB(c) (TYPEOF(c) == T_FUNCTION? &(c):NULL)
48656a1999-07-27Marcus Comstedt 
3edf2a2007-06-19Henrik Grubbström (Grubba) static void f_drain_rfc1345(INT32 args) {
50f0872017-04-08Martin Nilsson  struct std_cs_stor *s = (struct std_cs_stor *)Pike_fp->current_storage;
3edf2a2007-06-19Henrik Grubbström (Grubba)  UNICHAR trailer = 0;
b6c29f2011-04-16Henrik Grubbström (Grubba)  int double_combiners = 0;
3edf2a2007-06-19Henrik Grubbström (Grubba)  if (s->strbuild.s->size_shift) { ptrdiff_t i, len = s->strbuild.s->len; /* We need to check for private-space characters. */ switch(s->strbuild.s->size_shift) { case 1: { p_wchar1 *s1 = STR1(s->strbuild.s); for (i=0; i < len; i++) {
b6c29f2011-04-16Henrik Grubbström (Grubba)  if ((s1[i] & 0xf000) == 0xe000) { if ((s1[i] & 0xff00) == 0xe300) { /* Non-spacing character ==> combiner */ trailer = s1[i++]; if (i < len) { s1[i-1] = s1[i]; s1[i] = trailer & 0x0fff; trailer = 0; } else { s->strbuild.s->len--; break; }
7aec9d2011-04-16Henrik Grubbström (Grubba)  } else if ((s1[i] & 0xff00) == 0xee00) { /* Non-spacing character ==> combiner */ trailer = s1[i++]; if (i < len) { s1[i-1] = s1[i]; s1[i] = 0xf000 | (trailer & 0x0fff); trailer = 0; } else { s->strbuild.s->len--; break; }
b6c29f2011-04-16Henrik Grubbström (Grubba)  } else if ((s1[i] & 0xff00) == 0xe100) { /* Non-spacing character ==> double combiner * * Reorder here, and expand later. */ trailer = s1[i++]; if (i < len) { s1[i-1] = s1[i]; s1[i] = trailer; trailer = 0; double_combiners = 1; } else { s->strbuild.s->len--; break; }
13670c2015-05-25Martin Nilsson  }
3edf2a2007-06-19Henrik Grubbström (Grubba)  } } } break; case 2: { p_wchar2 *s2 = STR2(s->strbuild.s); for (i=0; i < len; i++) {
b6c29f2011-04-16Henrik Grubbström (Grubba)  if ((s2[i] & 0xf000) == 0xe000) { if ((s2[i] & 0xff00) == 0xe300) { /* Non-spacing character ==> combiner */ trailer = s2[i++]; if (i < len) { s2[i-1] = s2[i]; s2[i] = trailer & 0x0fff; trailer = 0; } else { s->strbuild.s->len--; break; }
7aec9d2011-04-16Henrik Grubbström (Grubba)  } else if ((s2[i] & 0xff00) == 0xee00) { /* Non-spacing character ==> combiner */ trailer = s2[i++]; if (i < len) { s2[i-1] = s2[i]; s2[i] = 0xf000 | (trailer & 0x0fff); trailer = 0; } else { s->strbuild.s->len--; break; }
b6c29f2011-04-16Henrik Grubbström (Grubba)  } else if ((s2[i] & 0xff00) == 0xe100) { /* Non-spacing character ==> double combiner * * Reorder here, and expand later. */ trailer = s2[i++]; if (i < len) { s2[i-1] = s2[i]; s2[i] = trailer; trailer = 0; double_combiners = 1; } else { s->strbuild.s->len--; break; }
3edf2a2007-06-19Henrik Grubbström (Grubba)  } } } } break;
4f6cde2014-10-03Jonas Walldén  case 0: /* Dummy case to silence clang warning */ break;
3edf2a2007-06-19Henrik Grubbström (Grubba)  } }
f3201e2017-07-16Henrik Grubbström (Grubba)  f_cq__Charset_Std_CS_drain(args);
3edf2a2007-06-19Henrik Grubbström (Grubba)  if (trailer) { /* The last character was a non-spacing character. * Restore it for the next pass. */ string_builder_putchar(&s->strbuild, trailer); }
b6c29f2011-04-16Henrik Grubbström (Grubba)  if (double_combiners) { /* There were non-spacing double modifiers used. */ ref_push_array(double_custom_chars); ref_push_array(double_combiner_chars); f_replace(3); }
3edf2a2007-06-19Henrik Grubbström (Grubba) }
c0d5e42008-06-29Martin Stjernholm static void f_std_feed(INT32 args, ptrdiff_t (*func)(struct pike_string *,
f5a2cf2000-08-09Henrik Grubbström (Grubba)  struct std_cs_stor *))
f886061998-10-15Marcus Comstedt {
50f0872017-04-08Martin Nilsson  struct std_cs_stor *s = (struct std_cs_stor *)Pike_fp->current_storage;
690e292005-04-03Martin Stjernholm  struct pike_string *str;
f5a2cf2000-08-09Henrik Grubbström (Grubba)  ptrdiff_t l;
f886061998-10-15Marcus Comstedt 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
f886061998-10-15Marcus Comstedt  if(str->size_shift>0)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error("Can't feed on wide strings!\n");
f886061998-10-15Marcus Comstedt  if(s->retain != NULL) {
690e292005-04-03Martin Stjernholm  str = add_shared_strings(s->retain, str); push_string (str); args++;
f886061998-10-15Marcus Comstedt  }
c0d5e42008-06-29Martin Stjernholm  l = func(str, s);
f886061998-10-15Marcus Comstedt 
690e292005-04-03Martin Stjernholm  if (s->retain) { free_string(s->retain); s->retain = NULL; }
f886061998-10-15Marcus Comstedt  if(l>0)
01a9572000-02-03Henrik Grubbström (Grubba)  s->retain = make_shared_binary_string((char *)STR0(str)+str->len-l, l);
f886061998-10-15Marcus Comstedt  pop_n_elems(args); push_object(this_object()); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_utf8(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt {
690e292005-04-03Martin Stjernholm  static const int utf8cont[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0 }; static const unsigned int first_char_mask[] = {0x1f, 0x0f, 0x07, 0x03, 0x01};
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
690e292005-04-03Martin Stjernholm  for (; l > 0; l--) { unsigned int ch = *p++; if (ch & 0x80) { int cl = utf8cont[(ch>>1) - 64], i; if (!cl)
c0d5e42008-06-29Martin Stjernholm  transcoder_error (str, p - STR0(str) - 1, 0, "Invalid byte.\n");
690e292005-04-03Martin Stjernholm  ch &= first_char_mask[cl - 1]; for (i = cl >= l ? l - 1 : cl; i--;) { unsigned int c = *p++; if ((c & 0xc0) != 0x80)
c0d5e42008-06-29Martin Stjernholm  /* Report the start of the invalid sequence to make things * easier for code that tries to recover from invalid UTF-8. */ transcoder_error (str, p - STR0(str) - ((cl >= l ? l - 1 : cl) - i) - 1, 0, "Truncated UTF-8 sequence.\n");
690e292005-04-03Martin Stjernholm  ch = (ch << 6) | (c & 0x3f); } if(cl >= l) return l; l -= cl; switch (cl) {
fd1df72018-02-13Marcus Comstedt  case 1: if (ch >= (1 << 7)) break; /* FALLTHRU */ case 2: if (ch >= (1 << 11)) break; /* FALLTHRU */
690e292005-04-03Martin Stjernholm  case 3: if (ch >= (1 << 16)) break;
c0d5e42008-06-29Martin Stjernholm  transcoder_error (str, p - STR0(str) - cl - 1, 0, "Non-shortest form of character U+%04X.\n", ch);
690e292005-04-03Martin Stjernholm  }
c0d5e42008-06-29Martin Stjernholm  if ((ch >= 0xd800 && ch <= 0xdfff) || ch > 0x10ffff) transcoder_error (str, p - STR0(str) - cl - 1, 0, "Character U+%04X is outside the valid range.\n", ch);
f886061998-10-15Marcus Comstedt  }
690e292005-04-03Martin Stjernholm  string_builder_putchar(&s->strbuild, ch);
f886061998-10-15Marcus Comstedt  }
690e292005-04-03Martin Stjernholm 
f886061998-10-15Marcus Comstedt  return l; } static void f_feed_utf8(INT32 args) { f_std_feed(args, feed_utf8); }
/* From http://www.unicode.org/reports/tr16/
 * Table 3: Byte map from UTF-EBCDIC byte-sequence to I8-sequence.
 *
 * Indexed by the UTF-EBCDIC byte; one row per high nibble.
 */
static const unsigned char utf_ebcdic_to_i8_conv[] = {
  /* 0x00 */
  0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
  0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0x10 */
  0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
  0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0x20 */
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
  /* 0x30 */
  0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
  0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
  /* 0x40 */
  0x20, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
  0xa7, 0xa8, 0xa9, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
  /* 0x50 */
  0x26, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
  0xb1, 0xb2, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
  /* 0x60 */
  0x2d, 0x2f, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
  0xb9, 0xba, 0xbb, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
  /* 0x70 */
  0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
  0xc4, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
  /* 0x80 */
  0xc5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
  /* 0x90 */
  0xcc, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
  0x71, 0x72, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2,
  /* 0xa0 */
  0xd3, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
  0x79, 0x7a, 0xd4, 0xd5, 0xd6, 0x5b, 0xd7, 0xd8,
  /* 0xb0 */
  0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0x5d, 0xe6, 0xe7,
  /* 0xc0 */
  0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
  /* 0xd0 */
  0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
  0x51, 0x52, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
  /* 0xe0 */
  0x5c, 0xf4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
  0x59, 0x5a, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa,
  /* 0xf0 */
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x9f,
};
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_utf_ebcdic(struct pike_string *str,
e89b612006-01-12Henrik Grubbström (Grubba)  struct std_cs_stor *s) { static const int cont[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 0, 0, }; static const unsigned int first_char_mask[] = {0x1f, 0x0f, 0x07, 0x03, 0x01};
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
e89b612006-01-12Henrik Grubbström (Grubba)  for (; l > 0; l--) { unsigned int ch = utf_ebcdic_to_i8_conv[*p++]; if ((ch & 0x80) && (ch & 0x60)) { int cl = cont[(ch>>1) - 80]; int i; if (!cl)
c0d5e42008-06-29Martin Stjernholm  transcoder_error (str, p - STR0(str) - 1, 0, "Invalid byte.\n");
e89b612006-01-12Henrik Grubbström (Grubba)  ch &= first_char_mask[cl - 1]; for (i = cl >= l ? l - 1 : cl; i--;) { unsigned int c = utf_ebcdic_to_i8_conv[*p++]; if ((c & 0xe0) != 0xa0)
c0d5e42008-06-29Martin Stjernholm  /* Report the start of the invalid sequence to make things * easier for code that tries to recover from invalid UTF-EBCDIC. */ transcoder_error (str, p - STR0(str) - ((cl >= l ? l - 1 : cl) - i) - 1, 0, "Truncated UTF-EBCDIC I8-sequence.\n");
e89b612006-01-12Henrik Grubbström (Grubba)  ch = (ch << 5) | (c & 0x1f); } if(cl >= l) return l; l -= cl; #if 0 /* FIXME: Detect non-shortest sequence. */ switch (cl) { case 1: if (ch >= (1 << 7)) break; case 2: if (ch >= (1 << 11)) break; case 3: if (ch >= (1 << 16)) break;
c0d5e42008-06-29Martin Stjernholm  transcoder_error (str, p - STR0(str) - cl - 1, 0, "Non-shortest form of character U+%04X.\n", ch);
e89b612006-01-12Henrik Grubbström (Grubba)  }
c0d5e42008-06-29Martin Stjernholm  if ((ch >= 0xd800 && ch <= 0xdfff) || ch > 0x10ffff) transcoder_error (str, p - STR0(str) - cl - 1, 0, "Character U+%04X is outside the valid range.\n", ch);
e89b612006-01-12Henrik Grubbström (Grubba) #endif /* 0 */ } string_builder_putchar(&s->strbuild, ch); } return l; } static void f_feed_utf_ebcdic(INT32 args) { f_std_feed(args, feed_utf_ebcdic); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_utf7_5(struct pike_string *str, struct std_cs_stor *s)
e8ebdb2001-05-10Henrik Grubbström (Grubba) { static int utf7_5len[] = { 0, 0, 0, 0, 0, 0, 0, 0, -1,-1, 1, 2,-1,-1,-1,-1, };
f75dda2004-07-25Martin Nilsson  static const unsigned INT32 utf7_5of[] = { 0ul, 0x28c0ul, 0xb30c0ul };
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
e8ebdb2001-05-10Henrik Grubbström (Grubba)  while(l>0) { unsigned INT32 ch = 0; int cl = utf7_5len[(*p)>>4]; if(cl>--l) return l+1; switch(cl) {
fd1df72018-02-13Marcus Comstedt  case 2: ch += *p++; ch<<=6; /* FALLTHRU */ case 1: ch += *p++; ch<<=6; /* FALLTHRU */
e8ebdb2001-05-10Henrik Grubbström (Grubba)  case 0: ch += *p++; break; case -1: /* FIXME: Encoding error if cl < 0. */ cl = 0; break; } l-=cl; string_builder_putchar(&s->strbuild, (ch-utf7_5of[cl])&0x7fffffffl); } return l; } static void f_feed_utf7_5(INT32 args) { f_std_feed(args, feed_utf7_5); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_utf7(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt { struct utf7_stor *u7 = (struct utf7_stor *)(((char*)s)+utf7_stor_offs); INT32 dat = u7->dat, surro = u7->surro; int shift = u7->shift, datbit = u7->datbit;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
f886061998-10-15Marcus Comstedt  if(l<=0) return l;
a23b762000-07-24Andreas Lange  if(shift==2) {
f886061998-10-15Marcus Comstedt  if(*p=='-') { string_builder_putchar(&s->strbuild, '+'); if(--l==0) { u7->shift=0; return l; } p++; shift=0; } else shift=1;
a23b762000-07-24Andreas Lange  }
f886061998-10-15Marcus Comstedt  for(;;) if(shift) {
4dc1d11999-09-25Henrik Grubbström (Grubba)  int c = 0, z;
f886061998-10-15Marcus Comstedt  while(l-->0 && (c=(*p++)-'+')>=0 && c<=('z'-'+') && (z=rev64t[c])>=0) { dat = (dat<<6)|z; if((datbit+=6)>=16) { INT32 uc = dat>>(datbit-16); if((uc&0xfc00)==0xd800) { if(surro) string_builder_putchar(&s->strbuild, surro); surro = uc; } else if(surro) { if((uc&0xfc00)==0xdc00) string_builder_putchar(&s->strbuild, 0x00010000+ ((surro&0x3ff)<<10)+(uc&0x3ff)); else { string_builder_putchar(&s->strbuild, surro); string_builder_putchar(&s->strbuild, uc); } surro = 0; } else string_builder_putchar(&s->strbuild, uc); datbit -= 16; dat &= (1<<datbit)-1; } } if(l<0) { l++; break; } if(surro) { string_builder_putchar(&s->strbuild, surro); surro = 0; } /* should check that dat is 0 here. */ shift=0; dat=0; datbit=0; if(c!=('-'-'+')) { l++; --p; } else if(l==0)
13670c2015-05-25Martin Nilsson  break;
f886061998-10-15Marcus Comstedt  } else { while(l-->0 && *p!='+') string_builder_putchar(&s->strbuild, *p++); if(l<0) { l++; break; } p++; if(l==0) { shift=2; break; } if(*p=='-') { string_builder_putchar(&s->strbuild, '+'); if(--l==0) break; p++; } else shift = 1; } u7->dat = dat; u7->surro = surro; u7->shift = shift; u7->datbit = datbit; return l; } static void f_clear_utf7(INT32 args) { struct utf7_stor *u7 =
50f0872017-04-08Martin Nilsson  (struct utf7_stor *)(Pike_fp->current_storage+utf7_stor_offs);
f886061998-10-15Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba)  f_cq__Charset_Std_CS_clear(args);
13670c2015-05-25Martin Nilsson 
f886061998-10-15Marcus Comstedt  u7->dat = 0; u7->surro = 0; u7->shift = 0; u7->datbit = 0; }
74dfe82012-12-30Jonas Walldén static void utf7_init_stor(struct object *UNUSED(o))
f886061998-10-15Marcus Comstedt { struct utf7_stor *u7 =
50f0872017-04-08Martin Nilsson  (struct utf7_stor *)(Pike_fp->current_storage+utf7_stor_offs);
f886061998-10-15Marcus Comstedt  u7->dat = 0; u7->surro = 0; u7->shift = 0; u7->datbit = 0; } static void f_feed_utf7(INT32 args) { f_std_feed(args, feed_utf7); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_sjis(struct pike_string *str, struct std_cs_stor *s)
eeccd82001-06-05Marcus Comstedt {
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
eeccd82001-06-05Marcus Comstedt  while(l>0) { unsigned INT32 ch = *p++; if(ch < 0x80) { if(ch == 0x5c) ch = 0xa5; else if(ch == 0x7e) ch = 0x203e; string_builder_putchar(&s->strbuild, ch); --l; } else if(ch < 0xa1 || ch >= 0xe0) { if(ch == 0x80 || ch == 0xa0 || ch >= 0xeb) { string_builder_putchar(&s->strbuild, 0xfffd); --l; } else { int lo; if(l<2) return l; lo = *p++; l -= 2; if(ch > 0xa0) ch -= 0x40;
9c87942001-06-08Marcus Comstedt  if(lo >= 0x40 && lo <= 0x9e && lo != 0x7f) { if(lo > 0x7f) --lo;
eeccd82001-06-05Marcus Comstedt  ch = map_JIS_C6226_1983[(ch-0x81)*188+(lo-0x40)];
9c87942001-06-08Marcus Comstedt  } else if(lo >= 0x9f && lo <= 0xfc)
eeccd82001-06-05Marcus Comstedt  ch = map_JIS_C6226_1983[(ch-0x81)*188+94+(lo-0x9f)]; else ch = 0xfffd; string_builder_putchar(&s->strbuild, ch); } } else { string_builder_putchar(&s->strbuild, ch+0xfec0); --l; } } return l; } static void f_feed_sjis(INT32 args) { f_std_feed(args, feed_sjis); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_euc(struct pike_string *str, struct std_cs_stor *s)
eeccd82001-06-05Marcus Comstedt { struct euc_stor *euc = (struct euc_stor *)(((char*)s)+euc_stor_offs); UNICHAR const *map = euc->table;
cb43502005-12-07Marcus Comstedt  UNICHAR const *map2 = euc->table2; UNICHAR const *map3 = euc->table3;
eeccd82001-06-05Marcus Comstedt 
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
eeccd82001-06-05Marcus Comstedt  while(l>0) { unsigned INT32 ch = *p++; if(ch < 0x80) { string_builder_putchar(&s->strbuild, ch); --l; } else if(ch > 0xa0 && ch < 0xff) { int lo; if(l<2) return l; lo = (*p++)|0x80; if(lo > 0xa0 && lo < 0xff) ch = map[(ch-0xa1)*94+(lo-0xa1)]; else ch = 0xfffd; string_builder_putchar(&s->strbuild, ch); l -= 2;
cb43502005-12-07Marcus Comstedt  } else if(ch == 0x8e) { if(l<2) return l; ch = (*p++)|0x80;
b40d3e2011-11-21Henrik Grubbström (Grubba)  if(map2 && (ch > 0xa0 && ch < 0xff))
cb43502005-12-07Marcus Comstedt  ch = map2[ch-0xa1]; else ch = 0xfffd; string_builder_putchar(&s->strbuild, ch); l -= 2; } else if(ch == 0x8f) { int lo; if(l<3) return l; ch = (*p++)|0x80; lo = (*p++)|0x80;
b40d3e2011-11-21Henrik Grubbström (Grubba)  if(map3 && (ch > 0xa0 && ch < 0xff && lo > 0xa0 && lo < 0xff))
cb43502005-12-07Marcus Comstedt  ch = map3[(ch-0xa1)*94+(lo-0xa1)]; else ch = 0xfffd; string_builder_putchar(&s->strbuild, ch); l -= 3;
eeccd82001-06-05Marcus Comstedt  } else { string_builder_putchar(&s->strbuild, 0xfffd); --l; } } return l; } static void f_feed_euc(INT32 args) { f_std_feed(args, feed_euc); } static void f_create_euc(INT32 args) {
50f0872017-04-08Martin Nilsson  struct euc_stor *s = (struct euc_stor *)(Pike_fp->current_storage + euc_stor_offs);
eeccd82001-06-05Marcus Comstedt  struct pike_string *str; int lo=0, hi=num_charset_def-1;
d103422018-08-05Martin Nilsson  check_all_args(NULL, args, BIT_STRING, BIT_STRING, 0);
eeccd82001-06-05Marcus Comstedt 
50f0872017-04-08Martin Nilsson  str = Pike_sp[-args].u.string;
eeccd82001-06-05Marcus Comstedt  if(str->size_shift>0) hi = -1; while(lo<=hi) { int c, mid = (lo+hi)>>1; if((c = strcmp((char *)STR0(str), charset_map[mid].name))==0) { if(charset_map[mid].mode == MODE_9494) s->table = charset_map[mid].table; break; } if(c<0) hi=mid-1; else lo=mid+1; } if(s->table == NULL) Pike_error("Unknown charset in EUCDec\n");
b40d3e2011-11-21Henrik Grubbström (Grubba)  if(s->table == iso2022_9494[2]) { /* jis-x0208 */ s->table2 = iso2022_94[9]; /* jis-x0201 */ s->table3 = iso2022_9494[4]; /* jis-x0212 */
cb43502005-12-07Marcus Comstedt  } else { s->table2 = NULL; s->table3 = NULL; }
50f0872017-04-08Martin Nilsson  copy_shared_string (s->name, Pike_sp[1-args].u.string);
65810c2017-12-28Martin Nilsson  pop_n_elems(args);
eeccd82001-06-05Marcus Comstedt }
3337b22004-08-17Martin Nilsson static void f_create_multichar(INT32 args) {
b2e7a42006-01-06Martin Nilsson  char *name;
bbeb442008-06-29Martin Stjernholm  const struct multichar_def *def = multichar_map;
50f0872017-04-08Martin Nilsson  struct multichar_stor *s = (struct multichar_stor *)(Pike_fp->current_storage + multichar_stor_offs);
3337b22004-08-17Martin Nilsson 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%s", &name);
3337b22004-08-17Martin Nilsson 
b2e7a42006-01-06Martin Nilsson  while(1) { if(def->name == 0) Pike_error("Unknown multichar table.\n"); if( strcmp(name, def->name)==0 ) break; def++;
3337b22004-08-17Martin Nilsson  }
b2e7a42006-01-06Martin Nilsson  s->table = def->table;
aeba852006-01-13Henrik Grubbström (Grubba)  /* NOTE: gb18030 is the first in the multichar map! */ s->is_gb18030 = (def == multichar_map);
c0d5e42008-06-29Martin Stjernholm 
50f0872017-04-08Martin Nilsson  copy_shared_string (s->name, Pike_sp[-args].u.string);
65810c2017-12-28Martin Nilsson  pop_n_elems(args);
aeba852006-01-13Henrik Grubbström (Grubba) } #include "gb18030.h" /* Used for gb18030 to decode code points outside GBK. */ static ptrdiff_t feed_gb18030(const p_wchar0 *p, ptrdiff_t l, struct std_cs_stor *s) { p_wchar2 index = 0;
5b0d532006-01-14Henrik Grubbström (Grubba)  if (l < 4) { return l; }
aeba852006-01-13Henrik Grubbström (Grubba)  /* First decode the linear offset. */
5b0d532006-01-14Henrik Grubbström (Grubba)  if ((p[0] < 0x81) || (p[0] > 0xfe)) { return 0; }
aeba852006-01-13Henrik Grubbström (Grubba)  index = p[0] - 0x81;
5b0d532006-01-14Henrik Grubbström (Grubba)  if ((p[1] < 0x30) || (p[1] > 0x39)) { return 0; }
aeba852006-01-13Henrik Grubbström (Grubba)  index *= 10; index += p[1] - 0x30;
5b0d532006-01-14Henrik Grubbström (Grubba)  if ((p[2] < 0x81) || (p[2] > 0xfe)) { return 0; }
aeba852006-01-13Henrik Grubbström (Grubba)  index *= 126;
2d04902006-01-14Henrik Grubbström (Grubba)  index += p[2] - 0x81;
aeba852006-01-13Henrik Grubbström (Grubba) 
5b0d532006-01-14Henrik Grubbström (Grubba)  if ((p[3] < 0x30) || (p[3] > 0x39)) { return 0; }
aeba852006-01-13Henrik Grubbström (Grubba)  index *= 10; index += p[3] - 0x30; /* Convert to Unicode. */ string_builder_putchar(&s->strbuild, gb18030_to_unicode(index)); /* We've used 4 bytes of input. */ return -4;
3337b22004-08-17Martin Nilsson }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_multichar(struct pike_string *str,
3337b22004-08-17Martin Nilsson  struct std_cs_stor *s) {
50f0872017-04-08Martin Nilsson  struct multichar_stor *m = (struct multichar_stor *)(Pike_fp->current_storage + multichar_stor_offs);
3337b22004-08-17Martin Nilsson  const struct multichar_table *table = m->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
3337b22004-08-17Martin Nilsson  while(l>0) { unsigned INT32 ch = *p++; if(ch < 0x81) {
5b0d532006-01-14Henrik Grubbström (Grubba)  /* FIXME: Adjust above limit to 0x80? Recent GB18030 encodes
aeba852006-01-13Henrik Grubbström (Grubba)  * U+0080 as 0x81 0x30 0x81 0x30. */
3337b22004-08-17Martin Nilsson  string_builder_putchar(&s->strbuild, ch); --l; } else { const struct multichar_table page = table[ ch-0x81 ]; if(l==1) return 1; if(ch==0xff) {
c0d5e42008-06-29Martin Stjernholm  transcoder_error (str, p - STR0(str) - 1, 0, "Illegal character.\n");
3337b22004-08-17Martin Nilsson  } ch = *p++; if( ch<page.lo || ch>page.hi ) {
aeba852006-01-13Henrik Grubbström (Grubba)  if (m->is_gb18030) {
5b0d532006-01-14Henrik Grubbström (Grubba)  int delta = feed_gb18030(p-2, l, s);
aeba852006-01-13Henrik Grubbström (Grubba)  if (delta < 0) {
5b0d532006-01-14Henrik Grubbström (Grubba)  p -= delta + 2;
aeba852006-01-13Henrik Grubbström (Grubba)  l += delta; continue; } else if (delta > 0) { /* More characters needed. */ return delta; }
c0d5e42008-06-29Martin Stjernholm  } transcoder_error (str, p - STR0(str) - 2, 0, "Illegal character pair: 0x%02x 0x%02x " "(expected 0x%02x 0x%02x..0x%02x).\n", p[-2], ch, p[-2], page.lo, page.hi);
3337b22004-08-17Martin Nilsson  } else string_builder_putchar(&s->strbuild, page.table[ch-page.lo]); l -= 2; } } return 0; } static void f_feed_multichar(INT32 args) { f_std_feed(args, feed_multichar); }
d893fc2006-01-15Henrik Grubbström (Grubba) static void feed_gb18030e(struct std_cs_stor *cs, struct string_builder *sb, struct pike_string *str, struct pike_string *rep, struct svalue *repcb) { ptrdiff_t l = str->len; const struct gb18030e_info *e_info; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if ((e_info = get_gb18030e_info(c))) { if (e_info->index < 0) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else { /* Four bytes */ int index = e_info->index + c - e_info->ulow; c = index/12600; string_builder_putchar(sb, 0x81 + c); index -= c*12600; c = index/1260; string_builder_putchar(sb, 0x30 + c); index -= c*1260; c = index/10; string_builder_putchar(sb, 0x81 + c); index -= c*10; string_builder_putchar(sb, 0x30 + index); } } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gb18030e, cs, str, p - STR0(str) - 1);
d893fc2006-01-15Henrik Grubbström (Grubba)  } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if ((e_info = get_gb18030e_info(c))) { if (e_info->index < 0) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else { /* Four bytes */ int index = e_info->index + c - e_info->ulow; c = index/12600; string_builder_putchar(sb, 0x81 + c); index -= c*12600; c = index/1260; string_builder_putchar(sb, 0x30 + c); index -= c*1260; c = index/10; string_builder_putchar(sb, 0x81 + c); index -= c*10; string_builder_putchar(sb, 0x30 + index); } } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gb18030e, cs, str, p - STR1(str) - 1);
d893fc2006-01-15Henrik Grubbström (Grubba)  } } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--) { if((c=*p++)<=0x7f) { string_builder_putchar(sb, c); } else if ((e_info = get_gb18030e_info(c))) { if (e_info->index < 0) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else { /* Four bytes */ int index = e_info->index + c - e_info->ulow; c = index/12600; string_builder_putchar(sb, 0x81 + c); index -= c*12600; c = index/1260; string_builder_putchar(sb, 0x30 + c); index -= c*1260; c = index/10; string_builder_putchar(sb, 0x81 + c); index -= c*10; string_builder_putchar(sb, 0x30 + index); } } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gb18030e, cs, str, p - STR2(str) - 1);
d893fc2006-01-15Henrik Grubbström (Grubba)  } } } break; } } static void f_feed_gb18030e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
d893fc2006-01-15Henrik Grubbström (Grubba) 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
d893fc2006-01-15Henrik Grubbström (Grubba) 
f3201e2017-07-16Henrik Grubbström (Grubba)  feed_gb18030e(cs, &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
d893fc2006-01-15Henrik Grubbström (Grubba)  pop_n_elems(args); push_object(this_object()); }
9ca8b82006-01-17Henrik Grubbström (Grubba) static void feed_gbke(struct std_cs_stor *cs, struct string_builder *sb, struct pike_string *str, struct pike_string *rep, struct svalue *repcb) { ptrdiff_t l = str->len; const struct gb18030e_info *e_info; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if ((e_info = get_gb18030e_info(c)) && (e_info->index < 0)) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gbke, cs, str, p - STR0(str) - 1);
9ca8b82006-01-17Henrik Grubbström (Grubba)  } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if ((e_info = get_gb18030e_info(c)) && (e_info->index < 0)) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gbke, cs, str, p - STR1(str) - 1);
9ca8b82006-01-17Henrik Grubbström (Grubba)  } } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--) { if((c=*p++)<=0x7f) { string_builder_putchar(sb, c); } else if ((e_info = get_gb18030e_info(c)) && (e_info->index < 0)) { /* Two bytes (ie GBK). */ int off = ~e_info->index + (c - e_info->ulow)*2; string_builder_putchar(sb, gb18030e_bytes[off]); string_builder_putchar(sb, gb18030e_bytes[off+1]); } else {
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_gbke, cs, str, p - STR2(str) - 1);
9ca8b82006-01-17Henrik Grubbström (Grubba)  } } } break; } } static void f_feed_gbke(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
9ca8b82006-01-17Henrik Grubbström (Grubba) 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
9ca8b82006-01-17Henrik Grubbström (Grubba) 
f3201e2017-07-16Henrik Grubbström (Grubba)  feed_gbke(cs, &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
9ca8b82006-01-17Henrik Grubbström (Grubba)  pop_n_elems(args); push_object(this_object()); }
/* create() for the Shift_JIS encoder.
 *
 * Builds the reverse (Unicode -> Shift_JIS) table for the range
 * 0x5c..0xfffc from map_JIS_C6226_1983, then adds the single byte
 * entries, sets the charset name to "shiftjis" and chains to the
 * Std_CS create() with all arguments.
 */
static void f_create_sjise(INT32 args)
{
  struct std16e_stor *s =
    (struct std16e_stor *)(Pike_fp->current_storage + std16e_stor_offs);
  int i, j;

  s->lowtrans = 0x5c;
  s->lo = 0x5c;
  s->hi = 0xfffd;

  s->revtab = xcalloc(s->hi-s->lo, sizeof(p_wchar1));

  /* Double byte characters: i is the row and j the cell (both 33..126). */
  for (i = 33; i <= 126; i++) {
    const int row = (i - 33) * 94;
    for (j = 33; j <= 126; j++) {
      UNICHAR c = map_JIS_C6226_1983[row + j - 33];
      if (c == 0xfffd || c < s->lo)
	continue;
      if (i & 1)
	s->revtab[c-s->lo] =
	  (((i>>1)+(i<95? 113:177))<<8)|(j+(j<96? 31:32));
      else
	s->revtab[c-s->lo] =
	  (((i>>1)+(i<95? 112:176))<<8)|(j+126);
    }
  }

  /* 0x5d..0x7d map to themselves. */
  for (i = 0x5d; i < 0x7e; i++)
    s->revtab[i-s->lo] = i;

  /* U+ff61..U+ff9f map to the single bytes 0xa1..0xdf. */
  for (i = 1; i < 64; i++)
    s->revtab[i+0xff60-s->lo] = 0xa0+i;

  s->revtab[0xa5 - s->lo] = 0x5c;
  s->revtab[0x203e - s->lo] = 0x7e;

  /* Could use a program constant in this case, but that'd require a
   * quirky inherit structure. /mast */
  REF_MAKE_CONST_STRING (*(struct pike_string **) (Pike_fp->current_storage +
						   rfc_charset_name_offs),
			 "shiftjis");

  f_cq__Charset_Std_CS_create(args);
}

/* create() for the EUC encoder.
 *
 * Arguments: table name (string), charset name (string), and then the
 * optional arguments for the Std_CS create().  The table name is
 * looked up in charset_map and must refer to a MODE_9494 table,
 * otherwise "Unknown charset in EUCEnc" is thrown.
 */
static void f_create_euce(INT32 args)
{
  struct std16e_stor *s =
    (struct std16e_stor *)(Pike_fp->current_storage + std16e_stor_offs);
  struct pike_string *str;
  UNICHAR const *table = NULL;
  int lo = 0, hi = num_charset_def-1;
  int i, j;

  check_all_args(NULL, args, BIT_STRING, BIT_STRING,
		 BIT_STRING|BIT_VOID|BIT_INT,
		 BIT_FUNCTION|BIT_VOID|BIT_INT, 0);

  str = Pike_sp[-args].u.string;

  /* Wide strings can't match any table name; skip the search. */
  if (str->size_shift > 0)
    hi = -1;

  /* Binary search for the table name. */
  while (lo <= hi) {
    int mid = (lo+hi)>>1;
    int c = strcmp((char *)STR0(str), charset_map[mid].name);
    if (c == 0) {
      if (charset_map[mid].mode == MODE_9494)
	table = charset_map[mid].table;
      break;
    }
    if (c < 0)
      hi = mid-1;
    else
      lo = mid+1;
  }

  if (table == NULL)
    Pike_error("Unknown charset in EUCEnc\n");

  s->lowtrans = 128;
  s->lo = 128;
  s->hi = 128;

  s->revtab = xcalloc(65536-s->lo, sizeof(p_wchar1));

  /* Main table: both bytes get the high bit set. */
  for (i = 33; i <= 126; i++) {
    const int row = (i - 33) * 94;
    for (j = 33; j <= 126; j++) {
      UNICHAR c = table[row + j - 33];
      if (c == 0xfffd || c < s->lo)
	continue;
      s->revtab[c-s->lo] = (i<<8)|j|0x8080;
      if (c >= s->hi)
	s->hi = c+1;
    }
  }

  if (table == iso2022_9494[2]) {
    /* Also fill in entries from iso2022_94[9] and iso2022_9494[4],
     * without overriding anything that is already mapped.
     */
    s->sshift = 1;

    table = iso2022_94[9];
    for (j = 33; j <= 126; j++) {
      UNICHAR c = table[j-33];
      if (c == 0xfffd || c < s->lo || s->revtab[c-s->lo])
	continue;
      s->revtab[c-s->lo] = j;
      if (c >= s->hi)
	s->hi = c+1;
    }

    table = iso2022_9494[4];
    for (i = 33; i <= 126; i++) {
      const int row = (i - 33) * 94;
      for (j = 33; j <= 126; j++) {
	UNICHAR c = table[row + j - 33];
	if (c == 0xfffd || c < s->lo || s->revtab[c-s->lo])
	  continue;
	s->revtab[c-s->lo] = (i<<8)|j|0x8000;
	if (c >= s->hi)
	  s->hi = c+1;
      }
    }
  }

  copy_shared_string (*(struct pike_string **) (Pike_fp->current_storage +
						rfc_charset_name_offs),
		      Pike_sp[1-args].u.string);

  /* Hand the remaining arguments to the base create(), then drop the
   * two leading strings.
   */
  f_cq__Charset_Std_CS_create(args-2);
  pop_n_elems(2);
}
0e4d631998-11-16Marcus Comstedt static struct std8e_stor *push_std_8bite(int args, int allargs, int lo, int hi) { struct std8e_stor *s8;
c0d5e42008-06-29Martin Stjernholm  struct object *o = clone_object(std_8bite_program, args); allargs -= args; copy_shared_string (*(struct pike_string **) (o->storage + rfc_charset_name_offs),
50f0872017-04-08Martin Nilsson  Pike_sp[-allargs].u.string);
c0d5e42008-06-29Martin Stjernholm  pop_n_elems(allargs); push_object(o);
50f0872017-04-08Martin Nilsson  s8 = (struct std8e_stor *)(Pike_sp[-1].u.object->storage+std8e_stor_offs);
dc8d022014-04-27Martin Nilsson  s8->revtab = xcalloc(hi-lo, sizeof(p_wchar0));
0e4d631998-11-16Marcus Comstedt  s8->lo = lo; s8->hi = hi; s8->lowtrans = 0; return s8; }
f886061998-10-15Marcus Comstedt 
fecae61999-01-05Marcus Comstedt static struct std16e_stor *push_std_16bite(int args, int allargs, int lo, int hi) { struct std16e_stor *s16;
c0d5e42008-06-29Martin Stjernholm  struct object *o = clone_object(std_16bite_program, args); allargs -= args; copy_shared_string (*(struct pike_string **) (o->storage + rfc_charset_name_offs),
50f0872017-04-08Martin Nilsson  Pike_sp[-allargs].u.string);
c0d5e42008-06-29Martin Stjernholm  pop_n_elems(allargs); push_object(o);
50f0872017-04-08Martin Nilsson  s16 = (struct std16e_stor *)(Pike_sp[-1].u.object->storage+std16e_stor_offs);
dc8d022014-04-27Martin Nilsson  s16->revtab = xcalloc(hi-lo, sizeof(p_wchar1));
fecae61999-01-05Marcus Comstedt  s16->lo = lo; s16->hi = hi; s16->lowtrans = 0; return s16; }
7df2d12017-04-06Henrik Grubbström (Grubba) /*! @decl object rfc1345(string charset, int(0..1)|void encoder, string|void rep, @ *! function(string:string)|void repcb) *! *! Low-level charset codec factory. *!
27ac812017-07-25Pontus Östlund  *! @param charset
7df2d12017-04-06Henrik Grubbström (Grubba)  *! Canonical name of character set to look up. *!
27ac812017-07-25Pontus Östlund  *! @param encoder
7df2d12017-04-06Henrik Grubbström (Grubba)  *! Flag indicating that an encoder and not a decoder is wanted. *!
27ac812017-07-25Pontus Östlund  *! @param rep
7df2d12017-04-06Henrik Grubbström (Grubba)  *! String to use for characters not representable in the @[charset]. *! Only used for encoders. *!
27ac812017-07-25Pontus Östlund  *! @param repcb
7df2d12017-04-06Henrik Grubbström (Grubba)  *! Function to call for characters not representable in the @[charset]. *! Only used for encoders. *! *! This is the main entrypoint into the low-level @[_Charset] module. *! *| @returns *! Returns a suitable encoder or decoder on success and @expr{0@} *! (zero) on failure. *! *! @seealso *! @[Charset.encoder()], @[Charset.decoder()] */ PIKEFUN object rfc1345(string charset, int|void encoder, string|void rep, function(string:string)|void repcb)
f886061998-10-15Marcus Comstedt { struct pike_string *str; int lo=0, hi=num_charset_def-1;
bbeb442008-06-29Martin Stjernholm  p_wchar1 const *tabl;
f886061998-10-15Marcus Comstedt 
e155672017-04-07Henrik Grubbström (Grubba)  if(charset->size_shift) { push_int(0); return; }
f886061998-10-15Marcus Comstedt  while(lo<=hi) { int c, mid = (lo+hi)>>1;
e155672017-04-07Henrik Grubbström (Grubba)  if(!(c = strcmp((char *)STR0(charset), charset_map[mid].name))) {
36a00c2000-08-15Henrik Grubbström (Grubba)  struct program *p = NULL;
3c39ab1998-11-16Marcus Comstedt 
e155672017-04-07Henrik Grubbström (Grubba)  if(encoder && encoder->u.integer) {
d134da2008-06-29Martin Stjernholm  unsigned lowtrans = 0; int i, j, lo2=0, hi2=0, z, c;
0e4d631998-11-16Marcus Comstedt  switch(charset_map[mid].mode) { case MODE_94: lowtrans=lo=33; hi=126; break; case MODE_96: lowtrans=128; lo=160; hi=255; break;
fecae61999-01-05Marcus Comstedt  case MODE_9494: lowtrans=lo=lo2=33; hi=hi2=126; break; case MODE_9696: lowtrans=32; lo=lo2=160; hi=hi2=255; break;
1ee32f2001-06-06Stefan Wallström  case MODE_BIG5: lowtrans=32; lo=0xa1; lo2=0x40; hi=0xf9; hi2=0xfe; break;
0e4d631998-11-16Marcus Comstedt  default:
5aad932002-08-15Marcus Comstedt  Pike_fatal("Internal error in rfc1345\n");
0e4d631998-11-16Marcus Comstedt  }
13670c2015-05-25Martin Nilsson 
fecae61999-01-05Marcus Comstedt  if(hi2) { struct std16e_stor *s16;
7a492f1999-04-27Marcus Comstedt  s16 = push_std_16bite((args>2? args-2:0), args, lowtrans, 65536);
13670c2015-05-25Martin Nilsson 
fecae61999-01-05Marcus Comstedt  s16->lowtrans = lowtrans; s16->lo = lowtrans; s16->hi = lowtrans;
13670c2015-05-25Martin Nilsson 
fecae61999-01-05Marcus Comstedt  for(z=0, i=lo; i<=hi; i++, z+=(hi2-lo2+1)) for(j=lo2; j<=hi2; j++) if((c=charset_map[mid].table[z+j-lo2])!=0xfffd && c>=s16->lo) { s16->revtab[c-s16->lo]=(i<<8)|j; if(c>=s16->hi) s16->hi = c+1; } } else { struct std8e_stor *s8;
7a492f1999-04-27Marcus Comstedt  s8 = push_std_8bite((args>2? args-2:0), args, lowtrans, 65536);
13670c2015-05-25Martin Nilsson 
fecae61999-01-05Marcus Comstedt  s8->lowtrans = lowtrans; s8->lo = lowtrans; s8->hi = lowtrans;
13670c2015-05-25Martin Nilsson 
fecae61999-01-05Marcus Comstedt  for(i=lo; i<=hi; i++) if((c=charset_map[mid].table[i-lo])!=0xfffd && c>=s8->lo) { s8->revtab[c-s8->lo]=i; if(c>=s8->hi) s8->hi = c+1; } }
0e4d631998-11-16Marcus Comstedt  return; }
3c39ab1998-11-16Marcus Comstedt 
f886061998-10-15Marcus Comstedt  switch(charset_map[mid].mode) { case MODE_94: p = std_94_program; break; case MODE_96: p = std_96_program; break; case MODE_9494: p = std_9494_program; break; case MODE_9696: p = std_9696_program; break;
1ee32f2001-06-06Stefan Wallström  case MODE_BIG5: p = std_big5_program; break;
f886061998-10-15Marcus Comstedt  default:
5aad932002-08-15Marcus Comstedt  Pike_fatal("Internal error in rfc1345\n");
f886061998-10-15Marcus Comstedt  }
c0d5e42008-06-29Martin Stjernholm  { struct object *o = clone_object(p, 0); ((struct std_rfc_stor *)(o->storage+std_rfc_stor_offs)) ->table = charset_map[mid].table; copy_shared_string (*(struct pike_string **) (o->storage + rfc_charset_name_offs),
e155672017-04-07Henrik Grubbström (Grubba)  charset);
c0d5e42008-06-29Martin Stjernholm  push_object (o); }
f886061998-10-15Marcus Comstedt  return; } if(c<0) hi=mid-1; else lo=mid+1; }
e155672017-04-07Henrik Grubbström (Grubba)  if((tabl = misc_charset_lookup((char *)STR0(charset), &lo, &hi))) {
3c39ab1998-11-16Marcus Comstedt 
e155672017-04-07Henrik Grubbström (Grubba)  if(encoder && encoder->u.integer) {
0e4d631998-11-16Marcus Comstedt  struct std8e_stor *s8;
d134da2008-06-29Martin Stjernholm  int i, c;
0e4d631998-11-16Marcus Comstedt 
7a492f1999-04-27Marcus Comstedt  s8 = push_std_8bite((args>2? args-2:0), args, lo, 65536);
0e4d631998-11-16Marcus Comstedt  s8->lowtrans = lo; s8->lo = lo; s8->hi = lo;
e82da92008-07-03Henrik Grubbström (Grubba)  s8->zero_char = 0xfffd; for(i=lo; i<=hi; i++) {
0e4d631998-11-16Marcus Comstedt  if((c=tabl[i-lo])!=0xfffd && c>=s8->lo) { s8->revtab[c-lo]=i; if(c>=s8->hi) s8->hi = c+1; }
e82da92008-07-03Henrik Grubbström (Grubba)  } if (!lo && (c=tabl[0])!=0xfffd && c>=s8->lo) { /* Character 0x00 is a valid character in the encoding * for this character set (eg GSM 03.38). * * Note: We need to encode this character separately * due to 0x00 being used in revtab for the replacement * character. */ s8->zero_char = c; }
0e4d631998-11-16Marcus Comstedt  return; }
3c39ab1998-11-16Marcus Comstedt 
c0d5e42008-06-29Martin Stjernholm  { struct object *o = clone_object(std_8bit_program, 0); ((struct std_rfc_stor *)(o->storage+std_rfc_stor_offs)) ->table = (UNICHAR *)tabl; ((struct std_misc_stor *)(o->storage+std_misc_stor_offs)) ->lo = lo; ((struct std_misc_stor *)(o->storage+std_misc_stor_offs)) ->hi = hi; copy_shared_string (*(struct pike_string **) (o->storage + rfc_charset_name_offs),
e155672017-04-07Henrik Grubbström (Grubba)  charset);
c0d5e42008-06-29Martin Stjernholm  push_object(o); } return;
9db11a1998-11-05Marcus Comstedt  }
f886061998-10-15Marcus Comstedt  push_int(0); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_94(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table =
f886061998-10-15Marcus Comstedt  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
f886061998-10-15Marcus Comstedt  while(l--) { p_wchar0 x = *p++; if(x<=0x20 || x>=0x7f) string_builder_putchar(&s->strbuild, x);
4568672011-04-22Henrik Grubbström (Grubba)  else { UNICHAR uc = table[x-0x21]; if ((uc & 0xf800) == 0xd800) { /* We use the surrogate block as an offset after the 94 table * to a NUL-terminated string of UNICHARs, for the case where * the mapping doesn't fit in a single UNICHAR. */
357a522011-04-22Henrik Grubbström (Grubba)  string_builder_utf16_strcat(&s->strbuild, table + 94 + (uc & 0x07ff));
4568672011-04-22Henrik Grubbström (Grubba)  } else if (uc != 0xe000) string_builder_putchar(&s->strbuild, uc); }
f886061998-10-15Marcus Comstedt  } return 0; } static void f_feed_94(INT32 args) { f_std_feed(args, feed_94); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_96(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table =
f886061998-10-15Marcus Comstedt  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
f886061998-10-15Marcus Comstedt  while(l--) { p_wchar0 x = *p++; if(x<0xa0) string_builder_putchar(&s->strbuild, x);
4568672011-04-22Henrik Grubbström (Grubba)  else { UNICHAR uc = table[x-0xa0]; if ((uc & 0xf800) == 0xd800) { /* We use the surrogate block as an offset after the 96 table * to a NUL-terminated string of UNICHARs, for the case where * the mapping doesn't fit in a single UNICHAR. */
357a522011-04-22Henrik Grubbström (Grubba)  string_builder_utf16_strcat(&s->strbuild, table + 96 + (uc & 0x07ff));
4568672011-04-22Henrik Grubbström (Grubba)  } else if (uc != 0xe000) string_builder_putchar(&s->strbuild, table[x-0xa0]); }
f886061998-10-15Marcus Comstedt  } return 0; } static void f_feed_96(INT32 args) { f_std_feed(args, feed_96); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_9494(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table =
f886061998-10-15Marcus Comstedt  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
f886061998-10-15Marcus Comstedt  while(l--) { p_wchar0 y, x = (*p++)&0x7f; if(x<=0x20 || x>=0x7f) string_builder_putchar(&s->strbuild, x); else if(l==0) return 1; else if((y=(*p)&0x7f)>0x20 && y<0x7f) {
4568672011-04-22Henrik Grubbström (Grubba)  UNICHAR uc;
f886061998-10-15Marcus Comstedt  --l; p++;
4568672011-04-22Henrik Grubbström (Grubba)  uc = table[(x-0x21)*94+(y-0x21)]; if ((uc & 0xf800) == 0xd800) { /* We use the surrogate block as an offset after the 9494 table * to a NUL-terminated string of UNICHARs, for the case where * the mapping doesn't fit in a single UNICHAR. */
357a522011-04-22Henrik Grubbström (Grubba)  string_builder_utf16_strcat(&s->strbuild, table + 94*94 + (uc & 0x07ff));
4568672011-04-22Henrik Grubbström (Grubba)  } else if (uc != 0xe000) { string_builder_putchar(&s->strbuild, uc);
53ade32007-06-21Henrik Grubbström (Grubba)  }
f886061998-10-15Marcus Comstedt  } else { string_builder_putchar(&s->strbuild, x); } } return 0; } static void f_feed_9494(INT32 args) { f_std_feed(args, feed_9494); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_9696(struct pike_string *str, struct std_cs_stor *s)
f886061998-10-15Marcus Comstedt {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table =
f886061998-10-15Marcus Comstedt  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
f886061998-10-15Marcus Comstedt  while(l--) { p_wchar0 y, x = (*p++)&0x7f; if(x<0x20) string_builder_putchar(&s->strbuild, x); else if(l==0) return 1; else if((y=(*p)&0x7f)>=0x20) {
4568672011-04-22Henrik Grubbström (Grubba)  UNICHAR uc;
f886061998-10-15Marcus Comstedt  --l; p++;
4568672011-04-22Henrik Grubbström (Grubba)  uc = table[(x-0x20)*96+(y-0x20)]; if ((uc & 0xf800) == 0xd800) { /* We use the surrogate block as an offset after the 9696 table * to a NUL-terminated string of UNICHARs, for the case where * the mapping doesn't fit in a single UNICHAR. */
357a522011-04-22Henrik Grubbström (Grubba)  string_builder_utf16_strcat(&s->strbuild, table + 96*96 + (uc & 0x07ff));
4568672011-04-22Henrik Grubbström (Grubba)  } else if (uc != 0xe000) {
53ade32007-06-21Henrik Grubbström (Grubba)  string_builder_putchar(&s->strbuild, table[(x-0x20)*96+(y-0x20)]); }
f886061998-10-15Marcus Comstedt  } else { string_builder_putchar(&s->strbuild, x); } } return 0; } static void f_feed_9696(INT32 args) { f_std_feed(args, feed_9696); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_big5(struct pike_string *str, struct std_cs_stor *s)
1ee32f2001-06-06Stefan Wallström {
0d375e2001-06-07Henrik Grubbström (Grubba)  UNICHAR const *table =
1ee32f2001-06-06Stefan Wallström  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
1ee32f2001-06-06Stefan Wallström  while(l--) { p_wchar0 y, x = (*p++); if(x<0xa1 || x>0xf9 ) string_builder_putchar(&s->strbuild, x); else if(l==0) return 1; else if((y=(*p))>=0x40 && y<=0xfe ) { --l; p++; string_builder_putchar(&s->strbuild, table[(x-0xa1 )*(0xfe -0x40 +1)+(y-0x40 )]); } else { string_builder_putchar(&s->strbuild, x); } } return 0; } static void f_feed_big5(INT32 args) { f_std_feed(args, feed_big5); }
c0d5e42008-06-29Martin Stjernholm static ptrdiff_t feed_8bit(struct pike_string *str, struct std_cs_stor *s)
9db11a1998-11-05Marcus Comstedt {
3674bf2000-08-03Henrik Grubbström (Grubba)  UNICHAR const *table =
9db11a1998-11-05Marcus Comstedt  ((struct std_rfc_stor *)(((char*)s)+std_rfc_stor_offs))->table; struct std_misc_stor *misc = ((struct std_misc_stor *)(((char*)s)+std_misc_stor_offs)); int lo = misc->lo, hi = misc->hi;
c0d5e42008-06-29Martin Stjernholm  const p_wchar0 *p = STR0(str); ptrdiff_t l = str->len;
9db11a1998-11-05Marcus Comstedt  while(l--) { p_wchar0 x = *p++; if(x<lo || (x>0x7f && hi<=0x7f)) string_builder_putchar(&s->strbuild, x); else if(x>hi) string_builder_putchar(&s->strbuild, DEFCHAR); else string_builder_putchar(&s->strbuild, table[x-lo]); } return 0; } static void f_feed_8bit(INT32 args) { f_std_feed(args, feed_8bit); }
3c39ab1998-11-16Marcus Comstedt 
48656a1999-07-27Marcus Comstedt static void feed_utf8e(struct std_cs_stor *cs, struct string_builder *sb, struct pike_string *str, struct pike_string *rep, struct svalue *repcb)
3c39ab1998-11-16Marcus Comstedt {
f5a2cf2000-08-09Henrik Grubbström (Grubba)  ptrdiff_t l = str->len;
df28e62018-02-25Henrik Grubbström (Grubba)  ptrdiff_t new_len = str->len; if (!new_len) return;
3c39ab1998-11-16Marcus Comstedt  switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--)
df28e62018-02-25Henrik Grubbström (Grubba)  if((*p++)>0x7f) new_len++; if (new_len == str->len) { string_builder_shared_strcat(sb, str); return; } l = str->len; p = STR0(str); string_build_mkspace(sb, new_len, 0); while(l--)
3c39ab1998-11-16Marcus Comstedt  if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else { string_builder_putchar(sb, 0xc0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0x80|(c&0x3f));
3c39ab1998-11-16Marcus Comstedt  } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if(c<=0x7ff) { string_builder_putchar(sb, 0xc0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0x80|(c&0x3f));
690e292005-04-03Martin Stjernholm  } else if (c <= 0xd7ff || c >= 0xe000) {
3c39ab1998-11-16Marcus Comstedt  string_builder_putchar(sb, 0xe0|(c>>12)); string_builder_putchar(sb, 0x80|((c>>6)&0x3f));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0x80|(c&0x3f));
690e292005-04-03Martin Stjernholm  } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf8e, cs, str, p - STR1(str) - 1);
3c39ab1998-11-16Marcus Comstedt  } break; case 2: { p_wchar2 c, *p = STR2(str);
690e292005-04-03Martin Stjernholm  while(l--) { if((c=*p++)<=0x7f) {
3c39ab1998-11-16Marcus Comstedt  string_builder_putchar(sb, c);
690e292005-04-03Martin Stjernholm  continue; }
3c39ab1998-11-16Marcus Comstedt  else if(c<=0x7ff) { string_builder_putchar(sb, 0xc0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0x80|(c&0x3f));
690e292005-04-03Martin Stjernholm  continue;
3c39ab1998-11-16Marcus Comstedt  } else if(c<=0xffff) {
690e292005-04-03Martin Stjernholm  if (c <= 0xd7ff || c >= 0xe000) { string_builder_putchar(sb, 0xe0|(c>>12)); string_builder_putchar(sb, 0x80|((c>>6)&0x3f)); string_builder_putchar(sb, 0x80|(c&0x3f)); continue; } } else if(c<=0x10ffff) {
3c39ab1998-11-16Marcus Comstedt  string_builder_putchar(sb, 0xf0|(c>>18)); string_builder_putchar(sb, 0x80|((c>>12)&0x3f)); string_builder_putchar(sb, 0x80|((c>>6)&0x3f));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0x80|(c&0x3f));
690e292005-04-03Martin Stjernholm  continue; }
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf8e, cs, str, p - STR2(str) - 1);
690e292005-04-03Martin Stjernholm  }
3c39ab1998-11-16Marcus Comstedt  } break; } } static void f_feed_utf8e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
3c39ab1998-11-16Marcus Comstedt 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
3c39ab1998-11-16Marcus Comstedt 
f3201e2017-07-16Henrik Grubbström (Grubba)  feed_utf8e(cs, &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
3c39ab1998-11-16Marcus Comstedt  pop_n_elems(args); push_object(this_object()); }
e89b612006-01-12Henrik Grubbström (Grubba) /* From http://www.unicode.org/reports/tr16/ * Table 2: Byte map from I8-sequence to UTF-EBCDIC byte sequence */ static const unsigned char i8_to_utf_ebcdic_conv[] = { 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x15, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x5f, 0x6d, 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xff, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xaa, 0xab, 0xac, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbe, 0xbf, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe1, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, }; static void feed_utf_ebcdice(struct std_cs_stor *cs, struct string_builder *sb, struct pike_string *str, struct pike_string *rep, struct svalue *repcb) { ptrdiff_t l = str->len; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); 
while(l--) if((c=*p++)<=0x9f) string_builder_putchar(sb, i8_to_utf_ebcdic_conv[c]); else { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xc0|(c>>5)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<=0x9f) string_builder_putchar(sb, i8_to_utf_ebcdic_conv[c]); else if(c<=0x3ff) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xc0|(c>>5)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); } else if (c <= 0x3fff) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xe0|(c>>10)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>5)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); } else if (c <= 0xd7ff || c >= 0xe000) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xf0|(c>>15)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>10)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>5)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf_ebcdice, cs, str, p - STR1(str) - 1);
e89b612006-01-12Henrik Grubbström (Grubba)  } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--) { if((c=*p++)<=0x9f) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[c]); continue; } else if(c<=0x3ff) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xc0|(c>>5)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); continue; } else if(c<=0x3fff) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xe0|(c>>10)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>5)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); continue; } else if(c<=0x3ffff) { if (c <= 0xd7ff || c >= 0xe000) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xe0|(c>>15)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>10)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>5)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); continue; } } else if(c<=0x10ffff) { string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xf0|(c>>20)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>15)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>10)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|((c>>5)&0x1f)]); string_builder_putchar(sb, i8_to_utf_ebcdic_conv[0xa0|(c&0x1f)]); continue; }
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf_ebcdice, cs, str, p - STR2(str) - 1);
e89b612006-01-12Henrik Grubbström (Grubba)  } } break; } } static void f_feed_utf_ebcdice(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
e89b612006-01-12Henrik Grubbström (Grubba) 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
e89b612006-01-12Henrik Grubbström (Grubba) 
f3201e2017-07-16Henrik Grubbström (Grubba)  feed_utf_ebcdice(cs, &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
e89b612006-01-12Henrik Grubbström (Grubba)  pop_n_elems(args); push_object(this_object()); }
e8ebdb2001-05-10Henrik Grubbström (Grubba) static void feed_utf7_5e(struct std_cs_stor *cs, struct string_builder *sb, struct pike_string *str, struct pike_string *rep, struct svalue *repcb) { ptrdiff_t l = str->len; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else { string_builder_putchar(sb, 0xa0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0xc0|(c&0x3f));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if(c<=0x3ff) { string_builder_putchar(sb, 0xa0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0xc0|(c&0x3f));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  } else { string_builder_putchar(sb, 0xb0|(c>>12)); string_builder_putchar(sb, 0xc0|((c>>6)&0x3f));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0xc0|(c&0x3f));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  } } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--) if((c=*p++)<=0x7f) string_builder_putchar(sb, c); else if(c<=0x3ff) { string_builder_putchar(sb, 0xa0|(c>>6));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0xc0|(c&0x3f));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  } else if(c<=0xffff) { string_builder_putchar(sb, 0xb0|(c>>12)); string_builder_putchar(sb, 0xc0|((c>>6)&0x3f));
13670c2015-05-25Martin Nilsson  string_builder_putchar(sb, 0xc0|(c&0x3f));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf7_5e, cs, str, p - STR2(str) - 1);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  /* FIXME: Encode using surrogates? */ } break; } } static void f_feed_utf7_5e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
e8ebdb2001-05-10Henrik Grubbström (Grubba) 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
e8ebdb2001-05-10Henrik Grubbström (Grubba) 
f3201e2017-07-16Henrik Grubbström (Grubba)  feed_utf7_5e(cs, &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
e8ebdb2001-05-10Henrik Grubbström (Grubba)  pop_n_elems(args); push_object(this_object()); }
5453451998-11-16Marcus Comstedt static void feed_utf7e(struct utf7_stor *u7, struct string_builder *sb,
48656a1999-07-27Marcus Comstedt  struct pike_string *str, struct pike_string *rep, struct svalue *repcb)
5453451998-11-16Marcus Comstedt {
d098b22000-08-09Henrik Grubbström (Grubba)  ptrdiff_t l = str->len; INT32 dat = u7->dat;
5453451998-11-16Marcus Comstedt  int shift = u7->shift, datbit = u7->datbit; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--)
77608b2001-10-07Marcus Comstedt  if(((c=*p++)>=32 && c<=125 && c!=43 && c!=92) || c==9 || c==10 || c==13) {
5453451998-11-16Marcus Comstedt  if(shift) { if(datbit) { string_builder_putchar(sb, fwd64t[dat<<(6-datbit)]); dat=0; datbit=0; } if(c>='+' && c<='z' && rev64t[c-'+']>=0) string_builder_putchar(sb, '-');
13670c2015-05-25Martin Nilsson  shift = 0;
5453451998-11-16Marcus Comstedt  } string_builder_putchar(sb, c); } else if(c==43 && !shift) { string_builder_putchar(sb, '+'); string_builder_putchar(sb, '-'); } else { if(!shift) { string_builder_putchar(sb, '+'); shift = 1; } dat=(dat<<16)|c; string_builder_putchar(sb, fwd64t[dat>>(datbit+10)]); string_builder_putchar(sb, fwd64t[(dat>>(datbit+4))&0x3f]); if((datbit+=4)>=6) { string_builder_putchar(sb, fwd64t[(dat>>(datbit-6))&0x3f]); datbit-=6; } dat&=(1<<datbit)-1; } } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--)
77608b2001-10-07Marcus Comstedt  if(((c=*p++)>=32 && c<=125 && c!=43 && c!=92) || c==9 || c==10 || c==13) {
5453451998-11-16Marcus Comstedt  if(shift) { if(datbit) { string_builder_putchar(sb, fwd64t[dat<<(6-datbit)]); dat=0; datbit=0; } if(c>='+' && c<='z' && rev64t[c-'+']>=0) string_builder_putchar(sb, '-');
13670c2015-05-25Martin Nilsson  shift = 0;
5453451998-11-16Marcus Comstedt  } string_builder_putchar(sb, c); } else if(c==43 && !shift) { string_builder_putchar(sb, '+'); string_builder_putchar(sb, '-'); } else { if(!shift) { string_builder_putchar(sb, '+'); shift = 1; } dat=(dat<<16)|c; string_builder_putchar(sb, fwd64t[dat>>(datbit+10)]); string_builder_putchar(sb, fwd64t[(dat>>(datbit+4))&0x3f]); if((datbit+=4)>=6) { string_builder_putchar(sb, fwd64t[(dat>>(datbit-6))&0x3f]); datbit-=6; } dat&=(1<<datbit)-1; } } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--)
77608b2001-10-07Marcus Comstedt  if(((c=*p++)>=32 && c<=125 && c!=43 && c!=92) || c==9 || c==10 || c==13) {
5453451998-11-16Marcus Comstedt  if(shift) { if(datbit) { string_builder_putchar(sb, fwd64t[dat<<(6-datbit)]); dat=0; datbit=0; } if(c>='+' && c<='z' && rev64t[c-'+']>=0) string_builder_putchar(sb, '-');
13670c2015-05-25Martin Nilsson  shift = 0;
5453451998-11-16Marcus Comstedt  } string_builder_putchar(sb, c); } else if(c==43 && !shift) { string_builder_putchar(sb, '+'); string_builder_putchar(sb, '-'); } else if(c>0x10ffff) {
48656a1999-07-27Marcus Comstedt  u7->dat = dat; u7->shift = shift; u7->datbit = datbit;
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_utf7e, u7, str, p - STR2(str) - 1);
48656a1999-07-27Marcus Comstedt  dat = u7->dat; shift = u7->shift; datbit = u7->datbit;
5453451998-11-16Marcus Comstedt  } else { if(!shift) { string_builder_putchar(sb, '+'); shift = 1; } if(c>0xffff) { dat=(dat<<16)|(0xd800+(c>>10)-64); string_builder_putchar(sb, fwd64t[dat>>(datbit+10)]); string_builder_putchar(sb, fwd64t[(dat>>(datbit+4))&0x3f]); if((datbit+=4)>=6) { string_builder_putchar(sb, fwd64t[(dat>>(datbit-6))&0x3f]); datbit-=6; } dat&=(1<<datbit)-1; c=0xdc00+(c&1023); } dat=(dat<<16)|c; string_builder_putchar(sb, fwd64t[dat>>(datbit+10)]); string_builder_putchar(sb, fwd64t[(dat>>(datbit+4))&0x3f]); if((datbit+=4)>=6) { string_builder_putchar(sb, fwd64t[(dat>>(datbit-6))&0x3f]); datbit-=6; } dat&=(1<<datbit)-1; } } break; } u7->dat = dat; u7->shift = shift; u7->datbit = datbit; } static void f_feed_utf7e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
5453451998-11-16Marcus Comstedt 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
5453451998-11-16Marcus Comstedt 
50f0872017-04-08Martin Nilsson  feed_utf7e((struct utf7_stor *)(((char*)Pike_fp->current_storage)+utf7_stor_offs),
f3201e2017-07-16Henrik Grubbström (Grubba)  &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
5453451998-11-16Marcus Comstedt  pop_n_elems(args); push_object(this_object()); } static void f_drain_utf7e(INT32 args) {
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
5453451998-11-16Marcus Comstedt  struct utf7_stor *u7 =
50f0872017-04-08Martin Nilsson  (struct utf7_stor *)(Pike_fp->current_storage+utf7_stor_offs);
5453451998-11-16Marcus Comstedt  if(u7->shift) { if(u7->datbit) { string_builder_putchar(&cs->strbuild, fwd64t[u7->dat<<(6-u7->datbit)]); u7->dat=0; u7->datbit=0; } string_builder_putchar(&cs->strbuild, '-');
13670c2015-05-25Martin Nilsson  u7->shift = 0;
5453451998-11-16Marcus Comstedt  }
f3201e2017-07-16Henrik Grubbström (Grubba)  f_cq__Charset_Std_CS_drain(args);
5453451998-11-16Marcus Comstedt }
74dfe82012-12-30Jonas Walldén static void std_8bite_init_stor(struct object *UNUSED(o))
0e4d631998-11-16Marcus Comstedt { struct std8e_stor *s8 =
50f0872017-04-08Martin Nilsson  (struct std8e_stor *)(Pike_fp->current_storage+std8e_stor_offs);
0e4d631998-11-16Marcus Comstedt  s8->revtab = NULL; s8->lowtrans = 32; s8->lo = 0; s8->hi = 0;
e82da92008-07-03Henrik Grubbström (Grubba)  s8->zero_char = 0xfffd;
0e4d631998-11-16Marcus Comstedt }
74dfe82012-12-30Jonas Walldén static void std_8bite_exit_stor(struct object *UNUSED(o))
0e4d631998-11-16Marcus Comstedt { struct std8e_stor *s8 =
50f0872017-04-08Martin Nilsson  (struct std8e_stor *)(Pike_fp->current_storage+std8e_stor_offs);
0e4d631998-11-16Marcus Comstedt 
40338f2017-07-10Martin Nilsson  if(s8->revtab)
0e4d631998-11-16Marcus Comstedt  free(s8->revtab); } static void feed_std8e(struct std8e_stor *s8, struct string_builder *sb,
48656a1999-07-27Marcus Comstedt  struct pike_string *str, struct pike_string *rep, struct svalue *repcb)
0e4d631998-11-16Marcus Comstedt {
f5a2cf2000-08-09Henrik Grubbström (Grubba)  ptrdiff_t l = str->len;
0e4d631998-11-16Marcus Comstedt  p_wchar0 *tab = s8->revtab;
d134da2008-06-29Martin Stjernholm  unsigned lowtrans = s8->lowtrans; int lo = s8->lo, hi = s8->hi;
0e4d631998-11-16Marcus Comstedt  p_wchar0 ch; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--) if((c=*p++)<lowtrans) string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) string_builder_putchar(sb, ch);
e82da92008-07-03Henrik Grubbström (Grubba)  else if (!lo && (c == s8->zero_char)) string_builder_putchar(sb, 0);
0e4d631998-11-16Marcus Comstedt  else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std8e, s8, str, p - STR0(str) - 1);
0e4d631998-11-16Marcus Comstedt  } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<lowtrans) string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) string_builder_putchar(sb, ch);
e82da92008-07-03Henrik Grubbström (Grubba)  else if (!lo && (c == s8->zero_char) && (c != 0xfffd)) string_builder_putchar(sb, 0);
0e4d631998-11-16Marcus Comstedt  else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std8e, s8, str, p - STR1(str) - 1);
0e4d631998-11-16Marcus Comstedt  } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--)
6fefa82008-06-29Marcus Comstedt  if((unsigned INT32) (c=*p++)<lowtrans)
0e4d631998-11-16Marcus Comstedt  string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) string_builder_putchar(sb, ch);
e82da92008-07-03Henrik Grubbström (Grubba)  else if (!lo && (c == s8->zero_char) && (c != 0xfffd)) string_builder_putchar(sb, 0);
0e4d631998-11-16Marcus Comstedt  else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std8e, s8, str, p - STR2(str) - 1);
0e4d631998-11-16Marcus Comstedt  } break; } } static void f_feed_std8e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
0e4d631998-11-16Marcus Comstedt 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
0e4d631998-11-16Marcus Comstedt 
50f0872017-04-08Martin Nilsson  feed_std8e((struct std8e_stor *)(((char*)Pike_fp->current_storage)+
0e4d631998-11-16Marcus Comstedt  std8e_stor_offs),
f3201e2017-07-16Henrik Grubbström (Grubba)  &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
0e4d631998-11-16Marcus Comstedt  pop_n_elems(args); push_object(this_object()); }
74dfe82012-12-30Jonas Walldén static void std_16bite_init_stor(struct object *UNUSED(o))
fecae61999-01-05Marcus Comstedt { struct std16e_stor *s16 =
50f0872017-04-08Martin Nilsson  (struct std16e_stor *)(Pike_fp->current_storage+std16e_stor_offs);
fecae61999-01-05Marcus Comstedt  s16->revtab = NULL; s16->lowtrans = 32; s16->lo = 0; s16->hi = 0;
1ec5352005-12-08Marcus Comstedt  s16->sshift = 0;
fecae61999-01-05Marcus Comstedt }
74dfe82012-12-30Jonas Walldén static void std_16bite_exit_stor(struct object *UNUSED(o))
fecae61999-01-05Marcus Comstedt { struct std16e_stor *s16 =
50f0872017-04-08Martin Nilsson  (struct std16e_stor *)(Pike_fp->current_storage+std16e_stor_offs);
fecae61999-01-05Marcus Comstedt 
40338f2017-07-10Martin Nilsson  if(s16->revtab)
fecae61999-01-05Marcus Comstedt  free(s16->revtab); } static void feed_std16e(struct std16e_stor *s16, struct string_builder *sb,
48656a1999-07-27Marcus Comstedt  struct pike_string *str, struct pike_string *rep, struct svalue *repcb)
fecae61999-01-05Marcus Comstedt {
f5a2cf2000-08-09Henrik Grubbström (Grubba)  ptrdiff_t l = str->len;
fecae61999-01-05Marcus Comstedt  p_wchar1 *tab = s16->revtab;
d134da2008-06-29Martin Stjernholm  unsigned lowtrans = s16->lowtrans; int lo = s16->lo, hi = s16->hi;
1ec5352005-12-08Marcus Comstedt  int sshift = s16->sshift;
fecae61999-01-05Marcus Comstedt  p_wchar1 ch; switch(str->size_shift) { case 0: { p_wchar0 c, *p = STR0(str); while(l--) if((c=*p++)<lowtrans) string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) {
1ec5352005-12-08Marcus Comstedt  if(sshift && !(ch & 0x80)) { ch |= 0x80; string_builder_putchar(sb, (ch > 0xff? 0x8f : 0x8e)); }
9c87942001-06-08Marcus Comstedt  if(ch > 0xff) string_builder_putchar(sb, (ch>>8)&0xff);
fecae61999-01-05Marcus Comstedt  string_builder_putchar(sb, ch&0xff);
48656a1999-07-27Marcus Comstedt  } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std16e, s16, str, p - STR0(str) - 1);
fecae61999-01-05Marcus Comstedt  } break; case 1: { p_wchar1 c, *p = STR1(str); while(l--) if((c=*p++)<lowtrans) string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) {
1ec5352005-12-08Marcus Comstedt  if(sshift && !(ch & 0x80)) { ch |= 0x80; string_builder_putchar(sb, (ch > 0xff? 0x8f : 0x8e)); }
9c87942001-06-08Marcus Comstedt  if(ch > 0xff) string_builder_putchar(sb, (ch>>8)&0xff);
fecae61999-01-05Marcus Comstedt  string_builder_putchar(sb, ch&0xff);
48656a1999-07-27Marcus Comstedt  } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std16e, s16, str, p - STR1(str) - 1);
fecae61999-01-05Marcus Comstedt  } break; case 2: { p_wchar2 c, *p = STR2(str); while(l--)
6fefa82008-06-29Marcus Comstedt  if((unsigned INT32) (c=*p++)<lowtrans)
fecae61999-01-05Marcus Comstedt  string_builder_putchar(sb, c); else if(c>=lo && c<hi && (ch=tab[c-lo])!=0) {
1ec5352005-12-08Marcus Comstedt  if(sshift && !(ch & 0x80)) { ch |= 0x80; string_builder_putchar(sb, (ch > 0xff? 0x8f : 0x8e)); }
9c87942001-06-08Marcus Comstedt  if(ch > 0xff) string_builder_putchar(sb, (ch>>8)&0xff);
fecae61999-01-05Marcus Comstedt  string_builder_putchar(sb, ch&0xff);
48656a1999-07-27Marcus Comstedt  } else
c0d5e42008-06-29Martin Stjernholm  REPLACE_CHAR(c, feed_std16e, s16, str, p - STR2(str) - 1);
fecae61999-01-05Marcus Comstedt  } break; } } static void f_feed_std16e(INT32 args) { struct pike_string *str;
50f0872017-04-08Martin Nilsson  struct std_cs_stor *cs = (struct std_cs_stor *)Pike_fp->current_storage;
fecae61999-01-05Marcus Comstedt 
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%W", &str);
fecae61999-01-05Marcus Comstedt 
50f0872017-04-08Martin Nilsson  feed_std16e((struct std16e_stor *)(((char*)Pike_fp->current_storage)+
fecae61999-01-05Marcus Comstedt  std16e_stor_offs),
f3201e2017-07-16Henrik Grubbström (Grubba)  &cs->strbuild, str, cs->replace, MKREPCB(cs->_repcb));
fecae61999-01-05Marcus Comstedt  pop_n_elems(args); push_object(this_object()); }
0e4d631998-11-16Marcus Comstedt 
51ef5c2002-10-21Marcus Comstedt PIKE_MODULE_INIT
f886061998-10-15Marcus Comstedt {
b6c29f2011-04-16Henrik Grubbström (Grubba)  int i,n;
f886061998-10-15Marcus Comstedt  struct svalue prog;
b6c29f2011-04-16Henrik Grubbström (Grubba)  static p_wchar1 doubles_first_char[] = { 0x0308, 0x0313, 0x0314 }; static p_wchar1 doubles_second_char[] = { 0x0300, 0x0301, 0x0342 }; p_wchar1 double_char[2]; /* Handling of double non-spacing characters used by eg ISO-IR-31. */ n = 0; for(i = 0xe100; i < 0xe130; i += 0x0010) { int j; for(j = 0; j < 3; j++,n++) { p_wchar1 c = i+j; push_string(make_shared_binary_string1(&c, 1)); } } double_custom_chars = aggregate_array(n); n = 0; for (i = 0; i < 3; i++) { int j; double_char[0] = doubles_first_char[i]; for (j = 0; j < 3; j++,n++) { double_char[1] = doubles_second_char[j]; push_string(make_shared_binary_string1(double_char, 2)); } } double_combiner_chars = aggregate_array(n);
f5955c1999-02-23Marcus Comstedt  iso2022_init();
f886061998-10-15Marcus Comstedt 
ccc70d2017-04-05Henrik Grubbström (Grubba)  INIT;
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL(prog, T_PROGRAM, 0, program, std_cs_program);
f886061998-10-15Marcus Comstedt  memset(rev64t, -1, sizeof(rev64t)); for(i=0; i<64; i++)
5453451998-11-16Marcus Comstedt  rev64t[fwd64t[i]-'+']=i;
f886061998-10-15Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
90e9781999-01-31Fredrik Hübinette (Hubbe)  utf7_stor_offs = ADD_STORAGE(struct utf7_stor);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf7", 0);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf7,tFunc(tStr,tObj), 0); /* function(:object) */
07228a1999-06-19Fredrik Hübinette (Hubbe)  ADD_FUNCTION("clear", f_clear_utf7,tFunc(tNone,tObj), 0);
f886061998-10-15Marcus Comstedt  set_init_callback(utf7_init_stor);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF7dec", utf7_program = end_program(), ID_PROTECTED|ID_FINAL);
f886061998-10-15Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf8", 0);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf8,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF8dec", utf8_program = end_program(), ID_PROTECTED|ID_FINAL);
3c39ab1998-11-16Marcus Comstedt 
5453451998-11-16Marcus Comstedt  prog.u.program = utf7_program;
3c39ab1998-11-16Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf7", 0);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf7e,tFunc(tStr,tObj), 0); /* function(:string) */
07228a1999-06-19Fredrik Hübinette (Hubbe)  ADD_FUNCTION("drain", f_drain_utf7e,tFunc(tNone,tStr), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF7enc", utf7e_program = end_program(), ID_PROTECTED|ID_FINAL);
5453451998-11-16Marcus Comstedt  prog.u.program = std_cs_program;
3c39ab1998-11-16Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf8", 0);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf8e,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF8enc", utf8e_program = end_program(), ID_PROTECTED|ID_FINAL);
f886061998-10-15Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utfebcdic", 0);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  /* function(string:object) */
e89b612006-01-12Henrik Grubbström (Grubba)  ADD_FUNCTION("feed", f_feed_utf_ebcdic,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF_EBCDICdec", utf_ebcdic_program = end_program(), ID_PROTECTED|ID_FINAL);
e89b612006-01-12Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utfebcdic", 0);
e89b612006-01-12Henrik Grubbström (Grubba)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf_ebcdice,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF_EBCDICenc", utf_ebcdice_program = end_program(), ID_PROTECTED|ID_FINAL);
e89b612006-01-12Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL); /* function(string:object) */
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf75", 0);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  ADD_FUNCTION("feed", f_feed_utf7_5,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF7_5dec", utf7_5_program = end_program(), ID_PROTECTED|ID_FINAL);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "utf75", 0);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_utf7_5e,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("UTF7_5enc", utf7_5e_program = end_program(), ID_PROTECTED|ID_FINAL);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL);
eeccd82001-06-05Marcus Comstedt  euc_stor_offs = ADD_STORAGE(struct euc_stor);
c0d5e42008-06-29Martin Stjernholm  PIKE_MAP_VARIABLE ("charset", euc_stor_offs + OFFSETOF (euc_stor, name), tStr, T_STRING, 0);
eeccd82001-06-05Marcus Comstedt  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_euc,tFunc(tStr,tObj), 0);
c0d5e42008-06-29Martin Stjernholm  /* function(string,string:) */ ADD_FUNCTION("create", f_create_euc,tFunc(tStr tStr,tVoid), ID_PROTECTED);
ecc9382008-06-29Martin Nilsson  add_program_constant("EUCDec", euc_program = end_program(), ID_PROTECTED|ID_FINAL);
eeccd82001-06-05Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
3337b22004-08-17Martin Nilsson  multichar_stor_offs = ADD_STORAGE(struct multichar_stor);
c0d5e42008-06-29Martin Stjernholm  PIKE_MAP_VARIABLE ("charset", multichar_stor_offs + OFFSETOF (multichar_stor, name), tStr, T_STRING, 0);
ecc9382008-06-29Martin Nilsson  ADD_FUNCTION("create", f_create_multichar,tFunc(tStr,tVoid), ID_PROTECTED);
3337b22004-08-17Martin Nilsson  ADD_FUNCTION("feed", f_feed_multichar,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("MulticharDec", multichar_program = end_program(), ID_PROTECTED|ID_FINAL);
f75dda2004-07-25Martin Nilsson  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "gb18030", 0);
eeccd82001-06-05Marcus Comstedt  /* function(string:object) */
d893fc2006-01-15Henrik Grubbström (Grubba)  ADD_FUNCTION("feed", f_feed_gb18030e,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("GB18030Enc", gb18030e_program = end_program(), ID_PROTECTED|ID_FINAL);
d893fc2006-01-15Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "gbk", 0);
d893fc2006-01-15Henrik Grubbström (Grubba)  /* function(string:object) */
9ca8b82006-01-17Henrik Grubbström (Grubba)  ADD_FUNCTION("feed", f_feed_gbke,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("GBKenc", gbke_program = end_program(), ID_PROTECTED|ID_FINAL);
9ca8b82006-01-17Henrik Grubbström (Grubba)  start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  add_string_constant ("charset", "shiftjis", 0);
9ca8b82006-01-17Henrik Grubbström (Grubba)  /* function(string:object) */
eeccd82001-06-05Marcus Comstedt  ADD_FUNCTION("feed", f_feed_sjis,tFunc(tStr,tObj), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("ShiftJisDec", sjis_program = end_program(), ID_PROTECTED|ID_FINAL);
eeccd82001-06-05Marcus Comstedt  start_new_program();
c0d5e42008-06-29Martin Stjernholm  do_inherit (&prog, 0, NULL); rfc_charset_name_offs = ADD_STORAGE (struct pike_string *); PIKE_MAP_VARIABLE ("charset", rfc_charset_name_offs, tStr, T_STRING, 0); rfc_base_program = end_program(); prog.u.program = rfc_base_program; start_new_program();
eeccd82001-06-05Marcus Comstedt  do_inherit(&prog, 0, NULL);
90e9781999-01-31Fredrik Hübinette (Hubbe)  std8e_stor_offs = ADD_STORAGE(struct std8e_stor);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_std8e,tFunc(tStr,tObj), 0);
0e4d631998-11-16Marcus Comstedt  set_init_callback(std_8bite_init_stor); set_exit_callback(std_8bite_exit_stor); std_8bite_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
90e9781999-01-31Fredrik Hübinette (Hubbe)  std16e_stor_offs = ADD_STORAGE(struct std16e_stor);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_std16e,tFunc(tStr,tObj), 0);
fecae61999-01-05Marcus Comstedt  set_init_callback(std_16bite_init_stor); set_exit_callback(std_16bite_exit_stor); std_16bite_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
90e9781999-01-31Fredrik Hübinette (Hubbe)  std_rfc_stor_offs = ADD_STORAGE(struct std_rfc_stor);
3edf2a2007-06-19Henrik Grubbström (Grubba)  ADD_FUNCTION("drain", f_drain_rfc1345, tFunc(tNone,tStr), 0);
f886061998-10-15Marcus Comstedt  std_rfc_program = end_program();
9c87942001-06-08Marcus Comstedt  prog.u.program = std_16bite_program; start_new_program(); do_inherit(&prog, 0, NULL);
c0d5e42008-06-29Martin Stjernholm  /* function(string,string,string|void,function(string:string)|void:void) */ ADD_FUNCTION("create", f_create_euce,tFunc(tStr tStr tOr(tStr,tVoid) tOr(tFunc(tStr,tStr),tVoid),tVoid), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("EUCEnc", euce_program = end_program(), ID_PROTECTED|ID_FINAL);
9c87942001-06-08Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL); /* function(string|void,function(string:string)|void:void) */ ADD_FUNCTION("create", f_create_sjise,tFunc(tOr(tStr,tVoid) tOr(tFunc(tStr,tStr),tVoid),tVoid), 0);
ecc9382008-06-29Martin Nilsson  add_program_constant("ShiftJisEnc", sjise_program = end_program(), ID_PROTECTED|ID_FINAL);
9c87942001-06-08Marcus Comstedt 
f886061998-10-15Marcus Comstedt  prog.u.program = std_rfc_program; start_new_program(); do_inherit(&prog, 0, NULL);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_94,tFunc(tStr,tObj), 0);
f886061998-10-15Marcus Comstedt  std_94_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_96,tFunc(tStr,tObj), 0);
f886061998-10-15Marcus Comstedt  std_96_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_9494,tFunc(tStr,tObj), 0);
f886061998-10-15Marcus Comstedt  std_9494_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_9696,tFunc(tStr,tObj), 0);
f886061998-10-15Marcus Comstedt  std_9696_program = end_program();
9db11a1998-11-05Marcus Comstedt  start_new_program(); do_inherit(&prog, 0, NULL);
1ee32f2001-06-06Stefan Wallström  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_big5,tFunc(tStr,tObj), 0); std_big5_program = end_program(); start_new_program(); do_inherit(&prog, 0, NULL);
90e9781999-01-31Fredrik Hübinette (Hubbe)  std_misc_stor_offs = ADD_STORAGE(struct std_misc_stor);
45ee5d1999-02-10Fredrik Hübinette (Hubbe)  /* function(string:object) */ ADD_FUNCTION("feed", f_feed_8bit,tFunc(tStr,tObj), 0);
9db11a1998-11-05Marcus Comstedt  std_8bit_program = end_program();
c0d5e42008-06-29Martin Stjernholm  PIKE_MODULE_EXPORT (_Charset, transcode_error_va);
f886061998-10-15Marcus Comstedt }
51ef5c2002-10-21Marcus Comstedt PIKE_MODULE_EXIT
f886061998-10-15Marcus Comstedt {
ccc70d2017-04-05Henrik Grubbström (Grubba)  EXIT;
3c39ab1998-11-16Marcus Comstedt  if(utf7e_program != NULL) free_program(utf7e_program); if(utf8e_program != NULL) free_program(utf8e_program);
f886061998-10-15Marcus Comstedt  if(utf7_program != NULL) free_program(utf7_program); if(utf8_program != NULL) free_program(utf8_program);
e89b612006-01-12Henrik Grubbström (Grubba)  if(utf_ebcdic_program != NULL) free_program(utf_ebcdic_program); if(utf_ebcdice_program != NULL) free_program(utf_ebcdice_program);
e8ebdb2001-05-10Henrik Grubbström (Grubba)  if(utf7_5_program != NULL) free_program(utf7_5_program); if(utf7_5e_program != NULL) free_program(utf7_5e_program);
eeccd82001-06-05Marcus Comstedt  if(euc_program != NULL) free_program(euc_program); if(sjis_program != NULL) free_program(sjis_program);
9c87942001-06-08Marcus Comstedt  if(euce_program != NULL) free_program(euce_program); if(sjise_program != NULL) free_program(sjise_program);
f886061998-10-15Marcus Comstedt  if(std_94_program != NULL) free_program(std_94_program); if(std_96_program != NULL) free_program(std_96_program); if(std_9494_program != NULL) free_program(std_9494_program); if(std_9696_program != NULL) free_program(std_9696_program);
13670c2015-05-25Martin Nilsson 
1ee32f2001-06-06Stefan Wallström  if(std_big5_program != NULL) free_program(std_big5_program);
f886061998-10-15Marcus Comstedt 
9db11a1998-11-05Marcus Comstedt  if(std_8bit_program != NULL) free_program(std_8bit_program);
0e4d631998-11-16Marcus Comstedt  if(std_8bite_program != NULL) free_program(std_8bite_program);
fecae61999-01-05Marcus Comstedt  if(std_16bite_program != NULL) free_program(std_16bite_program);
c0d5e42008-06-29Martin Stjernholm  if(rfc_base_program != NULL) free_program(rfc_base_program);
f886061998-10-15Marcus Comstedt  if(std_rfc_program != NULL) free_program(std_rfc_program);
d893fc2006-01-15Henrik Grubbström (Grubba)  if(gb18030e_program != NULL) free_program(gb18030e_program);
9ca8b82006-01-17Henrik Grubbström (Grubba)  if(gbke_program != NULL) free_program(gbke_program);
28c6e92004-08-17Martin Nilsson  if(multichar_program != NULL) free_program(multichar_program);
f886061998-10-15Marcus Comstedt  iso2022_exit();
c0d5e42008-06-29Martin Stjernholm 
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(encode_err_prog) != T_INT) free_svalue (&encode_err_prog); if (TYPEOF(decode_err_prog) != T_INT) free_svalue (&decode_err_prog);
b6c29f2011-04-16Henrik Grubbström (Grubba)  free_array(double_custom_chars); free_array(double_combiner_chars);
f886061998-10-15Marcus Comstedt }
0c415f2017-06-16Henrik Grubbström (Grubba)  /*! @endmodule */