pike.git / src / modules / _Charset / tables.c

version» Context lines:

pike.git/src/modules/_Charset/tables.c:10:      /* Table encoding notes    * ====================    *    * Surrogates and UTF-16    * ---------------------    *    * When a character needs to be mapped to multiple UTF-16 characters,    * this is done by having a set of NUL-terminated UTF-16 strings    * after the main table, and using a character in the surrogate block -  * (0xd800 - 0xdfff) to encode the offset to the string from the main -  * table. This is currently only used by the JIS X0213 tables. +  * (0xd800 - 0xdfff) to encode the offset to the string from the end of +  * the main table. This is currently only used by a few tables.    *    * Private characters    * ------------------    *    * The private characters used by RFC1345 have been mapped as follows:    *    * RFC1345 Pike Name    * e000 fffd REPLACEMENT CHARACTER    * e001 - Join with next line.    *
pike.git/src/modules/_Charset/tables.c:111:    * 0308 COMBINING GREEK DIALYTIKA    * 0313 COMBINING GREEK PSILI    * 0314 COMBINING GREEK DASIA    *    * followed by one of    *    * 0300 COMBINING GREEK VARIA    * 0301 COMBINING GREEK OXIA    * 0342 COMBINING GREEK PERISPOMENI    * -  * There are thus only two private characters left after reordering -  * and filtering. +  * There are thus only two private characters from RFC1345 left after +  * reordering and filtering. +  * +  * The Mosaic characters from ISO-IR-129 / CCITT T.101 Data Syntax III +  * are mapped as a bitmap starting at e200, with the 6 pixels numbered +  * as follows: +  * +  * 0 1 +  * 2 3 +  * 4 5 +  * +  * NB: This encoding is compatible with the encoding used for the mosaic +  * characters in the http://galax.xyz/TELETEXT/MODE7GX.TTF font.    */      static const UNICHAR map_ISO_646_irv_1983[] = {    0x0021, 0x0022, 0x0023, 0x00a4, 0x0025, 0x0026, 0x0027, 0x0028,    0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030,    0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,    0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040,    0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048,    0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,    0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
pike.git/src/modules/_Charset/tables.c:12487:    0x9e17, 0x9f48, 0x6207, 0x6b1e, 0x7227, 0x864c, 0x8ea8, 0x9482,    0x9480, 0x9481, 0x9a69, 0x9a68, 0x9b2e, 0x9e19, 0x7229, 0x864b,    0x8b9f, 0x9483, 0x9c79, 0x9eb7, 0x7675, 0x9a6b, 0x9c7a, 0x9e1d,    0x7069, 0x706a, 0x9ea4, 0x9f7e, 0x9f49, 0x9f98, 0x7881, 0x92b9,    0x88cf, 0x58bb, 0x6052, 0x7ca7, 0x5afa, 0x2554, 0x2566, 0x2557,    0x2560, 0x256c, 0x2563, 0x255a, 0x2569, 0x255d, 0x2552, 0x2564,    0x2555, 0x255e, 0x256a, 0x2561, 0x2558, 0x2567, 0x255b, 0x2553,    0x2565, 0x2556, 0x255f, 0x256b, 0x2562, 0x2559, 0x2568, 0x255c,    0x2551, 0x2550, 0x256d, 0x256e, 0x2570, 0x256f, 0x2593, };    + static const UNICHAR map_CCITT_T_101_III_suppl[] = { +  0xe200, 0xe201, 0xe202, 0xe203, 0xe204, 0xe205, 0xe206, 0xe207, +  0xe208, 0xe209, 0xe20a, 0xe20b, 0xe20c, 0xe20d, 0xe20e, 0xe20f, +  0xe210, 0xe211, 0xe212, 0xe213, 0xe214, 0xe215, 0xe216, 0xe217, +  0xe218, 0xe219, 0xe21a, 0xe21b, 0xe21c, 0xe21d, 0xe21e, 0xe21f, +  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, +  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, +  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, +  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xe23f, +  0xe220, 0xe221, 0xe222, 0xe223, 0xe224, 0xe225, 0xe226, 0xe227, +  0xe228, 0xe229, 0xe22a, 0xe22b, 0xe22c, 0xe22d, 0xe22e, 0xe22f, +  0xe230, 0xe231, 0xe232, 0xe233, 0xe234, 0xe235, 0xe236, 0xe237, +  0xe238, 0xe239, 0xe23a, 0xe23b, 0xe23c, 0xe23d, 0xe23e, 0xe23f, }; +    #include "jisx0213_tables.h"      const UNICHAR * const iso2022_94[] = {    map_ISO_646_irv_1983, map_BS_4730, map_ANSI_X3_4_1968, map_NATS_SEFI,    map_NATS_SEFI_ADD, map_NATS_DANO, map_NATS_DANO_ADD, map_SEN_850200_B,    map_SEN_850200_C, map_JIS_C6220_1969_jp, map_JIS_C6220_1969_ro, map_DIN_66003,    map_PT, map_ISO_6438, map_ISO_5427, map_DIN_31624,    map_ISO_5426_1980, map_ISO_5427_1981, map_NF_Z_62_010_1973, map_ISO_5428_1980,    map_GB_1988_80, map_Latin_greek_1, map_BS_viewdata, map_INIS,    map_ISO_5428_bibl, map_IT, map_ES, map_greek7_old,
pike.git/src/modules/_Charset/tables.c:12539:    map_TIS_620_2533_1990, map_ISO_IR_167, map_ISO_8859_10_1998, NULL,    map_latin_lap, map_ISO_8859_13_1998, map_TCVN_5712_1993, map_ISO_IR_181,    map_ISO_IR_182, map_ISO_IR_197, map_ISO_8859_8_1999, map_ISO_8859_14_1998,    map_ISO_IR_200, map_ISO_IR_201, map_ISO_8859_15_1999, map_ISO_IR_204,    map_ISO_IR_205, map_ISO_IR_206, map_SR_14111_1998, map_IS_434_1997,    map_ISO_IR_209, map_ISO_8859_7_2003, map_SI_1311_2002, NULL,    NULL, NULL, NULL, NULL,    NULL, NULL, NULL, NULL,    NULL, NULL, NULL, NULL,    NULL, NULL, NULL, NULL, -  NULL, NULL, NULL }; +  NULL, map_CCITT_T_101_III_suppl, NULL };   const UNICHAR * const iso2022_9494[] = {    map_JIS_C6226_1978, map_GB_2312_80, map_JIS_C6226_1983, map_KS_C_5601_1987,    map_JIS_X0212_1990, /*ISOIR165*/NULL, /*ISOIR169*/NULL, /*ISOIR171*/NULL,    /*ISOIR172*/NULL, /*ISOIR183*/NULL, /*ISOIR184*/NULL, /*ISOIR185*/NULL,    /*ISOIR186*/NULL, /*ISOIR187*/NULL, map_KPS_9566_97, map_JIS_X0213_2000_1,    map_JIS_X0213_2000_2, map_JIS_X0213_2004_1, NULL, NULL,    NULL, NULL, NULL, NULL,    NULL, NULL, NULL, NULL,    NULL, NULL, NULL, NULL };   const UNICHAR * const iso2022_9696[] = {
pike.git/src/modules/_Charset/tables.c:15243:    { "apl", map_ISO_IR_68, MODE_94 }, /* :: apl */    { "arabic", map_ISO_8859_6_1999, MODE_96 }, /* :: arabic */    { "arabic7", map_ASMO_449, MODE_94 }, /* :: arabic7 */    { "ascii", map_ANSI_X3_4_1968, MODE_94 }, /* :: ascii */    { "asmo449", map_ASMO_449, MODE_94 }, /* :: asmo_449 */    { "asmo708", map_ISO_8859_6_1999, MODE_96 }, /* :: asmo-708 */    { "big5", map_BIG5, MODE_BIG5 }, /* :: big5 */    { "bs4730", map_BS_4730, MODE_94 }, /* :: bs_4730 */    { "bsviewdata", map_BS_viewdata, MODE_94 }, /* :: bs_viewdata */    { "ca", map_CSA_Z243_4_1985_1, MODE_94 }, /* :: ca */ +  { "ccittt101iiisuppl", map_CCITT_T_101_III_suppl, MODE_96 }, /* :: ccitt-t.101-III_suppl */    { "celtic", map_ISO_8859_14_1998, MODE_96 }, /* :: celtic */    { "chinese", map_GB_2312_80, MODE_9494 }, /* :: chinese */    { "cn", map_GB_1988_80, MODE_94 }, /* :: cn */    { "codaru", map_CODAR_U, MODE_94 }, /* :: codar-u */    { "csa71", map_CSA_Z243_4_1985_1, MODE_94 }, /* :: csa7-1 */    { "csa72", map_CSA_Z243_4_1985_2, MODE_94 }, /* :: csa7-2 */    { "csaz2434", map_CSA_Z243_4_1985_1, MODE_94 }, /* :: csa z243.4 */    { "csaz24341973", map_ANSI_X3_4_1968, MODE_94 }, /* :: csa z243.4-1973 */    { "csaz243419851", map_CSA_Z243_4_1985_1, MODE_94 }, /* :: csa_z243.4-1985-1 */    { "csaz243419852", map_CSA_Z243_4_1985_2, MODE_94 }, /* :: csa_z243.4-1985-2 */
pike.git/src/modules/_Charset/tables.c:15452:    { "isoir110", map_ISO_8859_4_1998, MODE_96 }, /* :: iso-ir-110 */    { "isoir111", map_ECMA_cyrillic, MODE_96 }, /* :: iso-ir-111 */    { "isoir121", map_CSA_Z243_4_1985_1, MODE_94 }, /* :: iso-ir-121 */    { "isoir122", map_CSA_Z243_4_1985_2, MODE_94 }, /* :: iso-ir-122 */    { "isoir123", map_CSA_Z243_4_1985_gr, MODE_96 }, /* :: iso-ir-123 */    /* :: iso-ir-124: Only control characters. */    /* :: iso-ir-125: ISO 9040 Virtual Terminal Service Transparent Set */    { "isoir126", map_ISO_8859_7_1987, MODE_96 }, /* :: iso-ir-126 */    { "isoir127", map_ISO_8859_6_1999, MODE_96 }, /* :: iso-ir-127 */    /* :: iso-ir-128: Defined in misc.c. */ -  /* :: iso-ir-129: CCITT T101-III Mosaic drawing. */ +  { "isoir129", map_CCITT_T_101_III_suppl, MODE_96 }, /* :: iso-ir-129 */    { "isoir13", map_JIS_C6220_1969_jp, MODE_94 }, /* :: iso-ir-13 */    /* :: iso-ir-130: Only control characters. */    /* :: iso-ir-131: CCITT T.101 Data Syntax I */    /* :: iso-ir-132: Only control characters. */    /* :: iso-ir-133: Only control characters. */    /* :: iso-ir-134: Only control characters. */    /* :: iso-ir-135: Only control characters. */    /* :: iso-ir-136: Only control characters. */    /* :: iso-ir-137: CCITT T.101-I Mosaic drawing. */    { "isoir138", map_ECMA_121, MODE_96 }, /* :: iso-ir-138 */
pike.git/src/modules/_Charset/tables.c:15713:    { "sen850200b", map_SEN_850200_B, MODE_94 }, /* :: sen_850200_b */    { "sen850200c", map_SEN_850200_C, MODE_94 }, /* :: sen_850200_c */    { "serbian", map_JUS_I_B1_003_serb, MODE_94 }, /* :: serbian */    { "si1311", map_SI_1311_2002, MODE_96 }, /* :: si 1311:2002 */    { "si13112002", map_SI_1311_2002, MODE_96 }, /* :: si 1311:2002 */    { "sisd47", map_SEN_850200_B, MODE_94 }, /* :: sis d47 */    { "sise47", map_SEN_850200_C, MODE_94 }, /* :: sis e47 */    { "sr14111", map_SR_14111_1998, MODE_96 }, /* :: sr 14111 */    { "sr141111998", map_SR_14111_1998, MODE_96 }, /* :: sr 14111:1998 */    { "stsev35888", map_GOST_19768_74, MODE_96 }, /* :: st_sev_358-88 */ +  { "t101iiisuppl", map_CCITT_T_101_III_suppl, MODE_96 }, /* :: t.101-III_suppl */    { "t617bit", map_T_61_7bit, MODE_94 }, /* :: t.61-7bit */    { "tds565", map_TDS_565, MODE_94 }, /* :: tds 565 */    { "tds616", map_TDS_616_2003, MODE_94 }, /* :: tds 616 */    { "tds6162003", map_TDS_616_2003, MODE_94 }, /* :: tds 616-2003 */    { "tis6202533", map_TIS_620_2533_1990, MODE_96 }, /* :: tis 620-2533 */    { "tis62025331990", map_TIS_620_2533_1990, MODE_96 }, /* :: tis 620-2533 (1990) */    { "uk", map_BS_4730, MODE_94 }, /* :: uk */    { "us", map_ANSI_X3_4_1968, MODE_94 }, /* :: us */    { "usascii", map_ANSI_X3_4_1968, MODE_94 }, /* :: us-ascii */    { "vietnamese", map_TCVN_5712_1993, MODE_96 }, /* :: vietnamese */    { "visual", map_ISO_8859_8_1999, MODE_96 }, /* :: visual */    { "vscii", map_TCVN_5712_1993, MODE_96 }, /* :: vscii */    { "welsh", map_ISO_IR_182, MODE_96 }, /* :: welsh */    { "x02017", map_JIS_C6220_1969_jp, MODE_94 }, /* :: x0201-7 */    { "x0208", map_JIS_C6226_1983, MODE_9494 }, /* :: x0208 */    { "x0212", map_JIS_X0212_1990, MODE_9494 }, /* :: x0212 */    { "yu", map_JUS_I_B1_002, MODE_94 }, /* :: yu */   };   const int num_charset_def = sizeof(charset_map)/sizeof(charset_map[0]);