Branch: Tag:

2014-08-10

2014-08-10 12:01:25 by Henrik Grubbström (Grubba) <grubba@grubba.org>

Standards.ASN1: Removed the TeletexString type.

This ASN.1 string type was encoded according to T.51/T.61,
and was not handled properly by other software at the time
(~1995) when it was in use.

836:    constant type_name = "PrintableString";   }    - protected Regexp asn1_teletex_invalid_chars = Regexp ("([\\\\{}\240®©¬¦\255Ð])"); -  +    //! - int(0..1) asn1_teletex_valid (string s) - { -  if (global.String.width(s)!=8) -  // T.61 encoding of wide strings not implemented. -  return 0; -  return !asn1_teletex_invalid_chars->match (s); - } -  - //! TeletexString object - //! - //! Avoid this one; it seems to be common that this type is used to - //! label strings encoded with the ISO 8859-1 character set (use - //! asn1_broken_teletex_string for that). From - //! http://www.mindspring.com/~asn1/nlsasn.htm: - //! - //! /.../ Types GeneralString, VideotexString, TeletexString - //! (T61String), and GraphicString exist in earlier versions - //! [pre-1994] of ASN.1. They are considered difficult to use - //! correctly by applications providing national language support. - //! Varying degrees of application support for T61String values seems - //! to be most common in older applications. Correct support is made - //! more difficult, as character values available in type T61String - //! have changed with the addition of new register entries from the - //! 1984 through 1997 versions. - //! - //! This implementation is based on the description of T.61 and T.51 - //! in "Some functions for representing T.61 characters from the - //! X.500 Directory Service in ISO 8859 terminals (Version 0.2. July - //! 1994.)" by Enrique Silvestre Mora (mora@@si.uji.es), Universitat - //! Jaume I, Spain, found in the package - //! ftp://pereiii.uji.es/pub/uji-ftp/unix/ldap/iso-t61.translation.tar.Z - //! - //! The translation is only complete for 8-bit latin 1 strings. It - //! encodes strictly to T.61, but decodes from the superset T.51. - //! - //! @note - //! CCITT Recommendation T.61 is also known as ISO-IR 103:1985 - //! (graphic characters) and ISO-IR 106:1985 and ISO-IR 107:1985 - //! (control characters). - //! - //! @seealso - //! @[Charset] - class TeletexString - { -  inherit String; -  int tag = 20; -  constant type_name = "TeletexString"; // Alias: T61String -  - #define ENC_ERR(char) char - #define DEC_ERR(str) str - #define DEC_COMB_MARK "\300" -  - #define GR(char) "\301" char /* Combining grave accent */ - #define AC(char) "\302" char /* Combining acute accent */ - #define CI(char) "\303" char /* Combining circumflex accent */ - #define TI(char) "\304" char /* Combining tilde */ - #define MA(char) "\305" char /* Combining macron */ - #define BR(char) "\306" char /* Combining breve */ - #define DA(char) "\307" char /* Combining dot above */ - #define DI(char) "\310" char /* Combining diaeresis */ - #define RA(char) "\312" char /* Combining ring above */ - #define CE(char) "\313" char /* Combining cedilla */ - #define UN(char) "\314" char /* Combining underscore (note 6) */ - #define DO(char) "\315" char /* Combining double acute accent */ - #define OG(char) "\316" char /* Combining ogonek */ - #define CA(char) "\317" char /* Combining caron */ -  -  protected constant encode_from = ({ -  /*"#", "$",*/ "¤", // Note 3 -  "\\", "{", "}", // Note 7 -  "\240", // No-break space (note 7) -  "×", // Multiplication sign -  "÷", // Division sign -  "¹", // Superscript one -  "®", // Registered sign (note 7) -  "©", // Copyright sign (note 7) -  "¬", // Not sign (note 7) -  "¦", // Broken bar (note 7) -  "Æ", // Latin capital ligature ae -  "ª", // Feminine ordinal indicator -  "Ø", // Latin capital letter o with stroke -  "º", // Masculine ordinal indicator -  "Þ", // Latin capital letter thorn -  "æ", // Latin small ligature ae -  "ð", // Latin small letter eth -  "ø", // Latin small letter o with stroke -  "ß", // Latin small letter sharp s -  "þ", // Latin small letter thorn -  "\255", // Soft hyphen (note 7) -  "Ð", // Latin capital letter eth (no equivalent) -  // Combinations -  "^", "`", "~", // Note 4 -  "´", "¨", "¯", "¸", -  "À", "Á", "Â", "Ã", "Ä", "Å", -  "à", "á", "â", "ã", "ä", "å", -  "Ç", -  "ç", -  "È", "É", "Ê", "Ë", -  "è", "é", "ê", "ë", -  "Ì", "Í", "Î", "Ï", -  "ì", "í", "î", "ï", -  "Ñ", -  "ñ", -  "Ò", "Ó", "Ô", "Õ", "Ö", -  "ò", "ó", "ô", "õ", "ö", -  "Ù", "Ú", "Û", "Ü", -  "ù", "ú", "û", "ü", -  "Ý", -  "ý", -  }); -  -  protected constant encode_to = ({ -  /*"#", "$",*/ "\250", // Note 3 -  ENC_ERR("\\"), ENC_ERR("{"), ENC_ERR("}"), // Note 7 -  ENC_ERR("\240"), // No-break space (note 7) -  "\264", // Multiplication sign -  "\270", // Division sign -  "\321", // Superscript one -  ENC_ERR("®"), // Registered sign (note 7) -  ENC_ERR("©"), // Copyright sign (note 7) -  ENC_ERR("¬"), // Not sign (note 7) -  ENC_ERR("¦"), // Broken bar (note 7) -  "\341", // Latin capital ligature ae -  "\343", // Feminine ordinal indicator -  "\351", // Latin capital letter o with stroke -  "\353", // Masculine ordinal indicator -  "\354", // Latin capital letter thorn -  "\361", // Latin small ligature ae -  "\363", // Latin small letter eth -  "\371", // Latin small letter o with stroke -  "\373", // Latin small letter sharp s -  "\374", // Latin small letter thorn -  ENC_ERR("\255"), // Soft hyphen (note 7) -  ENC_ERR("Ð"), // Latin capital letter eth (no equivalent) -  // Combinations -  CI(" "), GR(" "), TI(" "), // Note 4 -  AC(" "), DI(" "), MA(" "), CE(" "), -  GR("A"), AC("A"), CI("A"), TI("A"), DI("A"), RA("A"), -  GR("a"), AC("a"), CI("a"), TI("a"), DI("a"), RA("a"), -  CE("C"), -  CE("c"), -  GR("E"), AC("E"), CI("E"), DI("E"), -  GR("e"), AC("e"), CI("e"), DI("e"), -  GR("I"), AC("I"), CI("I"), DI("I"), -  GR("i"), AC("i"), CI("i"), DI("i"), -  TI("N"), -  TI("n"), -  GR("O"), AC("O"), CI("O"), TI("O"), DI("O"), -  GR("o"), AC("o"), CI("o"), TI("o"), DI("o"), -  GR("U"), AC("U"), CI("U"), DI("U"), -  GR("u"), AC("u"), CI("u"), DI("u"), -  GR("Y"), -  GR("y"), -  }); -  -  protected constant decode_from = ({ -  /*"#", "$",*/ "\244", "\246", "\250", // Note 3 -  /*"^", "`", "~",*/ // Note 4 -  "\251", // Left single quotation mark (note 7) -  "\252", // Left double quotation mark (note 7) -  "\254", // Leftwards arrow (note 7) -  "\255", // Upwards arrow (note 7) -  "\256", // Rightwards arrow (note 7) -  "\257", // Downwards arrow (note 7) -  "\264", // Multiplication sign -  "\270", // Division sign -  "\271", // Right single quotation mark (note 7) -  "\272", // Right double quotation mark (note 7) -  "\300", // Note 5 -  GR(""), // Combining grave accent -  AC(""), // Combining acute accent -  CI(""), // Combining circumflex accent -  TI(""), // Combining tilde -  MA(""), // Combining macron -  BR(""), // Combining breve -  DA(""), // Combining dot above -  DI(""), // Combining diaeresis -  "\311", // Note 5 -  RA(""), // Combining ring above -  CE(""), // Combining cedilla -  UN(""), // Combining underscore (note 6) -  DO(""), // Combining double acute accent -  OG(""), // Combining ogonek -  CA(""), // Combining caron -  "\320", // Em dash (note 7) -  "\321", // Superscript one -  "\322", // Registered sign (note 7) -  "\323", // Copyright sign (note 7) -  "\324", // Trade mark sign (note 7) -  "\325", // Eighth note (note 7) -  "\326", // Not sign (note 7) -  "\327", // Broken bar (note 7) -  "\330", "\331", "\332", "\333", // Note 2 -  "\334", // Vulgar fraction one eighth (note 7) -  "\335", // Vulgar fraction three eighths (note 7) -  "\336", // Vulgar fraction five eighths (note 7) -  "\337", // Vulgar fraction seven eighths (note 7) -  "\340", // Ohm sign -  "\341", // Latin capital ligature ae -  "\342", // Latin capital letter d with stroke -  "\343", // Feminine ordinal indicator -  "\344", // Latin capital letter h with stroke -  "\345", // Note 2 -  "\346", // Latin capital ligature ij -  "\347", // Latin capital letter l with middle dot -  "\350", // Latin capital letter l with stroke -  "\351", // Latin capital letter o with stroke -  "\352", // Latin capital ligature oe -  "\353", // Masculine ordinal indicator -  "\354", // Latin capital letter thorn -  "\355", // Latin capital letter t with stroke -  "\356", // Latin capital letter eng -  "\357", // Latin small letter n preceded by apostrophe -  "\360", // Latin small letter kra -  "\361", // Latin small ligature ae -  "\362", // Latin small letter d with stroke -  "\363", // Latin small letter eth -  "\364", // Latin small letter h with stroke -  "\365", // Latin small letter dotless i -  "\366", // Latin small ligature ij -  "\367", // Latin small letter l with middle dot -  "\370", // Latin small letter l with stroke -  "\371", // Latin small letter o with stroke -  "\372", // Latin small ligature oe -  "\373", // Latin small letter sharp s -  "\374", // Latin small letter thorn -  "\375", // Latin small letter t with stroke -  "\376", // Latin small letter eng -  "\377", // Soft hyphen (note 7) -  }); -  -  protected constant decode_to = ({ -  /*"#", "$",*/ "$", "#", "\244", // Note 3 -  /*"^", "`", "~",*/ // Note 4 -  DEC_ERR("\251"), // Left single quotation mark (note 7) -  DEC_ERR("\252"), // Left double quotation mark (note 7) -  DEC_ERR("\254"), // Leftwards arrow (note 7) -  DEC_ERR("\255"), // Upwards arrow (note 7) -  DEC_ERR("\256"), // Rightwards arrow (note 7) -  DEC_ERR("\257"), // Downwards arrow (note 7) -  "×", // Multiplication sign -  "÷", // Division sign -  DEC_ERR("\271"), // Right single quotation mark (note 7) -  DEC_ERR("\272"), // Right double quotation mark (note 7) -  DEC_ERR("\300"), // Note 5 -  DEC_COMB_MARK GR(""), // Combining grave accent -  DEC_COMB_MARK AC(""), // Combining acute accent -  DEC_COMB_MARK CI(""), // Combining circumflex accent -  DEC_COMB_MARK TI(""), // Combining tilde -  DEC_COMB_MARK MA(""), // Combining macron -  DEC_COMB_MARK BR(""), // Combining breve -  DEC_COMB_MARK DA(""), // Combining dot above -  DEC_COMB_MARK DI(""), // Combining diaeresis -  DEC_ERR("\311"), // Note 5 -  DEC_COMB_MARK RA(""), // Combining ring above -  DEC_COMB_MARK CE(""), // Combining cedilla -  DEC_COMB_MARK UN(""), // Combining underscore (note 6) -  DEC_COMB_MARK DO(""), // Combining double acute accent -  DEC_COMB_MARK OG(""), // Combining ogonek -  DEC_COMB_MARK CA(""), // Combining caron -  DEC_ERR("\320"), // Em dash (note 7) -  "¹", // Superscript one -  "®", // Registered sign (note 7) -  "©", // Copyright sign (note 7) -  DEC_ERR("\324"), // Trade mark sign (note 7) -  DEC_ERR("\325"), // Eighth note (note 7) -  "¬", // Not sign (note 7) -  "¦", // Broken bar (note 7) -  DEC_ERR("\330"), DEC_ERR("\331"), DEC_ERR("\332"), DEC_ERR("\333"), // Note 2 -  DEC_ERR("\334"), // Vulgar fraction one eighth (note 7) -  DEC_ERR("\335"), // Vulgar fraction three eighths (note 7) -  DEC_ERR("\336"), // Vulgar fraction five eighths (note 7) -  DEC_ERR("\337"), // Vulgar fraction seven eighths (note 7) -  DEC_ERR("\340"), // Ohm sign -  "Æ", // Latin capital ligature ae -  DEC_ERR("\342"), // Latin capital letter d with stroke -  "ª", // Feminine ordinal indicator -  DEC_ERR("\344"), // Latin capital letter h with stroke -  DEC_ERR("\345"), // Note 2 -  DEC_ERR("\346"), // Latin capital ligature ij -  DEC_ERR("\347"), // Latin capital letter l with middle dot -  DEC_ERR("\350"), // Latin capital letter l with stroke -  "Ø", // Latin capital letter o with stroke -  DEC_ERR("\352"), // Latin capital ligature oe -  "º", // Masculine ordinal indicator -  "Þ", // Latin capital letter thorn -  DEC_ERR("\355"), // Latin capital letter t with stroke -  DEC_ERR("\356"), // Latin capital letter eng -  DEC_ERR("\357"), // Latin small letter n preceded by apostrophe -  DEC_ERR("\360"), // Latin small letter kra -  "æ", // Latin small ligature ae -  DEC_ERR("\362"), // Latin small letter d with stroke -  "ð", // Latin small letter eth -  DEC_ERR("\364"), // Latin small letter h with stroke -  DEC_ERR("\365"), // Latin small letter dotless i -  DEC_ERR("\366"), // Latin small ligature ij -  DEC_ERR("\367"), // Latin small letter l with middle dot -  DEC_ERR("\370"), // Latin small letter l with stroke -  "ø", // Latin small letter o with stroke -  DEC_ERR("\372"), // Latin small ligature oe -  "ß", // Latin small letter sharp s -  "þ", // Latin small letter thorn -  DEC_ERR("\375"), // Latin small letter t with stroke -  DEC_ERR("\376"), // Latin small letter eng -  "\255", // Soft hyphen (note 7) -  }); -  -  protected constant decode_comb = ([ -  GR(" "): "`", -  AC(" "): "´", -  CI(" "): "^", -  TI(" "): "~", -  DI(" "): "¨", -  // RA(" "): DEC_ERR(RA(" ")), -  MA(" "): "¯", -  // BR(" "): DEC_ERR(BR(" ")), -  // DA(" "): DEC_ERR(DA(" ")), -  CE(" "): "¸", -  // DO(" "): DEC_ERR(DO(" ")), -  // OG(" "): DEC_ERR(OG(" ")), -  // CA(" "): DEC_ERR(CA(" ")), -  GR("A"): "À", AC("A"): "Á", CI("A"): "Â", TI("A"): "Ã", DI("A"): "Ä", RA("A"): "Å", -  GR("a"): "à", AC("a"): "á", CI("a"): "â", TI("a"): "ã", DI("a"): "ä", RA("a"): "å", -  CE("C"): "Ç", -  CE("c"): "ç", -  GR("E"): "È", AC("E"): "É", CI("E"): "Ê", DI("E"): "Ë", -  GR("e"): "è", AC("e"): "é", CI("e"): "ê", DI("e"): "ë", -  GR("I"): "Ì", AC("I"): "Í", CI("I"): "Î", DI("I"): "Ï", -  GR("i"): "ì", AC("i"): "í", CI("i"): "î", DI("i"): "ï", -  TI("N"): "Ñ", -  TI("n"): "ñ", -  GR("O"): "Ò", AC("O"): "Ó", CI("O"): "Ô", TI("O"): "Õ", DI("O"): "Ö", -  GR("o"): "ò", AC("o"): "ó", CI("o"): "ô", TI("o"): "õ", DI("o"): "ö", -  GR("U"): "Ù", AC("U"): "Ú", CI("U"): "Û", DI("U"): "Ü", -  GR("u"): "ù", AC("u"): "ú", CI("u"): "û", DI("u"): "ü", -  GR("Y"): "Ý", -  GR("y"): "ý", -  ]); -  -  /* Notes from Moras paper: -  -  (1) All characters in 0xC0-0xCF are non-spacing characters. They are -  all diacritical marks. To be represented stand-alone, they need to -  be followed by a SPACE (0x20). They can appear, also, before -  letters if the couple is one of the defined combinations. -  -  (2) Reserved for future standardization. -  -  (3) Current terminals may send and receive 0xA6 and 0xA4 for the NUMBER -  SIGN and DOLLAR SIGN, respectively. When receiving codes 0x23 and -  0x24, they may interpret them as NUMBER SIGN and CURRENCY SIGN, -  respectively. Future applications should code the NUMBER SIGN, -  DOLLAR SIGN and CURRENCY SIGN as 0x23, 0x24 and 0xA8, respectively. -  -  (4) Terminals should send only the codes 0xC1, 0xC3 and 0xC4, followed -  by SPACE (0x20) for stand-alone GRAVE ACCENT, CIRCUMFLEX ACCENT and -  TILDE, respectively. Nevertheless the terminal shall interpret the -  codes 0x60, 0x5E and 0x7E as GRAVE, CIRCUMFLEX and TILDE, -  respectively. -  -  (5) This code position is reserved and shall not be used. -  -  (6) It is recommended to implement the "underline" function by means of -  the control function SGR(4) instead of the "non-spacing underline" -  graphic character. -  -  (7) Not used in current teletex service (Recommendation T.61). -  */ -  - #undef GR - #undef AC - #undef CI - #undef TI - #undef MA - #undef BR - #undef DA - #undef DI - #undef RA - #undef CE - #undef UN - #undef DO - #undef OG - #undef CA -  -  string(0..255) get_der_content() -  { -  return [string(0..255)]replace(value, [array(string)]encode_from, -  [array(string(0..255))]encode_to); -  } -  -  this_program decode_primitive (string(0..255) contents, -  function(ADT.struct, -  mapping(int:program(Object)): -  Object)|void decoder, -  mapping(int:program(Object))|void types) { -  der = contents; -  -  array(string) parts = -  replace (contents, [array(string)]decode_from, -  [array(string)]decode_to) / DEC_COMB_MARK; -  value = parts[0]; -  foreach (parts[1..], string part) -  value += (decode_comb[part[..1]] || DEC_ERR(part[..1])) + part[2..]; -  -  return this; -  } -  - #undef ENC_ERR - #undef DEC_ERR - #undef DEC_COMB_MARK - } -  - //! +    int(0..1) asn1_broken_teletex_valid (string s)   {    return global.String.width(s)==8;
1264:   class BrokenTeletexString   {    inherit String; -  int tag = 20; +  int(0..) tag = 20;    constant type_name = "TeletexString"; // Alias: T61String   }    -  + //! @class TeletexString + //! + //! This used to be a string encoded with the T.61 character coding, + //! and has been obsolete since the early 1990:s. + //! + //! @seealso + //! @[BrokenTeletexString] +  + //! @decl inherit BrokenTeletexString +  + //! @ignore + __deprecated__(program(BrokenTeletexString)) TeletexString = +  (__deprecated__(program(BrokenTeletexString))) BrokenTeletexString; + //! @endignore +  + //! @endclass +    protected Regexp asn1_IA5_invalid_chars = Regexp ("([\200-\377])");      //!