pike.git / lib / modules / Charset.pmod / module.pmod

version» Context lines:

pike.git/lib/modules/Charset.pmod/module.pmod:3:   #pike __REAL_VERSION__   //! @ignore   protected private inherit _Charset;   //! @endignore      //! The Charset module supports a wide variety of different character sets, and   //! it is flexible in regard of the names of character sets it accepts. The   //! character case is ignored, as are the most common non-alphanumeric   //! characters appearing in character set names. E.g. @expr{"iso-8859-1"@}   //! works just as well as @expr{"ISO_8859_1"@}. All encodings specified in - //! RFC 1345 are supported. + //! @rfc{1345@} are supported.   //!   //! First of all the Charset module is capable of handling the following   //! encodings of Unicode:   //!   //! @ul   //! @item utf7   //! @item utf8   //! @item utf16   //! @item utf16be   //! @item utf16le
pike.git/lib/modules/Charset.pmod/module.pmod:95:   //! @item 1256   //! @item 1257   //! @item 1258   //! These may be prefixed with @expr{"cp"@}, @expr{"ibm"@},   //! @expr{"ms"@} or @expr{"windows"@}   //! @item mysql-latin1   //! The default charset in MySQL, similar to @expr{cp1252@}.   //! @endul   //!   //! +359 more. + //! + //! @note + //! In Pike 7.8 and earlier this module was named @[Locale.Charset].      //! Virtual base class for charset decoders.   //! @example   //! string win1252_to_string( string data )   //! {   //! return Charset.decoder("windows-1252")->feed( data )->drain();   //! }   class Decoder   {    //! @decl string charset;
pike.git/lib/modules/Charset.pmod/module.pmod:164:    //!    //! @note    //! This is not necessarily the same name that was actually given to    //! @[encoder] to produce this object.       //! Change the replacement callback function.    //!    //! @param rc    //! Function that is called to encode characters    //! outside the current character encoding. -  void set_replacement_callback(function(string:string) rc); +  //! +  //! @returns +  //! Returns the current object to allow for chaining +  //! of calls. +  this_program set_replacement_callback(function(string:string) rc);   }      private class ASCIIDec {    constant charset = "iso88591";    protected private string s = "";    this_program feed(string ss)    {    s += ss;    return this;    }
pike.git/lib/modules/Charset.pmod/module.pmod:295:    this_program clear()    {    decoder->clear();    trailer = "";    return this;    }   }      // Decode GSM 03.38.   private class GSM03_38dec { -  static Decoder decoder = rfc1345("gsm0338"); -  static string trailer = ""; +  protected Decoder decoder = rfc1345("gsm0338"); +  protected string trailer = "";    string drain()    {    // Escape sequences for GSM 03.38.    // cf http://en.wikipedia.org/wiki/Short_message_service -  +  // https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=139    string res =    replace(trailer + decoder->drain(),    "\eØ\e\u039b\e(\e)\e/\e<\e=\e>\e¡\ee"/2,    "\f^{}\\[~]|\u20ac"/1);    trailer = "";    if (sizeof(res) && res[-1] == '\e') trailer = "\e";    return replace(res, "\e", "");    }    this_program feed(string s)    {
pike.git/lib/modules/Charset.pmod/module.pmod:322:    return this;    }    this_program clear()    {    decoder->clear();    trailer = "";    return this;    }   }    - // Decode HZ encoding of EUC-CN. RFC 1843. + // Decode HZ encoding of EUC-CN. @rfc{1843@}.   private class HZ_dec   {    protected Decoder decoder = EUCDec("gb2312", "euccn");    protected int mode;      #define HZ_MODE_MARK 1   #define HZ_MODE_SHIFT 2       string drain()    {
pike.git/lib/modules/Charset.pmod/module.pmod:445:   }      //! Returns a charset decoder object.   //! @param name   //! The name of the character set to decode from. Supported charsets   //! include (not all supported charsets are enumerable):   //! "iso_8859-1:1987", "iso_8859-1:1998", "iso-8859-1", "iso-ir-100",   //! "latin1", "l1", "ansi_x3.4-1968", "iso_646.irv:1991", "iso646-us",   //! "iso-ir-6", "us", "us-ascii", "ascii", "cp367", "ibm367", "cp819",   //! "ibm819", "iso-2022" (of various kinds), "utf-7", "utf-8" and - //! various encodings as described by RFC1345. + //! various encodings as described by @rfc{1345@}.   //! @throws   //! If the asked-for @[name] was not supported, an error is thrown.   Decoder decoder(string name)   {    string orig_name = name;       name = normalize(name);       if( custom_decoders[name] )    return custom_decoders[name]();
pike.git/lib/modules/Charset.pmod/module.pmod:526:       Decoder o = rfc1345(name);       if(o)    return o;       if ((o = .Tables[name]) && (p = o->decoder)) {    return p();    }    -  error("Unknown character encoding "+name+"\n"); +  if( p = master()->resolv(orig_name+".CharsetDecoder") ) +  return p(); +  +  error("Unknown character encoding "+orig_name+"\n");   }      private class ASCIIEnc   {    constant charset = "iso88591";    protected string s = "";    protected string|void replacement;    protected function(string:string)|void repcb;    protected string low_convert(string s, string|void r,    function(string:string)|void rc)
pike.git/lib/modules/Charset.pmod/module.pmod:578:    {    string ss = s;    s = "";    return ss;    }    this_program clear()    {    s = "";    return this;    } -  void set_replacement_callback(function(string:string) rc) +  this_program set_replacement_callback(function(string:string) rc)    {    repcb = rc; -  +  return this;    }    protected void create(string|void r, string|void rc)    {    replacement = r && low_convert(r);    repcb = rc;    }   }      private class USASCIIEnc {    // 7-bit US ASCII
pike.git/lib/modules/Charset.pmod/module.pmod:734:    }    this_program clear()    {    encoder->clear();    return this;    }   }      // Encode GSM 03.38.   private class GSM03_38enc { -  static Encoder encoder; -  static void create(string|void replacement, +  protected Encoder encoder; +  protected void create(string|void replacement,    function(string:string)|void repcb)    {    encoder = rfc1345("gsm0338", 1, replacement, repcb);    }    string drain()    {    return encoder->drain();    }    this_program feed(string s)    {
pike.git/lib/modules/Charset.pmod/module.pmod:779:      //! Returns a charset encoder object.   //!   //! @param name   //! The name of the character set to encode to. Supported charsets   //! include (not all supported charsets are enumerable):   //! "iso_8859-1:1987", "iso_8859-1:1998", "iso-8859-1", "iso-ir-100",   //! "latin1", "l1", "ansi_x3.4-1968", "iso_646.irv:1991", "iso646-us",   //! "iso-ir-6", "us", "us-ascii", "ascii", "cp367", "ibm367", "cp819",   //! "ibm819", "iso-2022" (of various kinds), "utf-7", "utf-8" and - //! various encodings as described by RFC1345. + //! various encodings as described by @rfc{1345@}.   //!   //! @param replacement   //! The string to use for characters that cannot be represented in   //! the charset. It's used when @[repcb] is not given or when it returns   //! zero. If no replacement string is given then an error is thrown   //! instead.   //!   //! @param repcb   //! A function to call for every character that cannot be   //! represented in the charset. If specified it's called with one
pike.git/lib/modules/Charset.pmod/module.pmod:878:       Encoder o = rfc1345(name, 1, replacement, repcb);       if(o)    return o;       if ((o = .Tables[name]) && (p = o->encoder)) {    return p(replacement, repcb);    }    -  error("Unknown character encoding "+name+"\n"); +  error("Unknown character encoding "+orig_name+"\n");   }         protected constant MIBenum = ([    3:"ANSI_X3.4-1968",    4:"ISO_8859-1:1987",    5:"ISO_8859-2:1987",    6:"ISO_8859-3:1988",    7:"ISO_8859-4:1988",    8:"ISO_8859-5:1988",
pike.git/lib/modules/Charset.pmod/module.pmod:1198:    //! Other code may produce errors of this type. In that case this    //! name is something that @[Charset.decoder] does not accept    //! (unless it implements exactly the same charset), and it should    //! be reasonably certain that @[Charset.decoder] never accepts that    //! name in the future (unless it is extended to implement exactly    //! the same charset).       protected void create (string err_str, int err_pos, string charset,    void|string reason, void|array bt)    { -  this_program::err_str = err_str; -  this_program::err_pos = err_pos; -  this_program::charset = charset; +  this::err_str = err_str; +  this::err_pos = err_pos; +  this::charset = charset;    ::create (format_err_msg ("Error decoding",    err_str, err_pos, charset, reason),    bt);    }   }      void decode_error (string err_str, int err_pos, string charset,    void|string reason, void|mixed... args)   //! Throws a @[DecodeError] exception. See @[DecodeError.create] for   //! details about the arguments. If @[args] is given then the error
pike.git/lib/modules/Charset.pmod/module.pmod:1251:    //! Other code may produce errors of this type. In that case this    //! name is something that @[Charset.encoder] does not accept    //! (unless it implements exactly the same charset), and it should    //! be reasonably certain that @[Charset.encoder] never accepts that    //! name in the future (unless it is extended to implement exactly    //! the same charset).       protected void create (string err_str, int err_pos, string charset,    void|string reason, void|array bt)    { -  this_program::err_str = err_str; -  this_program::err_pos = err_pos; -  this_program::charset = charset; +  this::err_str = err_str; +  this::err_pos = err_pos; +  this::charset = charset;    ::create (format_err_msg ("Error encoding",    err_str, err_pos, charset, reason),    bt);    }   }      void encode_error (string err_str, int err_pos, string charset,    void|string reason, void|mixed... args)   //! Throws an @[EncodeError] exception. See @[EncodeError.create] for   //! details about the arguments. If @[args] is given then the error   //! reason is formatted using @expr{sprintf(@[reason], @@@[args])@}.   {    if (sizeof (args)) reason = sprintf (reason, @args);    throw (EncodeError (err_str, err_pos, charset, reason, backtrace()[..<1]));   }