Branch: Tag:

2004-06-30

2004-06-30 13:02:00 by Stefan Wallström <stewa@roxen.com>

Auto-decode many more charsets.

Rev: server/base_server/prototypes.pike:1.143
Rev: server/etc/modules/Roxen.pmod:1.183
Rev: server/protocols/ftp.pike:2.101

1:   // This is a roxen pike module. Copyright © 1999 - 2001, Roxen IS.   // - // $Id: Roxen.pmod,v 1.182 2004/06/29 10:57:59 mast Exp $ + // $Id: Roxen.pmod,v 1.183 2004/06/30 13:01:57 stewa Exp $      #include <roxen.h>   #include <config.h>
2261:      static multiset(string) charset_warned_for = (<>);    - constant magic_charset_variable_value = "åäö&#x829f;"; + constant magic_charset_variable_placeholder = "UTF-8"; + constant magic_charset_variable_value = "åäö&#x829f;@" + magic_charset_variable_placeholder;      function get_client_charset_decoder( string åäö, RequestID|void id )    //! Returns a decoder for the clients charset, given the clients
2284:    // Netscape seems to send "?" for characters that can't be represented    // by the current character set while IE encodes those characters    // as entities, while Opera uses "\201" or "?x829f;"... -  string test = replace((åäö/"\0")[0], +  string charset; +  string test = (åäö/"\0")[0]; +  array tmp = test/"@"; +  if(sizeof(tmp)>1) +  charset = tmp[1]; +  test = tmp[0]; +  +  test = replace(test,    ({ "&aring;", "&#229;", "&#xe5;",    "&auml;", "&#228;", "&#xe4;",    "&ouml;", "&#246;", "&#xf6;", -  "&#33439;","&#x829f;", "\201", "?x829f;", -  "\x829f" }), +  "&#33439;","&#x829f;", "\201", "?x829f;" }),    ({ "?", "?", "?",    "?", "?", "?",    "?", "?", "?", -  "?", "?", "?", "?", -  "?" })); +  "?", "?", "?", "?" }));       switch( test ) {    case "edv":
2340:    id && id->set_output_charset && id->set_output_charset( "shift_jis" );    return _charset_decoder(Locale.Charset.decoder("shift_jis"))->decode;    } +  +  // If the actual charset is valid, return a decoder for that charset +  catch { +  function f = _charset_decoder(Locale.Charset.decoder(charset))->decode; +  return f; +  }; +     if (!charset_warned_for[test] && (sizeof(charset_warned_for) < 256)) {    charset_warned_for[test] = 1;    report_warning( "Unable to find charset decoder for %O, vector: %O\n",