Branch: Tag:

2006-08-22

2006-08-22 11:27:08 by Henrik Grubbström (Grubba) <grubba@grubba.org>

Bugfix for MySQL 4.0 and earlier.
Added MYSQL_CHARSET_DEBUG (when defined, CH_DEBUG() traces charset handling via werror()).
Let character_set_connection mirror character_set_client to get reasonable behaviour for both narrow and wide queries.

Rev: lib/modules/Sql.pmod/mysql.pike:1.30

1:   /* -  * $Id: mysql.pike,v 1.29 2006/08/15 14:50:39 grubba Exp $ +  * $Id: mysql.pike,v 1.30 2006/08/22 11:27:08 grubba Exp $    *    * Glue for the Mysql-module    */
16:   #define UNICODE_DECODE_MODE 1 // Unicode decode mode   #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset   #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset + #define BINARY_LATIN1_MODE 8 // Don't special-case latin1 control chars    -  + #ifdef MYSQL_CHARSET_DEBUG + #define CH_DEBUG(X...) werror("Sql.mysql: " + X) + #else + #define CH_DEBUG(X...) + #endif +    // Set to the above if the connection is in utf8-mode. Enable latin1   // unicode encode mode by default; it should be compatible with   // earlier pike versions.   static int utf8_mode;      // The charset, either "latin1" or "utf8", currently assigned to - // character_set_client when unicode encode mode is enabled. Zero when - // the connection charset has been set to something else than "latin1" - // or "unicode". + // character_set_client and character_set_connection when unicode + // encode mode is enabled. Zero when the connection charset has been + // set to something else than "latin1" or "unicode".   static string send_charset;      static void update_unicode_encode_mode_from_charset (string charset)
35:    utf8_mode |= LATIN1_UNICODE_ENCODE_MODE;    utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE;    send_charset = "latin1"; +  CH_DEBUG("Entering latin1 mode.\n");    break;    case "unicode":    utf8_mode |= UTF8_UNICODE_ENCODE_MODE;    utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE;    send_charset = "utf8"; -  +  CH_DEBUG("Entering utf8 mode.\n");    break;    default:    // Wrong charset - the mode can't be used.    utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE;    send_charset = 0; -  +  CH_DEBUG("Entering other mode.\n");    break;    }   }
58:   //!   //! Unicode encode mode works as follows: Eight bit strings are sent   //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}. - //! @[big_query] sends @expr{SET character_set_client@} statements as - //! necessary to update the charset on the server side. If the server - //! doesn't support that then it fails, but the wide string query - //! would fail anyway. + //! @[big_query] sends @expr{SET character_set_client@} and @expr{SET + //! character_set_connection@} statements as necessary to update the + //! charset on the server side. If the server doesn't support that + //! then it fails, but the wide string query would fail anyway.   //!   //! To make this transparent, string literals with introducers (e.g.   //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This
95:   //! will get UTF-8 encoded by the server.   //!   //! @note + //! When unicode encode mode is enabled, the connection charset + //! will mirror the client charset. This is necessary for unicode + //! characters to survive for wide queries, and for binary data + //! to survive for narrow queries in a transparent manner. + //! + //! @note   //! When unicode encode mode is enabled and the connection charset   //! is @expr{latin1@}, the charset accepted by @[big_query] is not   //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}.
107:   //! @seealso   //! @[set_unicode_decode_mode], @[set_charset]   { -  if (enable) +  if (enable) { +  CH_DEBUG("Enabling unicode encode mode.\n");    update_unicode_encode_mode_from_charset (lower_case (get_charset())); -  else { +  } else {    utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE);    send_charset = 0; -  +  CH_DEBUG("Disabling unicode encode mode.\n");    }    return !!send_charset;   }
154:   //! @[set_unicode_encode_mode]   {    if (enable) { +  CH_DEBUG("Enabling unicode decode mode.\n");    ::big_query ("SET character_set_results = utf8");    utf8_mode |= UNICODE_DECODE_MODE;    }    else { -  +  CH_DEBUG("Disabling unicode decode mode.\n");    ::big_query ("SET character_set_results = " + get_charset());    utf8_mode &= ~UNICODE_DECODE_MODE;    }
240:   {    charset = lower_case (charset);    +  CH_DEBUG("Setting charset to %O.\n", charset); +     ::set_charset (charset == "unicode" ? "utf8" : charset);       if (charset == "unicode" ||
312:   string latin1_to_utf8 (string s)   //! Converts a string in MySQL @expr{latin1@} format to UTF-8.   { +  CH_DEBUG("Converting latin1 query to utf8.\n");    return string_to_utf8 (replace (s, ([    "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192",    "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021",
546:    string restore_charset; \    if (charset) { \    restore_charset = send_charset || get_charset(); \ -  if (charset != restore_charset) \ -  ::big_query ("SET character_set_client=" + charset); \ -  else \ +  CH_DEBUG("Restore charset is %O.\n", restore_charset); \ +  if (charset != restore_charset) { \ +  ::big_query("SET character_set_client=" + charset); \ +  ::big_query("SET character_set_connection=" + charset); \ +  } else \    restore_charset = 0; \    } \    \
572:    new_send_charset = "utf8"; \    } \    } \ +  CH_DEBUG("New send charset is %O.\n", new_send_charset); \    \    if (new_send_charset != send_charset) { \ -  if (mixed err = \ -  ::big_query ("SET character_set_client=" + new_send_charset)) { \ +  CH_DEBUG("Send charset was %O.\n", send_charset); \ +  if (mixed err = catch { \ +  ::big_query("SET character_set_client=" + new_send_charset); \ +  ::big_query("SET character_set_connection=" + \ +  new_send_charset); \ +  }) { \    if (new_send_charset == "utf8") \    predef::error ("The query is a wide string " \    "and the MySQL server doesn't support UTF-8: %s\n", \ -  describe_error (err)); \ +  describe_error(err)); \    else \    throw(err); \    } \
587:    } \    } \    \ +  CH_DEBUG("Sending query %O.\n", query); \ +  \    int|object res = ::do_query(query); \    \    if (restore_charset) { \    if (send_charset && (<"latin1", "utf8">)[charset]) \    send_charset = charset; \ -  else \ -  ::big_query ("SET character_set_client=" + restore_charset); \ +  else { \ +  CH_DEBUG("Restoring charset to %O.\n", restore_charset); \ +  ::big_query("SET character_set_client=" + restore_charset); \ +  ::big_query("SET character_set_connection=" + restore_charset); \    } \ -  +  } \    \    if (!objectp(res)) return res; \    \    if (utf8_mode & UNICODE_DECODE_MODE) { \ -  +  CH_DEBUG("Adding UnicodeWrapper.\n"); \    return .sql_util.UnicodeWrapper(res); \    } \    return res;