pike.git / lib / modules / Sql.pmod / mysql.pike

version» Context lines:

pike.git/lib/modules/Sql.pmod/mysql.pike:1:   /* -  * $Id: mysql.pike,v 1.30 2006/08/22 11:27:08 grubba Exp $ +  * $Id: mysql.pike,v 1.31 2006/09/15 13:08:50 mast Exp $    *    * Glue for the Mysql-module    */      //! Implements the glue needed to access the Mysql-module from the generic   //! SQL module.      #pike __REAL_VERSION__      #if constant(Mysql.mysql)      inherit Mysql.mysql;      #define UNICODE_DECODE_MODE 1 // Unicode decode mode   #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset   #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset - #define BINARY_LATIN1_MODE 8 // Don't special-case latin1 control chars +       #ifdef MYSQL_CHARSET_DEBUG   #define CH_DEBUG(X...) werror("Sql.mysql: " + X)   #else   #define CH_DEBUG(X...)   #endif    - // Set to the above if the connection is in utf8-mode. Enable latin1 - // unicode encode mode by default; it should be compatible with - // earlier pike versions. + // Set to the above if the connection is requested to be in one of the + // unicode modes. latin1 unicode encode mode is enabled by default; it + // should be compatible with earlier pike versions.   static int utf8_mode;      // The charset, either "latin1" or "utf8", currently assigned to - // character_set_client and character_set_connection when unicode - // encode mode is enabled. Zero when the connection charset has been - // set to something else than "latin1" or "unicode". + // character_set_client when unicode encode mode is enabled. Zero when + // the connection charset has been set to something else than "latin1" + // or "unicode".   static string send_charset;      static void update_unicode_encode_mode_from_charset (string charset)   {    switch (charset) { // Lowercase assumed.    case "latin1":    utf8_mode |= LATIN1_UNICODE_ENCODE_MODE;    utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE;    send_charset = "latin1"; -  CH_DEBUG("Entering latin1 mode.\n"); +  CH_DEBUG ("Entering latin1 encode mode.\n");    break;    case "unicode":    utf8_mode |= UTF8_UNICODE_ENCODE_MODE;    utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE;    send_charset = "utf8"; -  CH_DEBUG("Entering utf8 mode.\n"); +  CH_DEBUG ("Entering unicode encode mode.\n");    break;    default:    // Wrong charset - the mode can't be used.    utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE;    send_charset = 0; -  CH_DEBUG("Entering other mode.\n"); +  CH_DEBUG ("Not entering latin1/unicode encode mode " +  "due to incompatible charset %O.\n", charset);    break;    }   }      int(0..1) set_unicode_encode_mode (int enable)   //! Enables or disables unicode encode mode.   //!   //! In this mode, if the server supports UTF-8 and the connection   //! charset is @expr{latin1@} (the default) or @expr{unicode@} then   //! @[big_query] handles wide unicode queries. Enabled by default.   //!   //! Unicode encode mode works as follows: Eight bit strings are sent   //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}. - //! @[big_query] sends @expr{SET character_set_client@} and @expr{SET - //! character_set_connection@} statements as necessary to update the - //! charset on the server side. If the server doesn't support that - //! then it fails, but the wide string query would fail anyway. + //! @[big_query] sends @expr{SET character_set_client@} statements as + //! necessary to update the charset on the server side. If the server + //! doesn't support that then it fails, but the wide string query + //! would fail anyway.   //!   //! To make this transparent, string literals with introducers (e.g.   //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This   //! means that @[big_query] needs to do some superficial parsing of   //! the query when it is a wide string.   //!   //! @returns   //! @int   //! @value 1   //! Unicode encode mode is enabled.
pike.git/lib/modules/Sql.pmod/mysql.pike:98:   //!   //! To fix that, do @expr{@[set_charset]("unicode")@}. That will   //! allow unicode encode mode to work while @expr{utf8@} is fully   //! enabled at the server side.   //!   //! Tip: If you enable @expr{utf8@} on the server side, you need to   //! send raw binary strings as @expr{_binary'...'@}. Otherwise they   //! will get UTF-8 encoded by the server.   //!   //! @note - //! When unicode encode mode is enabled, the connection charset - //! will mirror the client charset. This is necessary for unicode - //! characters to survive for wide queries, and for binary data - //! to survive for narrow queries in a transparent manner. - //! - //! @note +    //! When unicode encode mode is enabled and the connection charset   //! is @expr{latin1@}, the charset accepted by @[big_query] is not   //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}.   //! The differences are in the range @expr{0x80..0x9f@} where   //! Unicode have control chars.   //!   //! This small discrepancy is not present when the connection   //! charset is @expr{unicode@}.   //!   //! @seealso   //! @[set_unicode_decode_mode], @[set_charset]   { -  if (enable) { -  CH_DEBUG("Enabling unicode encode mode.\n"); +  if (enable)    update_unicode_encode_mode_from_charset (lower_case (get_charset())); -  } else { +  else {    utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE);    send_charset = 0;    CH_DEBUG("Disabling unicode encode mode.\n");    }    return !!send_charset;   }      int get_unicode_encode_mode()   //! Returns nonzero if unicode encode mode is enabled, zero otherwise.   //!
pike.git/lib/modules/Sql.pmod/mysql.pike:327:   string quote(string s)   {    return replace(s,    ({ "\\", "\"", "\0", "\'", "\n", "\r" }),    ({ "\\\\", "\\\"", "\\0", "\\\'", "\\n", "\\r" }));   }      string latin1_to_utf8 (string s)   //! Converts a string in MySQL @expr{latin1@} format to UTF-8.   { -  CH_DEBUG("Converting latin1 query to utf8.\n"); +     return string_to_utf8 (replace (s, ([    "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192",    "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021",    "\x88": "\u02C6", "\x89": "\u2030", "\x8a": "\u0160", "\x8b": "\u2039",    "\x8c": "\u0152", /*"\x8d": "\u008D",*/ "\x8e": "\u017D", /*"\x8f": "\u008F",*/    /*"\x90": "\u0090",*/ "\x91": "\u2018", "\x92": "\u2019", "\x93": "\u201C",    "\x94": "\u201D", "\x95": "\u2022", "\x96": "\u2013", "\x97": "\u2014",    "\x98": "\u02DC", "\x99": "\u2122", "\x9a": "\u0161", "\x9b": "\u203A",    "\x9c": "\u0153", /*"\x9d": "\u009D",*/ "\x9e": "\u017E", "\x9f": "\u0178",    ])));
pike.git/lib/modules/Sql.pmod/mysql.pike:562:    }   }      #define QUERY_BODY(do_query) \    if (bindings) \    query = .sql_util.emulate_bindings(query,bindings,this); \    \    string restore_charset; \    if (charset) { \    restore_charset = send_charset || get_charset(); \ -  CH_DEBUG("Restore charset is %O.\n", restore_charset); \ +     if (charset != restore_charset) { \ -  ::big_query("SET character_set_client=" + charset); \ -  ::big_query("SET character_set_connection=" + charset); \ +  CH_DEBUG ("Switching charset from %O to %O (due to charset arg).\n", \ +  restore_charset, charset); \ +  ::big_query ("SET character_set_client=" + charset); \ +  /* Can't be changed automatically - has side effects. /mast */ \ +  /* ::big_query("SET character_set_connection=" + charset); */ \    } else \    restore_charset = 0; \    } \    \    else if (send_charset) { \    string new_send_charset; \    \    if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \    if (String.width (query) == 8) \    new_send_charset = "latin1"; \    else { \ -  +  CH_DEBUG ("Converting (mysql-)latin1 query to utf8.\n"); \    query = utf8_encode_query (query, latin1_to_utf8); \    new_send_charset = "utf8"; \    } \    } \    \    else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \    if (_can_send_as_latin1 (query)) \    new_send_charset = "latin1"; \    else { \ -  +  CH_DEBUG ("Converting query to utf8.\n"); \    query = utf8_encode_query (query, string_to_utf8); \    new_send_charset = "utf8"; \    } \    } \ -  CH_DEBUG("New send charset is %O.\n", new_send_charset); \ +     \    if (new_send_charset != send_charset) { \ -  CH_DEBUG("Send charset was %O.\n", send_charset); \ +  CH_DEBUG ("Switching charset from %O to %O.\n", \ +  send_charset, new_send_charset); \    if (mixed err = catch { \ -  ::big_query("SET character_set_client=" + new_send_charset); \ -  ::big_query("SET character_set_connection=" + \ -  new_send_charset); \ +  ::big_query ("SET character_set_client=" + new_send_charset); \ +  /* Can't be changed automatically - has side effects. /mast */ \ +  /* ::big_query("SET character_set_connection=" + \ +  new_send_charset); */ \    }) { \    if (new_send_charset == "utf8") \    predef::error ("The query is a wide string " \    "and the MySQL server doesn't support UTF-8: %s\n", \ -  describe_error(err)); \ +  describe_error (err)); \    else \ -  throw(err); \ +  throw err; \    } \    send_charset = new_send_charset; \    } \    } \    \ -  CH_DEBUG("Sending query %O.\n", query); \ +  CH_DEBUG ("Sending query with charset %O: %O.\n", \ +  charset || send_charset, query); \    \    int|object res = ::do_query(query); \    \    if (restore_charset) { \    if (send_charset && (<"latin1", "utf8">)[charset]) \    send_charset = charset; \    else { \ -  CH_DEBUG("Restoring charset to %O.\n", restore_charset); \ -  ::big_query("SET character_set_client=" + restore_charset); \ -  ::big_query("SET character_set_connection=" + restore_charset); \ +  CH_DEBUG ("Restoring charset %O.\n", restore_charset); \ +  ::big_query ("SET character_set_client=" + restore_charset); \ +  /* Can't be changed automatically - has side effects. /mast */ \ +  /* ::big_query("SET character_set_connection=" + restore_charset); */ \    } \    } \    \    if (!objectp(res)) return res; \    \    if (utf8_mode & UNICODE_DECODE_MODE) { \ -  CH_DEBUG("Adding UnicodeWrapper.\n"); \ +  CH_DEBUG ("Using UnicodeWrapper for result.\n"); \    return .sql_util.UnicodeWrapper(res); \    } \    return res;      Mysql.mysql_result big_query (string query,    mapping(string|int:mixed)|void bindings,    void|string charset)   //! Sends a query to the server.   //!   //! @param query