pike.git
/
lib
/
modules
/
Sql.pmod
/
mysql.pike
version
»
Context lines:
10
20
40
80
file
none
3
pike.git/lib/modules/Sql.pmod/mysql.pike:1:
/*
-
* $Id: mysql.pike,v 1.
29
2006/08/
15
14
:
50
:
39
grubba Exp $
+
* $Id: mysql.pike,v 1.
30
2006/08/
22
11
:
27
:
08
grubba Exp $
* * Glue for the Mysql-module */ //! Implements the glue needed to access the Mysql-module from the generic //! SQL module. #pike __REAL_VERSION__ #if constant(Mysql.mysql) inherit Mysql.mysql; #define UNICODE_DECODE_MODE 1 // Unicode decode mode #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset
-
+
#define BINARY_LATIN1_MODE 8 // Don't special-case latin1 control chars
-
+
#ifdef MYSQL_CHARSET_DEBUG
+
#define CH_DEBUG(X...) werror("Sql.mysql: " + X)
+
#else
+
#define CH_DEBUG(X...)
+
#endif
+
// Set to the above if the connection is in utf8-mode. Enable latin1 // unicode encode mode by default; it should be compatible with // earlier pike versions. static int utf8_mode; // The charset, either "latin1" or "utf8", currently assigned to
-
// character_set_client when unicode encode mode is enabled. Zero when
-
//
the connection charset has been set to something else than "latin1"
-
//
or "unicode".
+
// character_set_client
and character_set_connection
when unicode
+
//
encode mode is enabled. Zero when the connection charset has been
+
//
set to something other than "latin1" or "unicode".
static string send_charset; static void update_unicode_encode_mode_from_charset (string charset) { switch (charset) { // Lowercase assumed. case "latin1": utf8_mode |= LATIN1_UNICODE_ENCODE_MODE; utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE; send_charset = "latin1";
-
+
CH_DEBUG("Entering latin1 mode.\n");
break; case "unicode": utf8_mode |= UTF8_UNICODE_ENCODE_MODE; utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE; send_charset = "utf8";
-
+
CH_DEBUG("Entering utf8 mode.\n");
break; default: // Wrong charset - the mode can't be used. utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE; send_charset = 0;
-
+
CH_DEBUG("Entering other mode.\n");
break; } } int(0..1) set_unicode_encode_mode (int enable) //! Enables or disables unicode encode mode. //! //! In this mode, if the server supports UTF-8 and the connection //! charset is @expr{latin1@} (the default) or @expr{unicode@} then //! @[big_query] handles wide unicode queries. Enabled by default. //! //! Unicode encode mode works as follows: Eight bit strings are sent //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}.
-
//! @[big_query] sends @expr{SET character_set_client@}
statements
as
-
//! necessary to update the charset on the server side. If the server
-
//!
doesn't support that then it fails, but the wide string query
-
//!
would fail anyway.
+
//! @[big_query] sends @expr{SET character_set_client@}
and
@expr{SET
+
//!
character_set_connection@} statements as
necessary to update the
+
//!
charset on the server side. If the server doesn't support that
+
//!
then it fails, but the wide string query would fail anyway.
//! //! To make this transparent, string literals with introducers (e.g. //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This //! means that @[big_query] needs to do some superficial parsing of //! the query when it is a wide string. //! //! @returns //! @int //! @value 1 //! Unicode encode mode is enabled.
pike.git/lib/modules/Sql.pmod/mysql.pike:88:
//! //! To fix that, do @expr{@[set_charset]("unicode")@}. That will //! allow unicode encode mode to work while @expr{utf8@} is fully //! enabled at the server side. //! //! Tip: If you enable @expr{utf8@} on the server side, you need to //! send raw binary strings as @expr{_binary'...'@}. Otherwise they //! will get UTF-8 encoded by the server. //! //! @note
+
//! When unicode encode mode is enabled, the connection charset
+
//! will mirror the client charset. This is necessary for unicode
+
//! characters to survive for wide queries, and for binary data
+
//! to survive for narrow queries in a transparent manner.
+
//!
+
//! @note
//! When unicode encode mode is enabled and the connection charset //! is @expr{latin1@}, the charset accepted by @[big_query] is not //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}. //! The differences are in the range @expr{0x80..0x9f@} where //! Unicode has control chars. //! //! This small discrepancy is not present when the connection //! charset is @expr{unicode@}. //! //! @seealso //! @[set_unicode_decode_mode], @[set_charset] {
-
if (enable)
+
if (enable)
{
+
CH_DEBUG("Enabling unicode encode mode.\n");
update_unicode_encode_mode_from_charset (lower_case (get_charset()));
-
else {
+
}
else {
utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE); send_charset = 0;
-
+
CH_DEBUG("Disabling unicode encode mode.\n");
} return !!send_charset; } int get_unicode_encode_mode() //! Returns nonzero if unicode encode mode is enabled, zero otherwise. //! //! @seealso //! @[set_unicode_encode_mode] {
pike.git/lib/modules/Sql.pmod/mysql.pike:147:
//! //! @note //! This mode is not compatible with earlier pike versions. You need //! to run in compatibility mode <= 7.6 to have it disabled by //! default. //! //! @seealso //! @[set_unicode_encode_mode] { if (enable) {
+
CH_DEBUG("Enabling unicode decode mode.\n");
::big_query ("SET character_set_results = utf8"); utf8_mode |= UNICODE_DECODE_MODE; } else {
-
+
CH_DEBUG("Disabling unicode decode mode.\n");
::big_query ("SET character_set_results = " + get_charset()); utf8_mode &= ~UNICODE_DECODE_MODE; } } int get_unicode_decode_mode() //! Returns nonzero if unicode decode mode is enabled, zero otherwise. //! //! @seealso //! @[set_unicode_decode_mode]
pike.git/lib/modules/Sql.pmod/mysql.pike:233:
//! //! You can use the @expr{mysql-latin1@} encoding in the //! @[Locale.Charset] module to do conversions, or just use the //! special @expr{"unicode"@} charset instead. //! //! @seealso //! @[get_charset], @[set_unicode_encode_mode], @[set_unicode_decode_mode] { charset = lower_case (charset);
+
CH_DEBUG("Setting charset to %O.\n", charset);
+
::set_charset (charset == "unicode" ? "utf8" : charset); if (charset == "unicode" || utf8_mode & (LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE)) update_unicode_encode_mode_from_charset (charset); if (charset == "unicode") utf8_mode |= UNICODE_DECODE_MODE; else if (utf8_mode & UNICODE_DECODE_MODE && charset != "utf8") // This setting has been overridden by ::set_charset, so we need
pike.git/lib/modules/Sql.pmod/mysql.pike:305:
string quote(string s) { return replace(s, ({ "\\", "\"", "\0", "\'", "\n", "\r" }), ({ "\\\\", "\\\"", "\\0", "\\\'", "\\n", "\\r" })); } string latin1_to_utf8 (string s) //! Converts a string in MySQL @expr{latin1@} format to UTF-8. {
+
CH_DEBUG("Converting latin1 query to utf8.\n");
return string_to_utf8 (replace (s, ([ "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192", "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021", "\x88": "\u02C6", "\x89": "\u2030", "\x8a": "\u0160", "\x8b": "\u2039", "\x8c": "\u0152", /*"\x8d": "\u008D",*/ "\x8e": "\u017D", /*"\x8f": "\u008F",*/ /*"\x90": "\u0090",*/ "\x91": "\u2018", "\x92": "\u2019", "\x93": "\u201C", "\x94": "\u201D", "\x95": "\u2022", "\x96": "\u2013", "\x97": "\u2014", "\x98": "\u02DC", "\x99": "\u2122", "\x9a": "\u0161", "\x9b": "\u203A", "\x9c": "\u0153", /*"\x9d": "\u009D",*/ "\x9e": "\u017E", "\x9f": "\u0178", ])));
pike.git/lib/modules/Sql.pmod/mysql.pike:539:
} } #define QUERY_BODY(do_query) \ if (bindings) \ query = .sql_util.emulate_bindings(query,bindings,this); \ \ string restore_charset; \ if (charset) { \ restore_charset = send_charset || get_charset(); \
-
if (charset != restore_charset)
\
-
::big_query
("SET character_set_client=" + charset); \
-
else
\
+
CH_DEBUG("Restore charset is %O.\n", restore_charset); \
+
if (charset != restore_charset)
{
\
+
::big_query("SET character_set_client=" + charset);
\
+
::big_query("SET
character_set_connection="
+
charset);
\
+
}
else
\
restore_charset = 0; \ } \ \ else if (send_charset) { \ string new_send_charset; \ \ if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \ if (String.width (query) == 8) \ new_send_charset = "latin1"; \ else { \
pike.git/lib/modules/Sql.pmod/mysql.pike:565:
} \ \ else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \ if (_can_send_as_latin1 (query)) \ new_send_charset = "latin1"; \ else { \ query = utf8_encode_query (query, string_to_utf8); \ new_send_charset = "utf8"; \ } \ } \
+
CH_DEBUG("New send charset is %O.\n", new_send_charset); \
\ if (new_send_charset != send_charset) { \
-
if (mixed err = \
-
::big_query
("SET character_set_
client
=" + new_send_charset)) { \
+
CH_DEBUG("Send charset was %O.\n", send_charset); \
+
if (mixed err =
catch
{
\
+
::big_query("SET character_set_client=" + new_send_charset);
\
+
::big_query("SET character_set_
connection
=" +
\
+
new_send_charset)
; \
+
}
) {
\
if (new_send_charset == "utf8") \ predef::error ("The query is a wide string " \ "and the MySQL server doesn't support UTF-8: %s\n", \
-
describe_error
(err)); \
+
describe_error(err));
\
else \ throw(err); \ } \ send_charset = new_send_charset; \ } \ } \ \
-
+
CH_DEBUG("Sending query %O.\n", query); \
+
\
int|object res = ::do_query(query); \ \ if (restore_charset) { \ if (send_charset && (<"latin1", "utf8">)[charset]) \ send_charset = charset; \
-
else \
-
::big_query
("SET character_set_client=" + restore_charset); \
+
else
{
\
+
CH_DEBUG("Restoring charset to %O.\n", restore_charset);
\
+
::big_query("SET character_set_client=" + restore_charset);
\
+
::big_query("SET character_set_connection=" + restore_charset); \
} \
-
+
} \
\ if (!objectp(res)) return res; \ \ if (utf8_mode & UNICODE_DECODE_MODE) { \
-
+
CH_DEBUG("Adding UnicodeWrapper.\n"); \
return .sql_util.UnicodeWrapper(res); \ } \ return res; Mysql.mysql_result big_query (string query, mapping(string|int:mixed)|void bindings, void|string charset) //! Sends a query to the server. //! //! @param query