pike.git
/
lib
/
modules
/
Sql.pmod
/
mysql.pike
version
»
Context lines:
10
20
40
80
file
none
3
pike.git/lib/modules/Sql.pmod/mysql.pike:1:
/*
-
* $Id: mysql.pike,v 1.
30
2006/
08
/
22
11
:
27:
08
grubba
Exp $
+
* $Id: mysql.pike,v 1.
31
2006/
09
/
15
13
:
08
:
50
mast
Exp $
* * Glue for the Mysql-module */ //! Implements the glue needed to access the Mysql-module from the generic //! SQL module. #pike __REAL_VERSION__ #if constant(Mysql.mysql) inherit Mysql.mysql; #define UNICODE_DECODE_MODE 1 // Unicode decode mode #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset
-
#define BINARY_LATIN1_MODE 8 // Don't special-case latin1 control chars
+
#ifdef MYSQL_CHARSET_DEBUG #define CH_DEBUG(X...) werror("Sql.mysql: " + X) #else #define CH_DEBUG(X...) #endif
-
// Set to the above if the connection is in
utf8-mode.
Enable
latin1
-
// unicode encode mode by default; it should be compatible with
-
//
earlier pike versions.
+
// Set to the above if the connection is
requested to be
in
one
of
the
+
// unicode
modes. latin1 unicode
encode mode
is enabled
by default; it
+
//
should be compatible with earlier pike versions.
static int utf8_mode; // The charset, either "latin1" or "utf8", currently assigned to
-
// character_set_client
and character_set_connection
when unicode
-
//
encode mode is enabled. Zero when the connection charset has been
-
//
set to something else than "latin1" or "unicode".
+
// character_set_client when unicode encode mode is enabled. Zero when
+
//
the connection charset has been set to something other than "latin1"
+
//
or "unicode".
static string send_charset; static void update_unicode_encode_mode_from_charset (string charset) { switch (charset) { // Lowercase assumed. case "latin1": utf8_mode |= LATIN1_UNICODE_ENCODE_MODE; utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE; send_charset = "latin1";
-
CH_DEBUG("Entering latin1 mode.\n");
+
CH_DEBUG
("Entering latin1
encode
mode.\n");
break; case "unicode": utf8_mode |= UTF8_UNICODE_ENCODE_MODE; utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE; send_charset = "utf8";
-
CH_DEBUG("Entering
utf8
mode.\n");
+
CH_DEBUG
("Entering
unicode
encode
mode.\n");
break; default: // Wrong charset - the mode can't be used. utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE; send_charset = 0;
-
CH_DEBUG("
Entering
other
mode.\n");
+
CH_DEBUG
("
Not
entering
latin1/unicode encode
mode
"
+
"due to incompatible charset %O
.\n"
, charset
);
break; } } int(0..1) set_unicode_encode_mode (int enable) //! Enables or disables unicode encode mode. //! //! In this mode, if the server supports UTF-8 and the connection //! charset is @expr{latin1@} (the default) or @expr{unicode@} then //! @[big_query] handles wide unicode queries. Enabled by default. //! //! Unicode encode mode works as follows: Eight bit strings are sent //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}.
-
//! @[big_query] sends @expr{SET character_set_client@}
and
@expr{SET
-
//!
character_set_connection@} statements as
necessary to update the
-
//!
charset on the server side. If the server doesn't support that
-
//!
then it fails, but the wide string query would fail anyway.
+
//! @[big_query] sends @expr{SET character_set_client@}
statements
as
+
//! necessary to update the charset on the server side. If the server
+
//!
doesn't support that then it fails, but the wide string query
+
//!
would fail anyway.
//! //! To make this transparent, string literals with introducers (e.g. //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This //! means that @[big_query] needs to do some superficial parsing of //! the query when it is a wide string. //! //! @returns //! @int //! @value 1 //! Unicode encode mode is enabled.
pike.git/lib/modules/Sql.pmod/mysql.pike:98:
//! //! To fix that, do @expr{@[set_charset]("unicode")@}. That will //! allow unicode encode mode to work while @expr{utf8@} is fully //! enabled at the server side. //! //! Tip: If you enable @expr{utf8@} on the server side, you need to //! send raw binary strings as @expr{_binary'...'@}. Otherwise they //! will get UTF-8 encoded by the server. //! //! @note
-
//! When unicode encode mode is enabled, the connection charset
-
//! will mirror the client charset. This is necessary for unicode
-
//! characters to survive for wide queries, and for binary data
-
//! to survive for narrow queries in a transparent manner.
-
//!
-
//! @note
+
//! When unicode encode mode is enabled and the connection charset //! is @expr{latin1@}, the charset accepted by @[big_query] is not //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}. //! The differences are in the range @expr{0x80..0x9f@} where //! Unicode has control chars. //! //! This small discrepancy is not present when the connection //! charset is @expr{unicode@}. //! //! @seealso //! @[set_unicode_decode_mode], @[set_charset] {
-
if (enable)
{
-
CH_DEBUG("Enabling unicode encode mode.\n");
+
if (enable)
update_unicode_encode_mode_from_charset (lower_case (get_charset()));
-
}
else {
+
else {
utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE); send_charset = 0; CH_DEBUG("Disabling unicode encode mode.\n"); } return !!send_charset; } int get_unicode_encode_mode() //! Returns nonzero if unicode encode mode is enabled, zero otherwise. //!
pike.git/lib/modules/Sql.pmod/mysql.pike:327:
string quote(string s) { return replace(s, ({ "\\", "\"", "\0", "\'", "\n", "\r" }), ({ "\\\\", "\\\"", "\\0", "\\\'", "\\n", "\\r" })); } string latin1_to_utf8 (string s) //! Converts a string in MySQL @expr{latin1@} format to UTF-8. {
-
CH_DEBUG("Converting latin1 query to utf8.\n");
+
return string_to_utf8 (replace (s, ([ "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192", "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021", "\x88": "\u02C6", "\x89": "\u2030", "\x8a": "\u0160", "\x8b": "\u2039", "\x8c": "\u0152", /*"\x8d": "\u008D",*/ "\x8e": "\u017D", /*"\x8f": "\u008F",*/ /*"\x90": "\u0090",*/ "\x91": "\u2018", "\x92": "\u2019", "\x93": "\u201C", "\x94": "\u201D", "\x95": "\u2022", "\x96": "\u2013", "\x97": "\u2014", "\x98": "\u02DC", "\x99": "\u2122", "\x9a": "\u0161", "\x9b": "\u203A", "\x9c": "\u0153", /*"\x9d": "\u009D",*/ "\x9e": "\u017E", "\x9f": "\u0178", ])));
pike.git/lib/modules/Sql.pmod/mysql.pike:562:
} } #define QUERY_BODY(do_query) \ if (bindings) \ query = .sql_util.emulate_bindings(query,bindings,this); \ \ string restore_charset; \ if (charset) { \ restore_charset = send_charset || get_charset(); \
-
CH_DEBUG("Restore charset is %O.\n", restore_charset); \
+
if (charset != restore_charset) { \
-
::big_query("SET character_set_client=" + charset); \
-
::big_query("SET character_set_connection=" + charset);
\
+
CH_DEBUG ("Switching charset from %O to %O (due to charset arg).\n", \
+
restore_charset, charset); \
+
::big_query
("SET character_set_client=" + charset);
\
+
/* Can't be changed automatically - has side effects. /mast */
\
+
/*
::big_query("SET character_set_connection=" + charset);
*/
\
} else \ restore_charset = 0; \ } \ \ else if (send_charset) { \ string new_send_charset; \ \ if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \ if (String.width (query) == 8) \ new_send_charset = "latin1"; \ else { \
-
+
CH_DEBUG ("Converting (mysql-)latin1 query to utf8.\n"); \
query = utf8_encode_query (query, latin1_to_utf8); \ new_send_charset = "utf8"; \ } \ } \ \ else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \ if (_can_send_as_latin1 (query)) \ new_send_charset = "latin1"; \ else { \
-
+
CH_DEBUG ("Converting query to utf8.\n"); \
query = utf8_encode_query (query, string_to_utf8); \ new_send_charset = "utf8"; \ } \ } \
-
CH_DEBUG("New send charset is %O.\n", new_send_charset); \
+
\ if (new_send_charset != send_charset) { \
-
CH_DEBUG("
Send
charset
was
%O.\n", send_charset); \
+
CH_DEBUG
("
Switching
charset
from
%O
to %O
.\n",
\
+
send_charset
, new_send_charset
);
\
if (mixed err = catch { \
-
::big_query("SET character_set_client=" + new_send_charset); \
-
::big_query("SET character_set_connection=" +
\
-
new_send_charset); \
+
::big_query
("SET character_set_client=" + new_send_charset);
\
+
/* Can't be changed automatically - has side effects. /mast */
\
+
/*
::big_query("SET character_set_connection=" + \
+
new_send_charset);
*/
\
}) { \ if (new_send_charset == "utf8") \ predef::error ("The query is a wide string " \ "and the MySQL server doesn't support UTF-8: %s\n", \
-
describe_error(err));
\
+
describe_error
(err)); \
else \
-
throw
(
err
)
; \
+
throw
err;
\
} \ send_charset = new_send_charset; \ } \ } \ \
-
CH_DEBUG("Sending query %O.\n",
query);
\
+
CH_DEBUG
("Sending query
with charset
%O
: %O
.\n",
\
+
charset || send_charset,
query); \
\ int|object res = ::do_query(query); \ \ if (restore_charset) { \ if (send_charset && (<"latin1", "utf8">)[charset]) \ send_charset = charset; \ else { \
-
CH_DEBUG("Restoring charset
to
%O.\n", restore_charset); \
-
::big_query("SET character_set_client=" + restore_charset); \
-
::big_query("SET character_set_connection=" + restore_charset);
\
+
CH_DEBUG
("Restoring charset %O.\n", restore_charset);
\
+
::big_query
("SET character_set_client=" + restore_charset);
\
+
/* Can't be changed automatically - has side effects. /mast */
\
+
/*
::big_query("SET character_set_connection=" + restore_charset);
*/
\
} \ } \ \ if (!objectp(res)) return res; \ \ if (utf8_mode & UNICODE_DECODE_MODE) { \
-
CH_DEBUG("
Adding
UnicodeWrapper.\n");
\
+
CH_DEBUG
("
Using
UnicodeWrapper
for result
.\n"); \
return .sql_util.UnicodeWrapper(res); \ } \ return res; Mysql.mysql_result big_query (string query, mapping(string|int:mixed)|void bindings, void|string charset) //! Sends a query to the server. //! //! @param query