2006-08-12
2006-08-12 02:57:55 by Martin Stjernholm <mast@lysator.liu.se>
-
aa16ebe49d3f921d40df1f15b8c063ba50fc24fa
(599 lines)
(+433/-166)
[
Show
| Annotate
]
Branch: 7.9
Charset and unicode support, mk2. MySQL server 4.1 or later is required to
make use of this, but the client lib can still be from the darker stone age.
Warning: Not tested with a pre-4.1 server.
Rev: lib/modules/Sql.pmod/mysql.pike:1.25
Rev: src/modules/Mysql/configure.in:1.51
Rev: src/modules/Mysql/mysql.c:1.98
Rev: src/modules/Mysql/precompiled_mysql.h:1.18
1:
/*
- * $Id: mysql.pike,v 1.24 2006/08/10 19:35:26 mast Exp $
+ * $Id: mysql.pike,v 1.25 2006/08/12 02:57:55 mast Exp $
*
* Glue for the Mysql-module
*/
13: Inside #if constant(Mysql.mysql)
inherit Mysql.mysql;
- #define UTF8_DECODE_QUERY 1
- #define UTF8_ENCODE_QUERY 2
+ #define UNICODE_DECODE_MODE 1 // Unicode decode mode
+ #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset
+ #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset
- //! Set to the above if the connection is in utf8-mode.
+ // Set to the above if the connection is in utf8-mode. Enable latin1
+ // unicode encode mode by default; it should be compatible with
+ // earlier pike versions.
static int utf8_mode;
- //! The charset passed with the @expr{mysql_charset_name@} option.
- static string initial_charset;
+ // The charset, either "latin1" or "utf8", currently assigned to
+ // character_set_client when unicode encode mode is enabled. Zero when
+ // unicode encode mode is disabled or the connection charset has been
+ // set to something other than "latin1" or "unicode".
+ static string send_charset;
- //! Enter unicode encode/decode mode.
+ // Brings the unicode encode mode flags in utf8_mode and the
+ // send_charset variable in line with the connection charset name
+ // given in charset (which must already be lowercased):
+ //
+ //   "latin1"  - latin1 encode mode; send_charset becomes "latin1".
+ //   "unicode" - utf8 encode mode; send_charset becomes "utf8".
+ //   others    - encode mode can't be used; send_charset becomes zero.
+ static void update_unicode_encode_mode_from_charset (string charset)
+ {
+   switch (charset) { // Lowercase assumed.
+     case "latin1":
+       utf8_mode |= LATIN1_UNICODE_ENCODE_MODE;
+       utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE;
+       send_charset = "latin1";
+       break;
+     case "unicode":
+       utf8_mode |= UTF8_UNICODE_ENCODE_MODE;
+       utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE;
+       send_charset = "utf8";
+       break;
+     default:
+       // Wrong charset - the mode can't be used. Both flag bits are
+       // set here; set_charset() tests these bits to decide whether to
+       // call this function again for a new charset.
+       utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE;
+       send_charset = 0;
+       break;
+   }
+ #ifdef MYSQL_CHARSET_DEBUG
+   // Debug trace only; must not go to stderr in normal operation.
+   werror ("utf8_mode %x, send_charset %O\n", utf8_mode, send_charset);
+ #endif
+ }
+
+ int(0..1) set_unicode_encode_mode (int enable)
+ //! Enables or disables unicode encode mode.
//!
- //! After this has been enabled, query-strings may be provided
- //! as wide (Unicode) strings, and any non-binary data will be
- //! decoded automatically according to UTF8.
+ //! In this mode, if the server supports UTF-8 and the connection
+ //! charset is @expr{latin1@} (the default) or @expr{unicode@} then
+ //! @[big_query] handles wide unicode queries. Enabled by default.
//!
- //! The statement "@expr{SET NAMES 'utf8'@}" is sent to enable UTF8
- //! mode for the connection.
+ //! Unicode encode mode works as follows: Eight bit strings are sent
+ //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}.
+ //! @[big_query] sends @expr{SET character_set_client@} statements as
+ //! necessary to update the charset on the server side. If the server
+ //! doesn't support that then it fails, but the wide string query
+ //! would fail anyway.
//!
- //! @param force
- //! If this optional flag is nonzero then the statement to enable
- //! UTF8 mode is sent even if this mode already is enabled according
- //! to the internal flags.
+ //! To make this transparent, string literals with introducers (e.g.
+ //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This
+ //! means that @[big_query] needs to do some superficial parsing of
+ //! the query when it is a wide string.
//!
//! @returns
- //! Returns @expr{1@} on success or @expr{0@} if the server doesn't
- //! support unicode (i.e. if the statement to enable UTF8 mode
- //! fails).
+ //! @int
+ //! @value 1
+ //! Unicode encode mode is enabled.
+ //! @value 0
+ //! Unicode encode mode couldn't be enabled because an
+ //! incompatible connection charset is set. You need to do
+ //! @expr{@[set_charset]("latin1")@} or
+ //! @expr{@[set_charset]("unicode")@} to enable it.
+ //! @endint
//!
//! @note
- //! Literal strings prefixed by the keyword @tt{BINARY@} will not be
- //! encoded using UTF8.
+ //! Note that this mode doesn't affect the MySQL system variable
+ //! @expr{character_set_connection@}, i.e. it will still be set to
+ //! @expr{latin1@} by default which means server functions like
+ //! @expr{UPPER()@} won't handle non-@expr{latin1@} characters
+ //! correctly in all cases.
//!
-
+ //! To fix that, do @expr{@[set_charset]("unicode")@}. That will
+ //! allow unicode encode mode to work while @expr{utf8@} is fully
+ //! enabled at the server side.
+ //!
+ //! Tip: If you enable @expr{utf8@} on the server side, you need to
+ //! send raw binary strings as @expr{_binary'...'@}. Otherwise they
+ //! will get UTF-8 encoded by the server.
+ //!
//! @note
- //! Unicode support was added in MySQL 4.1.
+ //! When unicode encode mode is enabled and the connection charset
+ //! is @expr{latin1@}, the charset accepted by @[big_query] is not
+ //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}.
+ //! The differences are in the range @expr{0x80..0x9f@} where
+ //! Unicode has control chars.
//!
-
+ //! This small discrepancy is not present when the connection
+ //! charset is @expr{unicode@}.
+ //!
//! @seealso
- //! @[enter_unicode_decode_mode()], @[leave_unicode_mode()]
- int(0..1) enter_unicode_mode (void|int force)
+ //! @[set_unicode_decode_mode], @[set_charset]
{
- if (force || utf8_mode != UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY) {
- if (catch {
- big_query("SET NAMES 'utf8'");
- }) {
- return 0;
+ if (enable)
+ update_unicode_encode_mode_from_charset (lower_case (get_charset()));
+ else {
+ utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE);
+ send_charset = 0;
}
- utf8_mode = UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY;
+ return !!send_charset;
}
- return 1;
+
+ int get_unicode_encode_mode()
+ //! Tells whether unicode encode mode is currently enabled.
+ //!
+ //! @returns
+ //!   @expr{1@} if it is enabled, @expr{0@} otherwise.
+ //!
+ //! @seealso
+ //!   @[set_unicode_encode_mode]
+ {
+   // send_charset is only nonzero while the mode is usable.
+   return send_charset ? 1 : 0;
}
- //! Enter unicode decode mode.
+ void set_unicode_decode_mode (int enable)
+ //! Enable or disable unicode decode mode.
//!
- //! After this has been enabled, non-binary data from the database
- //! will be decoded according to UTF8.
+ //! In this mode, if the server supports UTF-8 then non-binary text
+ //! strings in results are automatically decoded to (possibly
+ //! wide) unicode strings. Not enabled by default.
//!
//! The statement "@expr{SET character_set_results = utf8@}" is sent
- //! to enable UTF8 mode for the returned results.
+ //! to the server to enable the mode. When the mode is disabled,
+ //! "@expr{SET character_set_results = xxx@}" is sent, where
+ //! @expr{xxx@} is the connection charset that @[get_charset] returns.
//!
- //! @param force
- //! If this optional flag is nonzero then the statement to enable
- //! UTF8 encoding of results is sent even though this mode already
- //! is enabled according to the internal flags.
+ //! @param enable
+ //! Nonzero enables this feature, zero disables it.
//!
- //! @returns
- //! Returns @expr{1@} on success or @expr{0@} if the server doesn't
- //! support unicode (i.e. if the statement to enable UTF8 mode
- //! fails).
+ //! @throws
+ //! Throws an exception if the server doesn't support this, i.e. if
+ //! the statement above fails. The MySQL system variable
+ //! @expr{character_set_results@} was added in MySQL 4.1.1.
//!
//! @note
- //! Any query encoding will need to be done by hand.
+ //! This mode is not compatible with earlier pike versions. You need
+ //! to run in compatibility mode <= 7.6 to have it disabled by
+ //! default.
//!
- //! @note
- //! If the connection previously was in full unicode mode as set by
- //! @[enter_unicode_mode] then the server will still expect queries
- //! to be UTF8 encoded. I.e. the server system variable
- //! @expr{character_set_client@} retains the value @expr{'utf8'@}.
- //!
- //! @note
- //! The server system variable @expr{character_set_results@} was
- //! added in MySQL 4.1.1.
- //!
+
//! @seealso
- //! @[enter_unicode_mode()], @[leave_unicode_mode()]
- int(0..1) enter_unicode_decode_mode (void|int force)
+ //! @[set_unicode_encode_mode]
{
- if (force || utf8_mode != UTF8_DECODE_QUERY) {
- if (catch {
- big_query("SET character_set_results = utf8");
- }) {
- return 0;
+ if (enable) {
+ ::big_query ("SET character_set_results = utf8");
+ utf8_mode |= UNICODE_DECODE_MODE;
}
- utf8_mode = UTF8_DECODE_QUERY;
+ else {
+ ::big_query ("SET character_set_results = " + get_charset());
+ utf8_mode &= ~UNICODE_DECODE_MODE;
}
- return 1;
+
}
- //! Leave unicode mode.
+ int get_unicode_decode_mode()
+ //! Returns nonzero if unicode decode mode is enabled, zero otherwise.
//!
- //! After this no automatic UTF8 conversion is done of queries and
- //! results.
+ //! @seealso
+ //! @[set_unicode_decode_mode]
+ {
+ return utf8_mode & UNICODE_DECODE_MODE;
+ }
+
+ void set_charset (string charset)
+ //! Changes the connection charset. Works similarly to sending the query
+ //! @expr{SET NAMES @[charset]@} but also records the charset on the
+ //! client side so that various client functions work correctly.
//!
- //! The statement "@expr{SET NAMES 'xxx'@}" is sent to the server,
- //! where @expr{xxx@} is the charset that was passed with the
- //! @expr{mysql_charset_name@} option when the connection was opened.
- //! If that option wasn't specified then the charset @expr{latin1@} is
- //! used, which is the default connection charset in MySQL.
+ //! @[charset] is a MySQL charset name or the special value
+ //! @expr{"unicode"@} (see below). You can use @expr{SHOW CHARACTER
+ //! SET@} to get a list of valid charsets.
//!
- //! @param force
- //! If this optional flag is nonzero then the statement to reset the
- //! connection charset is sent even though unicode mode already is
- //! disabled according to the internal flags.
+ //! Specifying @expr{"unicode"@} as charset is the same as
+ //! @expr{"utf8"@} except that unicode encode and decode modes are
+ //! enabled too. Briefly, this means that you can send queries as
+ //! unencoded unicode strings and will get back non-binary text
+ //! results as unencoded unicode strings. See
+ //! @[set_unicode_encode_mode] and @[set_unicode_decode_mode] for
+ //! further details.
//!
- //! @returns
- //! Returns @expr{1@} on success or @expr{0@} if the server doesn't
- //! support unicode (i.e. if the statement to reset the connection
- //! charset fails).
+ //! @throws
+ //! Throws an exception if the server doesn't support this, i.e. if
+ //! the statement @expr{SET NAMES@} fails. Support for it was added
+ //! in MySQL 4.1.0.
//!
//! @note
- //! Unicode support was added in MySQL 4.1.
+ //! If @[charset] is @expr{"latin1"@} and unicode encode mode is
+ //! enabled (the default) then @[big_query] can send wide unicode
+ //! queries transparently if the server supports UTF-8. See
+ //! @[set_unicode_encode_mode].
//!
-
+ //! @note
+ //! If unicode decode mode is already enabled (see
+ //! @[set_unicode_decode_mode]) then this function won't affect the
+ //! result charset (i.e. the MySQL system variable
+ //! @expr{character_set_results@}).
+ //!
+ //! Actually, a query @expr{SET character_set_results = utf8@} will
+ //! be sent immediately after setting the charset as above if
+ //! unicode decode mode is enabled and @[charset] isn't
+ //! @expr{"utf8"@}.
+ //!
+ //! @note
+ //! You should always use either this function or the
+ //! @expr{"mysql_charset_name"@} option to @[create] to set the
+ //! connection charset, or more specifically the charset that the
+ //! server expects queries to have (i.e. the MySQL system variable
+ //! @expr{character_set_client@}). Otherwise @[big_query] might not
+ //! work correctly.
+ //!
+ //! Afterwards you may change the system variable
+ //! @expr{character_set_connection@}, and also
+ //! @expr{character_set_results@} if unicode decode mode isn't
+ //! enabled.
+ //!
+ //! @note
+ //! The MySQL @expr{latin1@} charset is close to Windows
+ //! @expr{cp1252@}. The difference from ISO-8859-1 is a bunch of
+ //! printable chars in the range @expr{0x80..0x9f@} (which contains
+ //! control chars in ISO-8859-1). For instance, the euro currency
+ //! sign is @expr{0x80@}.
+ //!
+ //! You can use the @expr{mysql-latin1@} encoding in the
+ //! @[Locale.Charset] module to do conversions, or just use the
+ //! special @expr{"unicode"@} charset instead.
+ //!
//! @seealso
- //! @[enter_unicode_mode()], @[enter_unicode_decode_mode()]
- int(0..1) leave_unicode_mode (void|int force)
+ //! @[get_charset], @[set_unicode_encode_mode], @[set_unicode_decode_mode]
{
- if (force || utf8_mode) {
- if (catch {
- big_query("SET NAMES '" + (initial_charset || "latin1") + "'");
- }) {
- return 0;
+ charset = lower_case (charset);
+
+ ::set_charset (charset == "unicode" ? "utf8" : charset);
+
+ if (charset == "unicode" ||
+ utf8_mode & (LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE))
+ update_unicode_encode_mode_from_charset (charset);
+
+ if (charset == "unicode")
+ utf8_mode |= UNICODE_DECODE_MODE;
+ else if (utf8_mode & UNICODE_DECODE_MODE && charset != "utf8")
+ // This setting has been overridden by ::set_charset, so we need
+ // to reinstate it.
+ ::big_query ("SET character_set_results = utf8");
}
- utf8_mode = 0;
- }
- return 1;
- }
+
- string query_unicode_mode()
- //! Returns the current unicode mode status.
+ string get_charset()
+ //! Returns the MySQL name for the current connection charset.
//!
- //! @returns
- //! @string
- //! @value "full"
- //! Full unicode mode as set by @[enter_unicode_mode] is
- //! enabled.
- //! @value "decode"
- //! Decode unicode mode as set by @[enter_unicode_decode_mode]
- //! is enabled.
- //! @value 0
- //! Unicode mode is not enabled. C.f. @[leave_unicode_mode].
- //! @endstring
+ //! Returns @expr{"unicode"@} if unicode encode mode is enabled and
+ //! UTF-8 is used on the server side (i.e. in
+ //! @expr{character_set_connection@}).
+ //!
+ //! @note
+ //! In servers with full charset support (i.e. MySQL 4.1.0 or
+ //! later), this corresponds to the MySQL system variable
+ //! @expr{character_set_client@} (with one exception - see next
+ //! note) and thus controls the charset in which queries are sent.
+ //! The charset used for text strings in results might be something
+ //! else (and typically is if unicode decode mode is enabled; see
+ //! @[set_unicode_decode_mode]).
+ //!
+ //! @note
+ //! If the returned charset is @expr{latin1@} or @expr{unicode@} and
+ //! unicode encode mode is enabled (the default) then
+ //! @expr{character_set_client@} in the server might be either
+ //! @expr{latin1@} or @expr{utf8@}, depending on the last sent
+ //! query. See @[set_unicode_encode_mode] for more info.
+ //!
+ //! @seealso
+ //! @[set_charset]
{
- switch (utf8_mode) {
- case UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY: return "full";
- case UTF8_DECODE_QUERY: return "decode";
- default: return 0;
+ if (utf8_mode & UTF8_UNICODE_ENCODE_MODE && send_charset)
+ return "unicode";
+ return ::get_charset();
}
- }
+
#if constant( Mysql.mysql.MYSQL_NO_ADD_DROP_DB )
// Documented in the C-file.
194:
({ "\\\\", "\\\"", "\\0", "\\\'", "\\n", "\\r" }));
}
- //! Encode the apropriate sections of the query according to UTF8.
- //! ie Those sections that are not strings prefixed by BINARY.
- string utf8_encode_query(string q)
+ string latin1_to_utf8 (string s)
+ //! Converts a string in MySQL @expr{latin1@} format to UTF-8.
{
- string uq = upper_case(q);
- if (!has_value(uq, "BINARY")) return string_to_utf8(q);
- // The following optimization is disabled since it causes more
- // overhead in the case when q contains a large binary string (which
- // is arguably the main reason for q getting really large).
- //if ((q & ("\x7f" * sizeof(q))) == q) return q;
+ return string_to_utf8 (replace (s, ([
+ "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192",
+ "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021",
+ "\x88": "\u02C6", "\x89": "\u2030", "\x8a": "\u0160", "\x8b": "\u2039",
+ "\x8c": "\u0152", /*"\x8d": "\u008D",*/ "\x8e": "\u017D", /*"\x8f": "\u008F",*/
+ /*"\x90": "\u0090",*/ "\x91": "\u2018", "\x92": "\u2019", "\x93": "\u201C",
+ "\x94": "\u201D", "\x95": "\u2022", "\x96": "\u2013", "\x97": "\u2014",
+ "\x98": "\u02DC", "\x99": "\u2122", "\x9a": "\u0161", "\x9b": "\u203A",
+ "\x9c": "\u0153", /*"\x9d": "\u009D",*/ "\x9e": "\u017E", "\x9f": "\u0178",
+ ])));
+ }
-
+ string utf8_encode_query (string q, function(string:string) encode_fn)
+ //! Encodes the appropriate sections of the query with @[encode_fn].
+ //! Everything except strings prefixed by an introducer (i.e.
+ //! @expr{_something@} or @expr{N@}) is encoded.
+ {
// We need to find the segments that shouldn't be encoded.
string e = "";
- while(has_value(uq, "BINARY")) {
- string prefix = "";
- string suffix;
- sscanf(q, "%[^\'\"]%s", prefix, suffix);
- e += string_to_utf8(prefix);
- if (!suffix || !sizeof(suffix)) {
- q = uq = "";
- break;
- }
+ while (1) {
+ sscanf(q, "%[^\'\"]%s", string prefix, string suffix);
+ e += encode_fn (prefix);
-
+ if (suffix == "") break;
+
string quote = suffix[..0];
int start = 1;
int end;
242:
break;
}
- string uprefix = uq[..sizeof(prefix)-1];
- int is_binary;
- // Common cases.
- if (has_suffix(uprefix, "BINARY") || has_suffix(uprefix, "BINARY ")) {
- // Binary string.
- is_binary = 1;
- } else {
+ #define IS_IDENTIFIER_CHAR(chr) (Unicode.is_wordchar (chr) || \
+ (<'_', '$'>)[chr])
+
+ int intpos = -1;
+
+ // Optimize the use of _binary.
+ if (has_suffix (prefix, "_binary"))
+ intpos = sizeof (prefix) - sizeof ("_binary");
+ else if (has_suffix (prefix, "_binary "))
+ intpos = sizeof (prefix) - sizeof ("_binary ");
+
+ else {
// Find the white-space suffix of the prefix.
- int i = sizeof(uprefix);
+ int i = sizeof(prefix);
while (i--) {
- if (!(< ' ', '\n', '\r', '\t' >)[uprefix[i]]) break;
+ if (!(< ' ', '\n', '\r', '\t' >)[prefix[i]]) break;
}
- is_binary = has_suffix(uprefix, "BINARY" + uprefix[i+1..]);
+
+ if (i >= 0) {
+ if ((<'n', 'N'>)[prefix[i]])
+ // Probably got a national charset string.
+ intpos = i;
+ else {
+ // The following assumes all possible charset names contain
+ // only [a-zA-Z0-9_$] and are max 32 chars (from
+ // MY_CS_NAME_SIZE in m_ctype.h).
+ sscanf (reverse (prefix[i - 33..i]), "%[a-zA-Z0-9_$]%s",
+ string rev_intro, string rest);
+ if (sizeof (rev_intro) && rev_intro[-1] == '_' && sizeof (rest))
+ intpos = i - sizeof (rev_intro) + 1;
}
- if (is_binary) {
- e += suffix[..end];
+ }
+ }
+
+ int got_introducer;
+ if (intpos == 0)
+ // The prefix begins with the introducer.
+ got_introducer = 1;
+ else if (intpos > 0) {
+ // Check that the introducer sequence we found isn't a suffix of
+ // some longer keyword or identifier.
+ int prechar = prefix[intpos - 1];
+ if (!IS_IDENTIFIER_CHAR (prechar))
+ got_introducer = 1;
+ }
+
+ if (got_introducer) {
+ string s = suffix[..end];
+ if (String.width (s) > 8) {
+ string encoding = prefix[intpos..];
+ if (has_prefix (encoding, "_"))
+ sscanf (encoding[1..], "%[a-zA-Z0-9]", encoding);
+ else
+ encoding = "utf8"; // Gotta be "N".
+ s = s[1..sizeof (s) - 2];
+ if (sizeof (s) > 40) s = sprintf ("%O...", s[..37]);
+ else s = sprintf ("%O", s);
+ predef::error ("A string in the query should be %s encoded "
+ "but it is wide: %s\n", encoding, s);
+ }
+ e += s;
} else {
- e += string_to_utf8(suffix[..end]);
+ e += encode_fn (suffix[..end]);
}
-
+
q = suffix[end+1..];
- uq = uq[sizeof(uq)-sizeof(q)..];
+
}
- // Encode the trailer.
- e += string_to_utf8(q);
+
return e;
}
376:
}
}
+ // Shared implementation of big_query() and streaming_query();
+ // do_query names the inherited query function to call. The locals
+ // query, bindings and charset must be in scope where it is expanded.
+ //
+ //  1. Bindings, if given, are expanded into the query text.
+ //  2. If an explicit charset was passed, character_set_client is
+ //     switched to it (when it differs) and the query is sent as-is.
+ //  3. Otherwise, if unicode encode mode is active (send_charset is
+ //     set), the query is sent either as latin1 or UTF-8 encoded, and
+ //     character_set_client is switched only when it differs from
+ //     what was used last.
+ //  4. The query is run, the charset from step 2 is restored if
+ //     needed, and an object result is wrapped in a UnicodeWrapper
+ //     when unicode decode mode is on.
+ #define QUERY_BODY(do_query) \
+   if (bindings) \
+     query = .sql_util.emulate_bindings(query,bindings,this); \
+ \
+   string restore_charset; \
+   if (charset) { \
+     /* Temporary charset for this query only. */ \
+     restore_charset = send_charset || get_charset(); \
+     if (charset != restore_charset) \
+       ::big_query ("SET character_set_client=" + charset); \
+     else \
+       restore_charset = 0; \
+   } \
+ \
+   else if (send_charset) { \
+     string new_send_charset; \
+ \
+     if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \
+       if (String.width (query) == 8) \
+         new_send_charset = "latin1"; \
+       else { \
+         query = utf8_encode_query (query, latin1_to_utf8); \
+         new_send_charset = "utf8"; \
+       } \
+     } \
+ \
+     else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \
+       if (_can_send_as_latin1 (query)) \
+         new_send_charset = "latin1"; \
+       else { \
+         query = utf8_encode_query (query, string_to_utf8); \
+         new_send_charset = "utf8"; \
+       } \
+     } \
+ \
+     if (new_send_charset != send_charset) { \
+       /* The inherited big_query signals failure by throwing, so the */ \
+       /* error has to be caught to be inspected here. */ \
+       if (mixed err = catch { \
+           ::big_query ("SET character_set_client=" + new_send_charset); \
+         }) { \
+         if (new_send_charset == "utf8") \
+           predef::error ("The query is a wide string " \
+                          "and the MySQL server doesn't support UTF-8: %s\n", \
+                          describe_error (err)); \
+         else \
+           throw (err); \
+       } \
+       send_charset = new_send_charset; \
+     } \
+   } \
+ \
+   int|object res = ::do_query(query); \
+ \
+   if (restore_charset) { \
+     if (send_charset && (<"latin1", "utf8">)[charset]) \
+       /* Encode mode will switch again on demand; just record the */ \
+       /* charset the server now has. */ \
+       send_charset = charset; \
+     else \
+       ::big_query ("SET character_set_client=" + restore_charset); \
+   } \
+ \
+   if (!objectp(res)) return res; \
+ \
+   if (utf8_mode & UNICODE_DECODE_MODE) { \
+     return .sql_util.UnicodeWrapper(res); \
+   } \
+   return res;
+
+ Mysql.mysql_result big_query (string query,
+ mapping(string|int:mixed)|void bindings,
+ void|string charset)
+ //! Sends a query to the server.
//!
- int|object big_query(string q, mapping(string|int:mixed)|void bindings)
+ //! @param query
+ //! The SQL query.
+ //!
+ //! @param bindings
+ //! An optional bindings mapping. See @[Sql.query] for details about
+ //! this.
+ //!
+ //! @param charset
+ //! An optional charset that will be used temporarily while sending
+ //! @[query] to the server. If necessary, a query
+ //! @code
+ //! SET character_set_client=@[charset]
+ //! @endcode
+ //! is sent to the server first, then @[query] is sent as-is, and then
+ //! the connection charset is restored again (if necessary).
+ //!
+ //! Primarily useful with @[charset] set to @expr{"latin1"@} if
+ //! unicode encode mode (see @[set_unicode_encode_mode]) is enabled
+ //! (the default) and you have some large queries (typically blob
+ //! inserts) where you want to avoid the query parsing overhead.
+ //!
+ //! @returns
+ //! A @[Mysql.mysql_result] object is returned if the query is of a
+ //! kind that returns a result. Zero is returned otherwise.
+ //!
+ //! @seealso
+ //! @[Sql.big_query]
{
- if (bindings)
- q = .sql_util.emulate_bindings(q,bindings,this);
- if (utf8_mode & UTF8_ENCODE_QUERY) {
- // Mysql's line protocol is stupid; we need to detect
- // the binary strings in the query.
- q = utf8_encode_query(q);
+ QUERY_BODY (big_query);
}
- int|object res = ::big_query(q);
- if (!objectp(res)) return res;
-
- if (utf8_mode & UTF8_DECODE_QUERY) {
- return .sql_util.UnicodeWrapper(res);
+ Mysql.mysql_result streaming_query (string query,
+ mapping(string|int:mixed)|void bindings,
+ void|string charset)
+ //! Makes a streaming SQL query.
+ //!
+ //! This function sends the SQL query @[query] to the Mysql-server.
+ //! The result of the query is streamed through the returned
+ //! @[Mysql.mysql_result] object. Note that the involved database
+ //! tables are locked until all the results have been read.
+ //!
+ //! In all other respects, it behaves like @[big_query]; @[bindings]
+ //! and @[charset] are processed by the same code as there.
+ //!
+ //! @seealso
+ //! @[big_query]
+ {
+ QUERY_BODY (streaming_query);
}
- return ::big_query(q);
- }
+
-
+
int(0..1) is_keyword( string name )
//! Return 1 if the argument @[name] is a mysql keyword.
{
447:
mapping(string:string|int)|void options)
{
if (options) {
+ string charset = options->mysql_charset_name || "latin1";
+ if (charset == "unicode")
+ options->mysql_charset_name = "utf8";
+
::create(host||"", database||"", user||"", password||"", options);
- initial_charset = options->mysql_charset_name;
- switch (options->unicode_mode) {
- case "full": enter_unicode_mode(); break;
- case "decode": enter_unicode_decode_mode(); break;
- }
+
+ update_unicode_encode_mode_from_charset (lower_case (charset));
+
+ if (options->unicode_decode_mode)
+ set_unicode_decode_mode (1);
+
} else {
::create(host||"", database||"", user||"", password||"");
-
+
+ update_unicode_encode_mode_from_charset ("latin1");
}
}
#else
constant this_program_does_not_exist=1;
#endif /* constant(Mysql.mysql) */