Branch: Tag:

2022-01-26

2022-01-26 15:18:25 by Henrik Grubbström (Grubba) <grubba@grubba.org>

Sql.mysql: Clean up character set handling.

Adds helper functions fix_{query,result}_charset() that contain
code that used to be in the QUERY_BODY() macro. This makes the
code quite a bit easier to read.

Adds charset wrappers to list_{dbs,tables,fields}() that previously
changed behavior depending on what the send_charset was.

Remove some now obsoleted character set handling from Mysql.SqlTable.

Potential fix for intermittent failures for Mysql.SqlTable to handle
tables with wide characters in the table name.

580:   #define HAVE_MYSQL_FIELD_CHARSETNR_IFELSE(TRUE, FALSE) FALSE   #endif    + protected array(string(8bit)|zero) fix_query_charset(string query, +  string|void charset) + { +  if (charset) { +  string current_charset = send_charset || get_charset(); +  if (charset != current_charset) { +  CH_DEBUG ("Switching charset from %O to %O (due to charset arg).\n", +  current_charset, charset); +  ::big_query ("SET character_set_client=" + charset); +  /* Can't be changed automatically - has side effects. /mast */ +  /* ::big_query("SET character_set_connection=" + charset); */ +  return ({ query, current_charset }); +  } +  return ({ query, 0 }); +  } +  if (!send_charset) return ({ query, 0 }); +  +  string new_send_charset = send_charset; +  +  if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { +  if (String.width (query) == 8) +  new_send_charset = "latin1"; +  else { +  CH_DEBUG ("Converting (mysql-)latin1 query to utf8.\n"); +  query = utf8_encode_query (query, latin1_to_utf8, 2); +  new_send_charset = "utf8"; +  } +  } +  else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ +  /* NB: The send_charset may only be upgraded from +  * "latin1" to "utf8", not the other way around. +  * This is to avoid extraneous charset changes +  * where the charset is changed from query to query. +  */ +  if ((send_charset == "utf8") || !_can_send_as_latin1(query)) { +  CH_DEBUG ("Converting query to utf8.\n"); +  query = utf8_encode_query (query, string_to_utf8, 2); +  new_send_charset = "utf8"; +  } +  } +  +  if (new_send_charset != send_charset) { +  CH_DEBUG ("Switching charset from %O to %O.\n", +  send_charset, new_send_charset); +  if (mixed err = catch { +  ::big_query ("SET character_set_client=" + new_send_charset); +  /* Can't be changed automatically - has side effects. /mast */ +  /* ::big_query("SET character_set_connection=" + +  new_send_charset); */ +  }) { +  if (new_send_charset == "utf8") +  predef::error ("The query is a wide string " +  "and the MySQL server doesn't support UTF-8: %s\n", +  describe_error (err)); +  else +  throw (err); +  } +  send_charset = new_send_charset; +  } +  +  return ({ query, 0 }); + } +  + protected array|object|mapping|string|int fix_result_charset(array|object|mapping|string|int res) + { +  if (!res) return UNDEFINED; +  +  if (!(utf8_mode & UNICODE_DECODE_MODE)) return res; +  +  if (objectp(res)) { +  CH_DEBUG ("Using unicode wrapper for result.\n"); +  return +  HAVE_MYSQL_FIELD_CHARSETNR_IFELSE ( +  .sql_util.MySQLUnicodeWrapper(res), +  .sql_util.MySQLBrokenUnicodeWrapper(res)); +  } +  +  if (arrayp(res)) { +  return map(res, fix_result_charset); +  } +  +  if (mappingp(res)) { +  return mkmapping(fix_result_charset(indices(res)), +  fix_result_charset(values(res))); +  } +  +  if (stringp(res)) { +  return utf8_to_string(res); +  } +  +  return res; + } +    #define QUERY_BODY(do_query) \    if (bindings) \    query = .sql_util.emulate_bindings(query,bindings,this); \    \ -  string restore_charset; \ -  if (charset) { \ -  restore_charset = send_charset || get_charset(); \ -  if (charset != restore_charset) { \ -  CH_DEBUG ("Switching charset from %O to %O (due to charset arg).\n", \ -  restore_charset, charset); \ -  ::big_query ("SET character_set_client=" + charset); \ -  /* Can't be changed automatically - has side effects. /mast */ \ -  /* ::big_query("SET character_set_connection=" + charset); */ \ -  } else \ -  restore_charset = 0; \ -  } \ +  [query, string restore_charset] = fix_query_charset(query, charset); \    \ -  else if (send_charset) { \ -  string new_send_charset = send_charset; \ -  \ -  if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \ -  if (String.width (query) == 8) \ -  new_send_charset = "latin1"; \ -  else { \ -  CH_DEBUG ("Converting (mysql-)latin1 query to utf8.\n"); \ -  query = utf8_encode_query (query, latin1_to_utf8, 2); \ -  new_send_charset = "utf8"; \ -  } \ -  } \ -  \ -  else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \ -  /* NB: The send_charset may only be upgraded from \ -  * "latin1" to "utf8", not the other way around. \ -  * This is to avoid extraneous charset changes \ -  * where the charset is changed from query to query. \ -  */ \ -  if ((send_charset == "utf8") || !_can_send_as_latin1(query)) { \ -  CH_DEBUG ("Converting query to utf8.\n"); \ -  query = utf8_encode_query (query, string_to_utf8, 2); \ -  new_send_charset = "utf8"; \ -  } \ -  } \ -  \ -  if (new_send_charset != send_charset) { \ -  CH_DEBUG ("Switching charset from %O to %O.\n", \ -  send_charset, new_send_charset); \ -  if (mixed err = catch { \ -  ::big_query ("SET character_set_client=" + new_send_charset); \ -  /* Can't be changed automatically - has side effects. /mast */ \ -  /* ::big_query("SET character_set_connection=" + \ -  new_send_charset); */ \ -  }) { \ -  if (new_send_charset == "utf8") \ -  predef::error ("The query is a wide string " \ -  "and the MySQL server doesn't support UTF-8: %s\n", \ -  describe_error (err)); \ -  else \ -  throw (err); \ -  } \ -  send_charset = new_send_charset; \ -  } \ -  } \ -  \ +     CH_DEBUG ("Sending query with charset %O: %s.\n", \    charset || send_charset, \    (sizeof (query) > 200 ? \    sprintf ("%O...", query[..200]) : \    sprintf ("%O", query))); \    \ -  int|object res = ::do_query(query); \ +  int|object res = fix_result_charset(::do_query(query)); \    \    if (restore_charset) { \    if (send_charset && (<"latin1", "utf8">)[charset]) \
662:    } \    } \    \ -  if (!objectp(res)) return res; \ -  \ -  if (utf8_mode & UNICODE_DECODE_MODE) { \ -  CH_DEBUG ("Using unicode wrapper for result.\n"); \ -  return \ -  HAVE_MYSQL_FIELD_CHARSETNR_IFELSE ( \ -  .sql_util.MySQLUnicodeWrapper(res), \ -  .sql_util.MySQLBrokenUnicodeWrapper (res)); \ -  } \ +     return res;      variant Result big_query (string query,
764:      array(string) list_dbs(string|void wild)   { -  Result res = ::list_dbs(wild); +  Result res = +  fix_result_charset(::list_dbs(wild && fix_query_charset(wild)[0]));    array(string) ret = ({});    array(string) row;    while((row = res->fetch_row()) && sizeof(row)) {
775:      array(string) list_tables(string|void wild)   { -  Result res = ::list_tables(wild); +  Result res = +  fix_result_charset(::list_tables(wild && fix_query_charset(wild)[0]));    array(string) ret = ({});    array(string) row;    while((row = res->fetch_row()) && sizeof(row)) {
784:    return ret;   }    + array(mapping(string:mixed)) list_fields(string table, string|void wild) + { +  if (!wild) { +  // Very common case. +  return fix_result_charset(::list_fields(fix_query_charset(table)[0])); +  } +  +  string table_and_wild = table + "\0\0PIKE\0\0" + wild; +  table_and_wild = fix_query_charset(table_and_wild)[0]; +  array(string) a = table_and_wild / "\0\0PIKE\0\0"; +  if (sizeof(a) == 2) { +  // Common case. +  return fix_result_charset(::list_fields(@a)); +  } +  +  // Very uncommon case, but... +  // +  // Assume that the table name can not contain NUL characters. +  return fix_result_charset(::list_fields(a[0], a[1..] * "\0\0PIKE\0\0")); + } +    int(0..1) is_keyword( string name )   //! Return 1 if the argument @[name] is a mysql keyword that needs to   //! be quoted in a query. The list is currently up-to-date with MySQL