LCOV - code coverage report
Current view: top level - src/tds - iconv.c (source / functions) Hit Total Coverage
Test: FreeTDS coverage Lines: 299 437 68.4 %
Date: 2025-01-18 11:50:39 Functions: 18 19 94.7 %

          Line data    Source code
       1             : /* FreeTDS - Library of routines accessing Sybase and Microsoft databases
       2             :  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005  Brian Bruns
       3             :  * Copyright (C) 2010  Frediano Ziglio
       4             :  *
       5             :  * This library is free software; you can redistribute it and/or
       6             :  * modify it under the terms of the GNU Library General Public
       7             :  * License as published by the Free Software Foundation; either
       8             :  * version 2 of the License, or (at your option) any later version.
       9             :  *
      10             :  * This library is distributed in the hope that it will be useful,
      11             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13             :  * Library General Public License for more details.
      14             :  *
      15             :  * You should have received a copy of the GNU Library General Public
      16             :  * License along with this library; if not, write to the
      17             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      18             :  * Boston, MA 02111-1307, USA.
      19             :  */
      20             : 
      21             : /**
      22             :  * \file
      23             :  * \brief Handle character conversions to/from server
      24             :  */
      25             : 
      26             : #include <config.h>
      27             : 
      28             : #include <stdarg.h>
      29             : #include <stdio.h>
      30             : #include <assert.h>
      31             : 
      32             : #if HAVE_STRING_H
      33             : #include <string.h>
      34             : #endif /* HAVE_STRING_H */
      35             : #if HAVE_ERRNO_H
      36             : #include <errno.h>
      37             : #endif
      38             : 
      39             : #include <freetds/tds.h>
      40             : #include <freetds/iconv.h>
      41             : #include <freetds/bool.h>
      42             : #include <freetds/bytes.h>
      43             : #if HAVE_ICONV
      44             : #include <iconv.h>
      45             : #endif
      46             : 
      47             : #define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \
      48             :                                 (charset)->min_bytes_per_char : 0 )
      49             : 
      50             : 
      51             : static int collate2charset(TDSCONNECTION * conn, TDS_UCHAR collate[5]);
      52             : static size_t skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);
      53             : static int tds_iconv_info_init(TDSICONV * char_conv, int client_canonic, int server_canonic);
      54             : static bool tds_iconv_init(void);
      55             : static void _iconv_close(iconv_t * cd);
      56             : static void tds_iconv_info_close(TDSICONV * char_conv);
      57             : 
      58             : 
      59             : /**
      60             :  * \ingroup libtds
      61             :  * \defgroup conv Charset conversion
      62             :  * Convert between different charsets.
      63             :  */
      64             : 
      65             : #define TDS_ICONV_ENCODING_TABLES
      66             : #include <freetds/encodings.h>
      67             : 
      68             : /* this will contain real iconv names */
      69             : static const char *iconv_names[TDS_VECTOR_SIZE(canonic_charsets)];
      70             : static bool iconv_initialized = false;
      71             : static const char *ucs2name;
      72             : 
      73             : enum
      74             : { POS_ISO1, POS_UTF8, POS_UCS2LE, POS_UCS2BE };
      75             : 
      76             : static const struct {
      77             :         uint32_t len;
      78             :         /* this field must be aligned at least to 2 bytes */
      79             :         char data[12];
      80             : } test_strings[4] = {
      81             :         /* same string in required charsets */
      82             :         { 4, "Ao\xD3\xE5" },
      83             :         { 6, "Ao\xC3\x93\xC3\xA5" },
      84             :         { 8, "A\x00o\x000\xD3\x00\xE5\x00" },
      85             :         { 8, "\x00" "A\x00o\x000\xD3\x00\xE5" },
      86             : };
      87             : 
      88             : /**
      89             :  * Initialize charset searching for UTF-8, UCS-2 and ISO8859-1
      90             :  */
      91             : static bool
      92        1582 : tds_iconv_init(void)
      93             : {
      94             :         int i;
      95             :         iconv_t cd;
      96             : 
      97             :         /* first entries should be constants */
      98             :         assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0);
      99             :         assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0);
     100             :         assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0);
     101             :         assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0);
     102             : 
     103             :         /* fast tests for GNU-iconv */
     104        1582 :         cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8");
     105        1582 :         if (cd != (iconv_t) -1) {
     106        1582 :                 iconv_names[POS_ISO1] = "ISO-8859-1";
     107        1582 :                 iconv_names[POS_UTF8] = "UTF-8";
     108        1582 :                 tds_sys_iconv_close(cd);
     109             :         } else {
     110             : 
     111             :                 /* search names for ISO8859-1 and UTF-8 */
     112           0 :                 for (i = 0; iconv_aliases[i].alias; ++i) {
     113             :                         int j;
     114             : 
     115           0 :                         if (iconv_aliases[i].canonic != POS_ISO1)
     116           0 :                                 continue;
     117           0 :                         for (j = 0; iconv_aliases[j].alias; ++j) {
     118           0 :                                 if (iconv_aliases[j].canonic != POS_UTF8)
     119           0 :                                         continue;
     120             : 
     121           0 :                                 cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias);
     122           0 :                                 if (cd != (iconv_t) -1) {
     123           0 :                                         iconv_names[POS_ISO1] = iconv_aliases[i].alias;
     124           0 :                                         iconv_names[POS_UTF8] = iconv_aliases[j].alias;
     125           0 :                                         tds_sys_iconv_close(cd);
     126           0 :                                         break;
     127             :                                 }
     128             :                         }
     129           0 :                         if (iconv_names[POS_ISO1])
     130             :                                 break;
     131             :                 }
     132             :                 /* required characters not found !!! */
     133           0 :                 if (!iconv_names[POS_ISO1]) {
     134           0 :                         tdsdump_log(TDS_DBG_ERROR, "iconv name for ISO-8859-1 not found\n");
     135             :                         return false;
     136             :                 }
     137             :         }
     138             : 
     139             :         /* now search for UCS-2 */
     140        1582 :         cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE");
     141        1582 :         if (cd != (iconv_t) -1) {
     142        1582 :                 iconv_names[POS_UCS2LE] = "UCS-2LE";
     143        1582 :                 tds_sys_iconv_close(cd);
     144             :         }
     145        1582 :         cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE");
     146        1582 :         if (cd != (iconv_t) -1) {
     147        1582 :                 iconv_names[POS_UCS2BE] = "UCS-2BE";
     148        1582 :                 tds_sys_iconv_close(cd);
     149             :         }
     150             : 
     151             :         /* long search needed ?? */
     152        1582 :         if (!iconv_names[POS_UCS2LE] || !iconv_names[POS_UCS2BE]) {
     153           0 :                 for (i = 0; iconv_aliases[i].alias; ++i) {
     154           0 :                         if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0)
     155           0 :                                 continue;
     156             : 
     157           0 :                         cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]);
     158           0 :                         if (cd != (iconv_t) -1) {
     159             :                                 char ib[1];
     160             :                                 char ob[4];
     161             :                                 size_t il, ol;
     162             :                                 ICONV_CONST char *pib;
     163             :                                 char *pob;
     164           0 :                                 int byte_sequence = 0;
     165             : 
     166             :                                 /* try to convert 'A' and check result */
     167           0 :                                 ib[0] = 0x41;
     168           0 :                                 pib = ib;
     169           0 :                                 pob = ob;
     170           0 :                                 il = 1;
     171           0 :                                 ol = 4;
     172           0 :                                 ob[0] = ob[1] = 0;
     173           0 :                                 if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) {
     174             :                                         /* byte order sequence ?? */
     175           0 :                                         if (ol == 0) {
     176           0 :                                                 ob[0] = ob[2];
     177           0 :                                                 byte_sequence = 1;
     178             :                                                 /* TODO save somewhere */
     179             :                                         }
     180             : 
     181             :                                         /* save name without sequence (if present) */
     182           0 :                                         if (ob[0])
     183           0 :                                                 il = POS_UCS2LE;
     184             :                                         else
     185           0 :                                                 il = POS_UCS2BE;
     186           0 :                                         if (!iconv_names[il] || !byte_sequence)
     187           0 :                                                 iconv_names[il] = iconv_aliases[i].alias;
     188             :                                 }
     189           0 :                                 tds_sys_iconv_close(cd);
     190             :                         }
     191             :                 }
     192             :         }
     193             :         /* we need a UCS-2 (big endian or little endian) */
     194        1582 :         if (!iconv_names[POS_UCS2LE] && !iconv_names[POS_UCS2BE]) {
     195           0 :                 tdsdump_log(TDS_DBG_ERROR, "iconv name for UCS-2 not found\n");
     196             :                 return false;
     197             :         }
     198             : 
     199        1582 :         ucs2name = iconv_names[POS_UCS2LE] ? iconv_names[POS_UCS2LE] : iconv_names[POS_UCS2BE];
     200             : 
     201        7910 :         for (i = 0; i < 4; ++i)
     202        6344 :                 tdsdump_log(TDS_DBG_INFO1, "local name for %s is %s\n", canonic_charsets[i].name,
     203          16 :                             iconv_names[i] ? iconv_names[i] : "(null)");
     204             : 
     205             :         /* base conversions checks */
     206       25312 :         for (i = 0; i < 4 * 4; ++i) {
     207       25312 :                 const int from = i / 4;
     208       25312 :                 const int to = i % 4;
     209             :                 char ob[16];
     210             :                 size_t il, ol;
     211             :                 ICONV_CONST char *pib;
     212             :                 char *pob;
     213             :                 size_t res;
     214             : 
     215       25312 :                 if (!iconv_names[from] || !iconv_names[to])
     216           0 :                         continue;
     217       25312 :                 cd = tds_sys_iconv_open(iconv_names[to], iconv_names[from]);
     218       25312 :                 if (cd == (iconv_t) -1) {
     219           0 :                         tdsdump_log(TDS_DBG_ERROR, "iconv_open(%s, %s) failed\n", iconv_names[to], iconv_names[from]);
     220           0 :                         return false;
     221             :                 }
     222             : 
     223       25312 :                 pib = (ICONV_CONST char *) test_strings[from].data;
     224       25312 :                 il = test_strings[from].len;
     225       25312 :                 pob = ob;
     226       25312 :                 ol = sizeof(ob);
     227       25312 :                 res = tds_sys_iconv(cd, &pib, &il, &pob, &ol);
     228       25312 :                 tds_sys_iconv_close(cd);
     229             : 
     230       25312 :                 if (res != 0
     231       25312 :                     || sizeof(ob) - ol != test_strings[to].len
     232       25312 :                     || memcmp(ob, test_strings[to].data, test_strings[to].len) != 0) {
     233           0 :                         tdsdump_log(TDS_DBG_ERROR, "iconv(%s, %s) failed res %d\n", iconv_names[to], iconv_names[from], (int) res);
     234           0 :                         tdsdump_log(TDS_DBG_ERROR, "len %d\n", (int) (sizeof(ob) - ol));
     235             :                         return false;
     236             :                 }
     237             :         }
     238             : 
     239             :         /* success (it should always occurs) */
     240             :         return true;
     241             : }
     242             : 
     243             : /**
     244             :  * Get iconv name given canonic
     245             :  */
     246             : static const char *
     247        2722 : tds_set_iconv_name(int charset)
     248             : {
     249             :         int i;
     250             :         iconv_t cd;
     251             :         const char *name;
     252             : 
     253        2722 :         assert(iconv_initialized);
     254             : 
     255             :         /* try using canonic name and UTF-8 and UCS2 */
     256        2722 :         name = canonic_charsets[charset].name;
     257        2722 :         cd = tds_sys_iconv_open(iconv_names[POS_UTF8], name);
     258        2722 :         if (cd != (iconv_t) -1)
     259             :                 goto found;
     260           0 :         cd = tds_sys_iconv_open(ucs2name, name);
     261           0 :         if (cd != (iconv_t) -1)
     262             :                 goto found;
     263             : 
     264             :         /* try all alternatives */
     265           0 :         for (i = 0; iconv_aliases[i].alias; ++i) {
     266           0 :                 if (iconv_aliases[i].canonic != charset)
     267           0 :                         continue;
     268             : 
     269           0 :                 name = iconv_aliases[i].alias;
     270           0 :                 cd = tds_sys_iconv_open(iconv_names[POS_UTF8], name);
     271           0 :                 if (cd != (iconv_t) -1)
     272             :                         goto found;
     273           0 :                 cd = tds_sys_iconv_open(ucs2name, name);
     274           0 :                 if (cd != (iconv_t) -1)
     275             :                         goto found;
     276             :         }
     277             : 
     278             :         /* charset not found, pretend it's ISO 8859-1 */
     279           0 :         iconv_names[charset] = canonic_charsets[POS_ISO1].name;
     280           0 :         return NULL;
     281             : 
     282        2722 : found:
     283        2722 :         iconv_names[charset] = name;
     284        2722 :         tds_sys_iconv_close(cd);
     285        2722 :         return name;
     286             : }
     287             : 
     288             : static void
     289             : tds_iconv_reset(TDSICONV *conv)
     290             : {
     291             :         /*
     292             :          * (min|max)_bytes_per_char can be used to divide
     293             :          * so init to safe values
     294             :          */
     295       19558 :         conv->to.charset.min_bytes_per_char = 1;
     296       19558 :         conv->to.charset.max_bytes_per_char = 1;
     297       19558 :         conv->from.charset.min_bytes_per_char = 1;
     298       19558 :         conv->from.charset.max_bytes_per_char = 1;
     299             : 
     300       19558 :         conv->to.charset.name = conv->from.charset.name = "";
     301       19558 :         conv->to.charset.canonic = conv->from.charset.canonic = 0;
     302       19558 :         conv->to.cd = (iconv_t) -1;
     303       19558 :         conv->from.cd = (iconv_t) -1;
     304             : }
     305             : 
     306             : /**
     307             :  * Allocate iconv stuff
     308             :  * \return 0 for success
     309             :  */
     310             : int
     311        3829 : tds_iconv_alloc(TDSCONNECTION * conn)
     312             : {
     313             :         int i;
     314             :         TDSICONV *char_conv;
     315             : 
     316        3829 :         assert(!conn->char_convs);
     317        3829 :         if (!(conn->char_convs = tds_new(TDSICONV *, initial_char_conv_count + 1)))
     318             :                 return 1;
     319        3829 :         char_conv = tds_new0(TDSICONV, initial_char_conv_count);
     320        3829 :         if (!char_conv) {
     321           0 :                 TDS_ZERO_FREE(conn->char_convs);
     322           0 :                 return 1;
     323             :         }
     324        3829 :         conn->char_conv_count = initial_char_conv_count + 1;
     325             : 
     326       11487 :         for (i = 0; i < initial_char_conv_count; ++i) {
     327        7658 :                 conn->char_convs[i] = &char_conv[i];
     328       15316 :                 tds_iconv_reset(&char_conv[i]);
     329             :         }
     330             : 
     331             :         /* chardata is just a pointer to another iconv info */
     332        3829 :         conn->char_convs[initial_char_conv_count] = conn->char_convs[client2server_chardata];
     333             : 
     334        3829 :         return 0;
     335             : }
     336             : 
     337             : /**
     338             :  * \addtogroup conv
     339             :  * @{ 
     340             :  * Set up the initial iconv conversion descriptors.
     341             :  * When the socket is allocated, three TDSICONV structures are attached to iconv.  
     342             :  * They have fixed meanings:
     343             :  *      \li 0. Client <-> UCS-2 (client2ucs2)
     344             :  *      \li 1. Client <-> server single-byte charset (client2server_chardata)
     345             :  *
     346             :  * Other designs that use less data are possible, but these three conversion needs are 
     347             :  * very often needed.  By reserving them, we avoid searching the array for our most common purposes.
     348             :  *
     349             :  * To solve different iconv names and portability problems FreeTDS maintains 
     350             :  * a list of aliases each charset.  
     351             :  * 
     352             :  * First we discover the names of our minimum required charsets (UTF-8, ISO8859-1 and UCS2).  
     353             :  * Later, as and when it's needed, we try to discover others.
     354             :  *
     355             :  * There is one list of canonic names (GNU iconv names) and two sets of aliases
     356             :  * (one for other iconv implementations and another for Sybase). For every
     357             :  * canonic charset name we cache the iconv name found during discovery. 
     358             :  */
     359             : TDSRET
     360        3697 : tds_iconv_open(TDSCONNECTION * conn, const char *charset, int use_utf16)
     361             : {
     362             :         static const char UCS_2LE[] = "UCS-2LE";
     363             :         int canonic;
     364        3697 :         int canonic_charset = tds_canonical_charset(charset);
     365        3697 :         int canonic_env_charset = conn->env.charset ? tds_canonical_charset(conn->env.charset) : -1;
     366             :         int fOK;
     367             : 
     368        3697 :         TDS_ENCODING *client = &conn->char_convs[client2ucs2]->from.charset;
     369        3697 :         TDS_ENCODING *server = &conn->char_convs[client2ucs2]->to.charset;
     370             : 
     371        3697 :         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open(%p, %s, %d)\n", conn, charset, use_utf16);
     372             : 
     373             :         /* TDS 5.0 support only UTF-16 encodings */
     374        3697 :         if (IS_TDS50(conn))
     375         714 :                 use_utf16 = true;
     376             : 
     377             :         /* initialize */
     378        3697 :         if (!iconv_initialized) {
     379        1582 :                 if (!tds_iconv_init()) {
     380           0 :                         tdsdump_log(TDS_DBG_ERROR, "error: tds_iconv_init() failed; "
     381             :                                                    "try using GNU libiconv library\n");
     382             :                         return TDS_FAIL;
     383             :                 }
     384        1582 :                 iconv_initialized = true;
     385             :         }
     386             : 
     387             :         /* 
     388             :          * Client <-> UCS-2 (client2ucs2)
     389             :          */
     390        3697 :         tdsdump_log(TDS_DBG_FUNC, "setting up conversions for client charset \"%s\"\n", charset);
     391             : 
     392        3697 :         tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, UCS_2LE);
     393             : 
     394        3697 :         fOK = 0;
     395        3697 :         if (use_utf16) {
     396        2945 :                 canonic = TDS_CHARSET_UTF_16LE;
     397        2945 :                 fOK = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
     398             :         }
     399        2945 :         if (!fOK) {
     400         752 :                 canonic = TDS_CHARSET_UCS_2LE;
     401         752 :                 fOK = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
     402             :         }
     403        3697 :         if (!fOK)
     404             :                 return TDS_FAIL;
     405             : 
     406             :         /* 
     407             :          * How many UTF-8 bytes we need is a function of what the input character set is.
     408             :          * TODO This could definitely be more sophisticated, but it deals with the common case.
     409             :          */
     410        3697 :         if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) {
     411             :                 /* ie client is UTF-8 and server is ISO-8859-1 or variant. */
     412           0 :                 client->max_bytes_per_char = 3;
     413             :         }
     414             : 
     415             :         /* 
     416             :          * Client <-> server single-byte charset
     417             :          * TODO: the server hasn't reported its charset yet, so this logic can't work here.  
     418             :          *       not sure what to do about that yet.  
     419             :          */
     420        3697 :         conn->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY;
     421        3697 :         if (canonic_env_charset >= 0) {
     422           0 :                 tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, conn->env.charset);
     423           0 :                 fOK = tds_iconv_info_init(conn->char_convs[client2server_chardata], canonic_charset, canonic_env_charset);
     424           0 :                 if (!fOK)
     425             :                         return TDS_FAIL;
     426             :         } else {
     427        3697 :                 conn->char_convs[client2server_chardata]->from.charset = canonic_charsets[canonic_charset];
     428        3697 :                 conn->char_convs[client2server_chardata]->to.charset = canonic_charsets[canonic_charset];
     429             :         }
     430             : 
     431        3697 :         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open: done\n");
     432             :         return TDS_SUCCESS;
     433             : }
     434             : 
     435             : /**
     436             :  * Open iconv descriptors to convert between character sets (both directions).
     437             :  * 1.  Look up the canonical names of the character sets.
     438             :  * 2.  Look up their widths.
     439             :  * 3.  Ask iconv to open a conversion descriptor.
     440             :  * 4.  Fail if any of the above offer any resistance.  
     441             :  * \remarks The charset names written to \a iconv will be the canonical names, 
     442             :  *          not necessarily the names passed in. 
     443             :  */
     444             : static int
     445        8248 : tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canonical)
     446             : {
     447        8248 :         TDS_ENCODING *client = &char_conv->from.charset;
     448        8248 :         TDS_ENCODING *server = &char_conv->to.charset;
     449             : 
     450        8248 :         assert(char_conv->to.cd == (iconv_t) -1);
     451        8248 :         assert(char_conv->from.cd == (iconv_t) -1);
     452             : 
     453        8248 :         if (client_canonical < 0) {
     454           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%d\" invalid\n", client_canonical);
     455             :                 return 0;
     456             :         }
     457             : 
     458        8248 :         if (server_canonical < 0) {
     459           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%d\" invalid\n", server_canonical);
     460             :                 return 0;
     461             :         }
     462             : 
     463        8248 :         *client = canonic_charsets[client_canonical];
     464        8248 :         *server = canonic_charsets[server_canonical];
     465             : 
     466             :         /* special case, same charset, no conversion */
     467        8248 :         if (client_canonical == server_canonical) {
     468          90 :                 char_conv->to.cd = (iconv_t) -1;
     469          90 :                 char_conv->from.cd = (iconv_t) -1;
     470          90 :                 char_conv->flags = TDS_ENCODING_MEMCPY;
     471          90 :                 return 1;
     472             :         }
     473             : 
     474        8158 :         char_conv->flags = 0;
     475             : 
     476             :         /* get iconv names */
     477        8158 :         if (!iconv_names[client_canonical]) {
     478           0 :                 if (!tds_set_iconv_name(client_canonical)) {
     479           0 :                         tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
     480             :                                                   client_canonical, iconv_names[client_canonical]);
     481             :                 }
     482             :         }
     483             :         
     484        8158 :         if (!iconv_names[server_canonical]) {
     485        2722 :                 if (!tds_set_iconv_name(server_canonical)) {
     486           0 :                         tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
     487             :                                                   server_canonical, iconv_names[server_canonical]);
     488             :                 }
     489             :         }
     490             : 
     491        8158 :         char_conv->to.cd = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]);
     492        8158 :         if (char_conv->to.cd == (iconv_t) -1) {
     493           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name);
     494             :         }
     495             : 
     496        8158 :         char_conv->from.cd = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);
     497        8158 :         if (char_conv->from.cd == (iconv_t) -1) {
     498           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);
     499             :         }
     500             : 
     501             :         /* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */
     502             : 
     503             :         /* tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name); */
     504             : 
     505             :         return 1;
     506             : }
     507             : 
     508             : 
     509             : static void
     510             : _iconv_close(iconv_t * cd)
     511             : {
     512             :         static const iconv_t invalid = (iconv_t) -1;
     513             : 
     514       31884 :         if (*cd != invalid) {
     515       16220 :                 tds_sys_iconv_close(*cd);
     516       16220 :                 *cd = invalid;
     517             :         }
     518             : }
     519             : /* Close both conversion descriptors (to.cd and from.cd) of char_conv; the structure itself is not freed. */
     520             : static void
     521       15942 : tds_iconv_info_close(TDSICONV * char_conv)
     522             : {
     523       31884 :         _iconv_close(&char_conv->to.cd);
     524       31884 :         _iconv_close(&char_conv->from.cd);
     525       15942 : }
     526             : /** Close the conversion descriptors of every entry in conn->char_convs[0 .. char_conv_count-1]; no memory is released here. */
     527             : void
     528           0 : tds_iconv_close(TDSCONNECTION * conn)
     529             : {
     530             :         int i;
     531             : 
     532       15942 :         for (i = 0; i < conn->char_conv_count; ++i)
     533       15942 :                 tds_iconv_info_close(conn->char_convs[i]);
     534           0 : }
     535             : /* Number of TDSICONV structures obtained per allocation in tds_iconv_get_info(). */
     536             : #define CHUNK_ALLOC 4
     537             : /** Close all conversion descriptors of conn, free the TDSICONV storage, hand the char_convs pointer array to TDS_ZERO_FREE and reset char_conv_count to 0; does nothing if char_convs is NULL. */
     538             : void
     539        3805 : tds_iconv_free(TDSCONNECTION * conn)
     540             : {
     541             :         int i;
     542             : 
     543        3805 :         if (!conn->char_convs)
     544             :                 return;
     545        3805 :         tds_iconv_close(conn);
     546             :         /* NOTE(review): char_convs[0] is freed as one allocation; presumably it backs the first initial_char_conv_count + 1 entries, which are set up outside this view -- confirm. Later entries come from tds_iconv_get_info(), one allocation per CHUNK_ALLOC entries, hence the stride of the loop below. */
     547        3805 :         free(conn->char_convs[0]);
     548        6756 :         for (i = initial_char_conv_count + 1; i < conn->char_conv_count; i += CHUNK_ALLOC)
     549        2951 :                 free(conn->char_convs[i]);
     550        3805 :         TDS_ZERO_FREE(conn->char_convs);
     551        3805 :         conn->char_conv_count = 0;
     552             : }
     553             : /* Report message number err through tdserror() using the context of tds; does nothing when tds is NULL. */
     554             : static void
     555             : tds_iconv_err(TDSSOCKET *tds, int err)
     556             : {
     557        5081 :         if (tds)
     558        3313 :                 tdserror(tds_get_ctx(tds), tds, err, 0);
     559             : }
     560             : 
     561             : /** 
     562             :  * Wrapper around iconv(3).  Same parameters, with slightly different behavior.
     563             :  * \param tds state information for the socket and the TDS protocol
     564             :  * \param io Enumerated value indicating whether the data are being sent to or received from the server. 
     565             :  * \param conv information about the encodings involved, including the iconv(3) conversion descriptors. 
     566             :  * \param inbuf address of pointer to the input buffer of data to be converted.  
     567             :  * \param inbytesleft address of count of bytes in \a inbuf.
     568             :  * \param outbuf address of pointer to the output buffer.  
     569             :  * \param outbytesleft address of count of bytes in \a outbuf.
     570             :  * \retval number of irreversible conversions performed.  -1 on error, see iconv(3) documentation for 
     571             :  * a description of the possible values of \e errno.  
     572             :  * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL.  Like iconv(3), all pointers will 
     573             :  *      be updated.  Success is signified by a nonnegative return code and \a *inbytesleft == 0.  
     574             :  *      If \a conv has the TDS_ENCODING_MEMCPY flag set, or its conversion descriptor is -1, \a inbuf is copied to \a outbuf 
     575             :  *      (as much as fits; -1 with E2BIG is returned if it does not all fit), and all parameters updated accordingly. 
     576             :  * 
     577             :  *      If a character in \a inbuf cannot be converted because no such character exists in the
     578             :  *      \a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion. 
     579             :  *      The message varies depending on the direction of the data.  
     580             :  *      On a read error, we emit Msg 2403, Severity 16 (EX_INFO):
     581             :  *              "WARNING! Some character(s) could not be converted into client's character set. 
     582             :  *                      Unconverted bytes were changed to question marks ('?')."
     583             :  *      On a write error we emit Msg 2402, Severity 16 (EX_USER):
     584             :  *              "Error converting client characters into server's character set. Some character(s) could not be converted."
     585             :  *        and return an error code.  Client libraries relying on this routine should reflect an error back to the application.  
     586             :  *
     587             :  * \todo Check for variable multibyte non-UTF-8 input character set.  
     588             :  * \todo Use more robust error message generation.  
     589             :  * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'.  
     590             :  * \todo Support alternative to '?' for the replacement character.  
     591             :  */
     592             : size_t
     593      805583 : tds_iconv(TDSSOCKET * tds, TDSICONV * conv, TDS_ICONV_DIRECTION io,
     594             :           const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft)
     595             : {
     596             :         static const iconv_t invalid = (iconv_t) -1;
     597      805583 :         TDSICONVDIR *from = NULL;     /* record whose charset describes the input data */
     598      805583 :         TDSICONVDIR *to = NULL;       /* record whose cd performs the conversion and whose charset describes the output */
     599             : 
     600      805583 :         iconv_t error_cd = invalid;   /* opened on demand: UTF-8 -> output charset, used to emit '?' */
     601             : 
     602      805583 :         char quest_mark[] = "?";      /* best to leave non-const; implementations vary */
     603             :         ICONV_CONST char *pquest_mark;
     604             :         size_t lquest_mark;
     605             :         size_t irreversible;
     606             :         size_t one_character;
     607      805583 :         bool eilseq_raised = false;
     608             :         int conv_errno;
     609             :         /* cast away const-ness */
     610      805583 :         TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress;
     611             : 
     612      805583 :         assert(inbuf && inbytesleft && outbuf && outbytesleft);
     613             : 
     614             :         /* if empty there's nothing to return.
     615             :          * This fixes the case of some iconv implementations that do
     616             :          * not handle *inbuf == NULL and *inbytesleft == 0 as
     617             :          * empty strings
     618             :          */
     619      805583 :         if (*inbytesleft == 0)
     620             :                 return 0;
     621             :         /* select the direction records according to io */
     622      805305 :         switch (io) {
     623      133574 :         case to_server:
     624      133574 :                 from = &conv->from;
     625      133574 :                 to = &conv->to;
     626      133574 :                 break;
     627      671731 :         case to_client:
     628      671731 :                 from = &conv->to;
     629      671731 :                 to = &conv->from;
     630      671731 :                 break;
     631           0 :         default:
     632           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out.  \n", io);
     633           0 :                 assert(io == to_server || io == to_client);
     634             :                 break;  /* NOTE(review): if assert() is compiled out, from/to are still NULL here and to->cd is dereferenced below unless TDS_ENCODING_MEMCPY is set */
     635             :         }
     636             : 
     637             :         /* silly case, memcpy: copy as many bytes as fit, advance all pointers/counters, and fail with E2BIG if the input did not fit entirely */
     638      805305 :         if (conv->flags & TDS_ENCODING_MEMCPY || to->cd == invalid) {
     639      158071 :                 size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;
     640             : 
     641      158071 :                 memcpy(*outbuf, *inbuf, len);
     642      158071 :                 conv_errno = *inbytesleft > *outbytesleft ? E2BIG : 0;  /* computed before the counters are decremented */
     643      158071 :                 *inbytesleft -= len;
     644      158071 :                 *outbytesleft -= len;
     645      158071 :                 *inbuf += len;
     646      158071 :                 *outbuf += len;
     647      158071 :                 errno = conv_errno;
     648      158071 :                 return conv_errno ? (size_t) -1 : 0;
     649             :         }
     650             : 
     651             :         /*
     652             :          * Call iconv() as many times as necessary, until we reach the end of input or exhaust output.  
     653             :          */
     654             :         for (;;) {
     655     1114114 :                 conv_errno = 0;
     656     1114114 :                 irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);
     657             : 
     658             :                 /* iconv success: run one more pass with inbuf == NULL (shift state reset), then leave the loop */
     659     1114114 :                 if (irreversible != (size_t) - 1) {
     660      927572 :                         if (irreversible > 0)
     661           0 :                                 eilseq_raised = true;   /* irreversible conversions are reported like illegal sequences */
     662             : 
     663             :                         /* here we detect end of conversion and try to reset shift state */
     664      927572 :                         if (inbuf) {
     665             :                                 /*
     666             :                                  * if inbuf or *inbuf is NULL iconv reset the shift state.
     667             :                                  * Note that setting inbytesleft to NULL can cause core so don't do it!
     668             :                                  */
     669      463786 :                                 inbuf = NULL;
     670      463786 :                                 continue;
     671             :                         }
     672             :                         break;
     673             :                 }
     674             : 
     675             :                 /* save errno, other function could change its value */
     676      186542 :                 conv_errno = errno;
     677             : 
     678      186542 :                 if (conv_errno == EILSEQ)
     679        8936 :                         eilseq_raised = true;
     680             :                 /* '?' substitution is attempted only for data going to the client and only while real input is being converted (inbuf not yet NULLed) */
     681      186542 :                 if (!eilseq_raised || io != to_client || !inbuf)
     682             :                         break;
     683             :                 /* 
     684             :                  * Invalid input sequence encountered reading from server. 
     685             :                  * Skip one input sequence, adjusting pointers. 
     686             :                  */
     687        3100 :                 one_character = skip_one_input_sequence(to->cd, &from->charset, inbuf, inbytesleft);
     688             : 
     689        3100 :                 if (!one_character)
     690             :                         break;  /* nothing could be skipped, give up */
     691             : 
     692             :                 /* 
     693             :                  * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set.  
     694             :                  * In unimaginably weird circumstances, this might be impossible.
     695             :                  * We use UTF-8 instead of ASCII because some implementations 
     696             :                  * do not convert singlebyte <-> singlebyte.
     697             :                  */
     698        3100 :                 if (error_cd == invalid) {
     699        1550 :                         error_cd = tds_sys_iconv_open(to->charset.name, iconv_names[POS_UTF8]);
     700        1550 :                         if (error_cd == invalid) {
     701             :                                 break;  /* what to do? */
     702             :                         }
     703             :                 }
     704             : 
     705        3100 :                 lquest_mark = 1;
     706        3100 :                 pquest_mark = quest_mark;
     707             : 
     708        3100 :                 irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);
     709             : 
     710        3100 :                 if (irreversible == (size_t) - 1)
     711             :                         break;  /* the '?' itself could not be written */
     712             : 
     713        3094 :                 if (!*inbytesleft)
     714             :                         break;  /* the skipped sequence was the end of the input */
     715             :         }
     716             :         /* report an illegal/irreversible sequence only once per conv: suppress->eilseq is set after the first report */
     717      647234 :         if (eilseq_raised && !suppress->eilseq) {
     718             :                 /* invalid multibyte input sequence encountered */
     719        5081 :                 if (io == to_client) {
     720        1544 :                         if (irreversible == (size_t) - 1) {
     721             :                                 tds_iconv_err(tds, TDSEICONV2BIG);
     722             :                         } else {
     723        1544 :                                 tds_iconv_err(tds, TDSEICONVI);
     724        1544 :                                 conv_errno = 0; /* last call succeeded after '?' substitution: caller sees no errno */
     725             :                         }
     726             :                 } else {
     727             :                         tds_iconv_err(tds, TDSEICONVO);
     728             :                 }
     729        5081 :                 suppress->eilseq = 1;
     730             :         }
     731             :         /* EINVAL and E2BIG are each reported once, unless the matching suppress flag is already set */
     732      645690 :         switch (conv_errno) {
     733        2376 :         case EINVAL:            /* incomplete multibyte sequence is encountered */
     734        2376 :                 if (suppress->einval)
     735             :                         break;
     736             :                 /* in chunk conversion this can mean we end a chunk inside a character */
     737           0 :                 tds_iconv_err(tds, TDSEICONVAVAIL);
     738           0 :                 suppress->einval = 1;
     739           0 :                 break;
     740      175230 :         case E2BIG:             /* output buffer has no more room */
     741      175230 :                 if (suppress->e2big)
     742             :                         break;
     743           0 :                 tds_iconv_err(tds, TDSEICONVIU);
     744           0 :                 suppress->e2big = 1;
     745           0 :                 break;
     746             :         default:
     747             :                 break;
     748             :         }
     749             : 
     750      824840 :         if (error_cd != invalid) {
     751        1550 :                 tds_sys_iconv_close(error_cd);
     752             :         }
     753             :         /* restore the saved errno: the calls above may have changed it */
     754      647234 :         errno = conv_errno;
     755      647234 :         return irreversible;
     756             : }
     757             : 
     758             : /**
     759             :  * Get an iconv info structure for the given client/server canonical charset pair, allocating and initializing one if no searched entry matches. \returns the entry, or NULL if allocation or initialization fails.
     760             :  */
     761             : TDSICONV *
     762       20467 : tds_iconv_get_info(TDSCONNECTION * conn, int canonic_client, int canonic_server)
     763             : {
     764             :         TDSICONV *info;
     765             :         int i;
     766             : 
     767             :         /* search a charset from already allocated charsets (newest first; entries below initial_char_conv_count are not searched) */
     768       62461 :         for (i = conn->char_conv_count; --i >= initial_char_conv_count;)
     769       37443 :                 if (canonic_client == conn->char_convs[i]->from.charset.canonic
     770       28569 :                     && canonic_server == conn->char_convs[i]->to.charset.canonic)
     771             :                         return conn->char_convs[i];
     772             : 
     773             :         /* allocate a new iconv structure: when the count reaches a chunk boundary, add CHUNK_ALLOC structures and CHUNK_ALLOC pointer slots */
     774        4551 :         if (conn->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {
     775             :                 TDSICONV **p;
     776             :                 TDSICONV *infos;
     777             : 
     778        2975 :                 infos = tds_new(TDSICONV, CHUNK_ALLOC);
     779        2975 :                 if (!infos)
     780             :                         return NULL;
     781        2975 :                 p = (TDSICONV **) realloc(conn->char_convs, sizeof(TDSICONV *) * (conn->char_conv_count + CHUNK_ALLOC));
     782        2975 :                 if (!p) {
     783           0 :                         free(infos);    /* conn->char_convs is left untouched on failure */
     784           0 :                         return NULL;
     785             :                 }
     786        2975 :                 conn->char_convs = p;
     787        2975 :                 memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);
     788       14875 :                 for (i = 0; i < CHUNK_ALLOC; ++i) {
     789       11900 :                         conn->char_convs[i + conn->char_conv_count] = &infos[i];
     790       23800 :                         tds_iconv_reset(&infos[i]);
     791             :                 }
     792             :         }
     793        4551 :         info = conn->char_convs[conn->char_conv_count++];       /* take the next unused slot */
     794             : 
     795             :         /* init */
     796        4551 :         if (tds_iconv_info_init(info, canonic_client, canonic_server))
     797             :                 return info;
     798             :         /* initialization failed: close whatever was opened and give the slot back */
     799           0 :         tds_iconv_info_close(info);
     800           0 :         --conn->char_conv_count;
     801           0 :         return NULL;
     802             : }
     803             : /** Translate both charset names with tds_canonical_charset() and return the result of tds_iconv_get_info() for that pair; logs and returns NULL if either name is unknown. */
     804             : TDSICONV *
     805          16 : tds_iconv_get(TDSCONNECTION * conn, const char *client_charset, const char *server_charset)
     806             : {
     807          16 :         int canonic_client_charset_num = tds_canonical_charset(client_charset);
     808          16 :         int canonic_server_charset_num = tds_canonical_charset(server_charset);
     809             : 
     810          16 :         if (canonic_client_charset_num < 0) {
     811           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", client_charset);
     812             :                 return NULL;
     813             :         }
     814          16 :         if (canonic_server_charset_num < 0) {
     815           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", server_charset);
     816             :                 return NULL;
     817             :         }
     818             : 
     819          16 :         return tds_iconv_get_info(conn, canonic_client_charset_num, canonic_server_charset_num);
     820             : }
     821             : 
     822             : /* change singlebyte conversions according to server: make conn->char_convs[client2server_chardata] refer to a conversion whose server side is canonic_charset_num */
     823             : static void
     824        7771 : tds_srv_charset_changed_num(TDSCONNECTION * conn, int canonic_charset_num)
     825             : {
     826        7771 :         TDSICONV *char_conv = conn->char_convs[client2server_chardata];
     827             :         /* on TDS 7+ connections a request for ISO-8859-1 is replaced by CP1252 */
     828        7771 :         if (IS_TDS7_PLUS(conn) && canonic_charset_num == TDS_CHARSET_ISO_8859_1)
     829           0 :                 canonic_charset_num = TDS_CHARSET_CP1252;
     830             : 
     831        7771 :         tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charsets[canonic_charset_num].name);
     832             :         /* nothing to do when the current conversion already targets this charset */
     833        7771 :         if (canonic_charset_num == char_conv->to.charset.canonic)
     834             :                 return;
     835             : 
     836             :         /* find and set conversion; the client charset is taken from the client2ucs2 entry, and the old conversion is kept if none can be obtained */
     837        2425 :         char_conv = tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset_num);
     838        2425 :         if (char_conv)
     839        2425 :                 conn->char_convs[client2server_chardata] = char_conv;
     840             : }
     841             : /** Switch the single-byte server charset of conn to the charset named by charset; an unknown name is logged and otherwise ignored. */
     842             : void
     843        3628 : tds_srv_charset_changed(TDSCONNECTION * conn, const char *charset)
     844             : {
     845        3628 :         int n = tds_canonical_charset(charset);
     846             : 
     847             :         /* ignore request to change to unknown charset */
     848        3628 :         if (n < 0) {
     849           0 :                 tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);
     850             :                 return;
     851             :         }
     852             : 
     853        3628 :         tds_srv_charset_changed_num(conn, n);
     854             : }
     855             : 
     856             : /* change singlebyte conversions according to server; the charset is derived from the 5-byte collation by collate2charset() */
     857             : void
     858        4143 : tds7_srv_charset_changed(TDSCONNECTION * conn, TDS_UCHAR collation[5])
     859             : {
     860        4143 :         tds_srv_charset_changed_num(conn, collate2charset(conn, collation));
     861        4143 : }
     862             : 
     863             : /**
     864             :  * Move the input sequence pointer to the next valid position.
     865             :  * Used when an input character cannot be converted.  \a cd is the descriptor that was converting \a *input, \a charset is the encoding of \a *input.
     866             :  * \returns number of bytes skipped (\a *input and \a *input_size are already adjusted), or 0 if nothing could be skipped.
     867             :  */
     868             : /* FIXME possible buffer reading overflow ?? (e.g. the UTF-8 branch reads **input before *input_size is checked) */
     869             : static size_t
     870        3100 : skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size)
     871             : {
     872        3100 :         unsigned charsize = CHARSIZE(charset);
     873             :         char ib[16];
     874             :         char ob[16];
     875             :         ICONV_CONST char *pib;
     876             :         char *pob;
     877             :         size_t il, ol, l;
     878             :         iconv_t cd2;
     879             : 
     880             : 
     881             :         /* usually fixed size and UTF-8 do not have state, so do not reset it */
     882           0 :         if (charsize)
     883             :                 goto skip_charsize;
     884             : 
     885        3100 :         if (0 == strcmp(charset->name, "UTF-8")) {
     886             :                 /*
     887             :                  * Deal with UTF-8.  
     888             :                  * bytes | bits | representation
     889             :                  *     1 |    7 | 0vvvvvvv
     890             :                  *     2 |   11 | 110vvvvv 10vvvvvv
     891             :                  *     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv
     892             :                  *     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
     893             :                  */
     894        3088 :                 unsigned int c = (unsigned char) **input;       /* unsigned: a byte >= 0x80 must not become negative, left-shifting a negative int is undefined */
     895             : 
     896        3088 :                 c = c & (c >> 1);       /* bit k now set only if bits k and k+1 of the byte were both set */
     897             :                 do {
     898        6176 :                         ++charsize;
     899        6176 :                 } while ((c <<= 1) & 0x80);     /* counts the leading 1 bits of the first byte, minimum 1 */
     900             :                 goto skip_charsize;
     901             :         }
     902             : 
     903             :         /* handle state encoding */
     904             : 
     905             :         /* extract state from iconv */
     906          12 :         pob = ib;
     907          12 :         ol = sizeof(ib);
     908          12 :         tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
     909             : 
     910             :         /* init destination conversion */
     911             :         /* TODO use largest fixed size for this platform */
     912          12 :         cd2 = tds_sys_iconv_open("UCS-4", charset->name);
     913          12 :         if (cd2 == (iconv_t) -1)
     914             :                 return 0;
     915             : 
     916             :         /* add part of input: ib now holds l bytes written by the call above, followed by as much input as fits */
     917          12 :         il = ol;
     918          12 :         if (il > *input_size)
     919           0 :                 il = *input_size;
     920          12 :         l = sizeof(ib) - ol;
     921          12 :         memcpy(ib + l, *input, il);
     922          12 :         il += l;
     923             : 
     924             :         /* translate a single character */
     925          12 :         pib = ib;
     926          12 :         pob = ob;
     927             :         /* TODO use size of largest fixed charset */
     928          12 :         ol = 4;
     929          12 :         tds_sys_iconv(cd2, &pib, &il, &pob, &ol);
     930             : 
     931             :         /* adjust input: bytes consumed from ib minus the l prefix bytes are the input bytes skipped */
     932          12 :         l = (pib - ib) - l;
     933          12 :         *input += l;
     934          12 :         *input_size -= l;
     935             : 
     936             :         /* extract state */
     937          12 :         pob = ib;
     938          12 :         ol = sizeof(ib);
     939          12 :         tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
     940             : 
     941             :         /* set input state */
     942          12 :         pib = ib;
     943          12 :         il = sizeof(ib) - ol;
     944          12 :         pob = ob;
     945          12 :         ol = sizeof(ob);
     946          12 :         tds_sys_iconv(cd, &pib, &il, &pob, &ol);
     947             : 
     948          12 :         tds_sys_iconv_close(cd2);
     949             : 
     950          12 :         if (l != 0)
     951             :                 return l;
     952             : 
     953             :         /* last blindly attempt, skip minimum bytes */
     954           6 :         charsize = charset->min_bytes_per_char;
     955             : 
     956             :         /* fall through */
     957             : 
     958        3094 : skip_charsize:
     959        3094 :         if (charsize > *input_size)
     960             :                 return 0;       /* not enough input left to skip a whole character */
     961        3094 :         *input += charsize;
     962        3094 :         *input_size -= charsize;
     963        3094 :         return charsize;
     964             : }
     965             : 
     966             : #include <freetds/charset_lookup.h>
     967             : 
     968             : /**
     969             :  * Determine canonical iconv character set.
     970             :  * \returns canonical position (the \c canonic field of the entry found by charset_lookup()), or -1 if lookup failed.
     971             :  * \remarks Returned position can be used in bytes_per_char(), above.  \a charset_name must not be NULL: it is passed to strlen().
     972             :  */
     973             : int
     974       11298 : tds_canonical_charset(const char *charset_name)
     975             : {
     976       11298 :         const struct charset_alias *c = charset_lookup(charset_name, strlen(charset_name));
     977       11298 :         return c ? c->canonic : -1;
     978             : }
     979             : 
     980             : /**
     981             :  * Determine canonical iconv character set name.  
     982             :  * \returns canonical name, or \a charset_name itself (unchanged) if lookup failed.
     983             :  * \remarks Returned name can be used in bytes_per_char(), above.
     984             :  */
     985             : const char *
     986        3041 : tds_canonical_charset_name(const char *charset_name)
     987             : {
     988             :         int res;
     989             : 
     990             :         /* get numeric pos */
     991        3041 :         res = tds_canonical_charset(charset_name);
     992        3041 :         if (res >= 0)
     993        3041 :                 return canonic_charsets[res].name;
     994             : 
     995             :         return charset_name;    /* hope for the best */
     996             : }
     997             : 
     998             : static int
     999       20421 : collate2charset(TDSCONNECTION * conn, TDS_UCHAR collate[5])
    1000             : {
    1001       20421 :         int cp = 0;
    1002       20421 :         const int sql_collate = collate[4];
    1003             :         /* extract 16 bit of LCID (it's 20 bits but higher 4 are just variations) */
    1004       20421 :         const int lcid = TDS_GET_UA2LE(collate);
    1005             : 
    1006             :         /* starting with bit 20 (little endian, so 3rd byte bit 4) there are 8 bits:
    1007             :          * fIgnoreCase fIgnoreAccent fIgnoreKana fIgnoreWidth fBinary fBinary2 fUTF8 FRESERVEDBIT
    1008             :          * so fUTF8 is on the 4th byte bit 2 */
    1009       20421 :         if ((collate[3] & 0x4) != 0 && IS_TDS74_PLUS(conn))
    1010             :                 return TDS_CHARSET_UTF_8;
    1011             : 
    1012             :         /*
    1013             :          * The table from the MSQLServer reference "Windows Collation Designators" 
    1014             :          * and from " NLS Information for Microsoft Windows XP".
    1015             :          *
    1016             :          * See also https://go.microsoft.com/fwlink/?LinkId=119987 [MSDN-SQLCollation]
    1017             :          */
    1018             : 
    1019       20421 :         switch (sql_collate) {
    1020             :         case 30:                /* SQL_Latin1_General_CP437_BIN */
    1021             :         case 31:                /* SQL_Latin1_General_CP437_CS_AS */
    1022             :         case 32:                /* SQL_Latin1_General_CP437_CI_AS */
    1023             :         case 33:                /* SQL_Latin1_General_Pref_CP437_CI_AS */
    1024             :         case 34:                /* SQL_Latin1_General_CP437_CI_AI */
    1025             :                 return TDS_CHARSET_CP437;
    1026           0 :         case 40:                /* SQL_Latin1_General_CP850_BIN */
    1027             :         case 41:                /* SQL_Latin1_General_CP850_CS_AS */
    1028             :         case 42:                /* SQL_Latin1_General_CP850_CI_AS */
    1029             :         case 43:                /* SQL_Latin1_General_Pref_CP850_CI_AS */
    1030             :         case 44:                /* SQL_Latin1_General_CP850_CI_AI */
    1031             :         case 49:                /* SQL_1xCompat_CP850_CI_AS */
    1032             :         case 55:                /* SQL_AltDiction_CP850_CS_AS */
    1033             :         case 56:                /* SQL_AltDiction_Pref_CP850_CI_AS */
    1034             :         case 57:                /* SQL_AltDiction_CP850_CI_AI */
    1035             :         case 58:                /* SQL_Scandinavian_Pref_CP850_CI_AS */
    1036             :         case 59:                /* SQL_Scandinavian_CP850_CS_AS */
    1037             :         case 60:                /* SQL_Scandinavian_CP850_CI_AS */
    1038             :         case 61:                /* SQL_AltDiction_CP850_CI_AS */
    1039           0 :                 return TDS_CHARSET_CP850;
    1040           0 :         case 80:                /* SQL_Latin1_General_1250_BIN */
    1041             :         case 81:                /* SQL_Latin1_General_CP1250_CS_AS */
    1042             :         case 82:                /* SQL_Latin1_General_CP1250_CI_AS */
    1043           0 :                 return TDS_CHARSET_CP1250;
    1044           0 :         case 105:               /* SQL_Latin1_General_CP1251_CS_AS */
    1045             :         case 106:               /* SQL_Latin1_General_CP1251_CI_AS */
    1046           0 :                 return TDS_CHARSET_CP1251;
    1047           0 :         case 113:               /* SQL_Latin1_General_CP1253_CS_AS */
    1048             :         case 114:               /* SQL_Latin1_General_CP1253_CI_AS */
    1049             :         case 120:               /* SQL_MixDiction_CP1253_CS_AS */
    1050             :         case 121:               /* SQL_AltDiction_CP1253_CS_AS */
    1051             :         case 122:               /* SQL_AltDiction2_CP1253_CS_AS */
    1052             :         case 124:               /* SQL_Latin1_General_CP1253_CI_AI */
    1053           0 :                 return TDS_CHARSET_CP1253;
    1054           0 :         case 137:               /* SQL_Latin1_General_CP1255_CS_AS */
    1055             :         case 138:               /* SQL_Latin1_General_CP1255_CI_AS */
    1056           0 :                 return TDS_CHARSET_CP1255;
    1057           0 :         case 145:               /* SQL_Latin1_General_CP1256_CS_AS */
    1058             :         case 146:               /* SQL_Latin1_General_CP1256_CI_AS */
    1059           0 :                 return TDS_CHARSET_CP1256;
    1060           0 :         case 153:               /* SQL_Latin1_General_CP1257_CS_AS */
    1061             :         case 154:               /* SQL_Latin1_General_CP1257_CI_AS */
    1062           0 :                 return TDS_CHARSET_CP1257;
    1063             :         }
    1064             : 
    1065       20421 :         switch (lcid) {
    1066             :         case 0x405:
    1067             :         case 0x40e:             /* 0x1040e */
    1068             :         case 0x415:
    1069             :         case 0x418:
    1070             :         case 0x41a:
    1071             :         case 0x41b:
    1072             :         case 0x41c:
    1073             :         case 0x424:
    1074             :         case 0x442:
    1075             :         case 0x81a:
    1076             :         case 0x104e:            /* ?? */
    1077             :         case 0x141a:
    1078             :                 cp = TDS_CHARSET_CP1250;
    1079             :                 break;
    1080           0 :         case 0x402:
    1081             :         case 0x419:
    1082             :         case 0x422:
    1083             :         case 0x423:
    1084             :         case 0x42f:
    1085             :         case 0x43f:
    1086             :         case 0x440:
    1087             :         case 0x444:
    1088             :         case 0x450:
    1089             :         case 0x82c:
    1090             :         case 0x843:
    1091             :         case 0xc1a:
    1092             :         case 0x46d:
    1093             :         case 0x201a:
    1094             :         case 0x485:
    1095           0 :                 cp = TDS_CHARSET_CP1251;
    1096           0 :                 break;
    1097       20403 :         case 0x1007:
    1098             :         case 0x1009:
    1099             :         case 0x100a:
    1100             :         case 0x100c:
    1101             :         case 0x1407:
    1102             :         case 0x1409:
    1103             :         case 0x140a:
    1104             :         case 0x140c:
    1105             :         case 0x1809:
    1106             :         case 0x180a:
    1107             :         case 0x180c:
    1108             :         case 0x1c09:
    1109             :         case 0x1c0a:
    1110             :         case 0x2009:
    1111             :         case 0x200a:
    1112             :         case 0x2409:
    1113             :         case 0x240a:
    1114             :         case 0x2809:
    1115             :         case 0x280a:
    1116             :         case 0x2c09:
    1117             :         case 0x2c0a:
    1118             :         case 0x3009:
    1119             :         case 0x300a:
    1120             :         case 0x3409:
    1121             :         case 0x340a:
    1122             :         case 0x380a:
    1123             :         case 0x3c0a:
    1124             :         case 0x400a:
    1125             :         case 0x403:
    1126             :         case 0x406:
    1127             :         case 0x417:
    1128             :         case 0x42e:
    1129             :         case 0x43b:
    1130             :         case 0x452:
    1131             :         case 0x462:
    1132             :         case 0x47a:
    1133             :         case 0x47c:
    1134             :         case 0x47e:
    1135             :         case 0x483:
    1136             :         case 0x407:             /* 0x10407 */
    1137             :         case 0x409:
    1138             :         case 0x40a:
    1139             :         case 0x40b:
    1140             :         case 0x40c:
    1141             :         case 0x40f:
    1142             :         case 0x410:
    1143             :         case 0x413:
    1144             :         case 0x414:
    1145             :         case 0x416:
    1146             :         case 0x41d:
    1147             :         case 0x421:
    1148             :         case 0x42d:
    1149             :         case 0x436:
    1150             :         case 0x437:             /* 0x10437 */
    1151             :         case 0x438:
    1152             :                 /*case 0x439:  ??? Unicode only */
    1153             :         case 0x43e:
    1154             :         case 0x440a:
    1155             :         case 0x441:
    1156             :         case 0x456:
    1157             :         case 0x480a:
    1158             :         case 0x4c0a:
    1159             :         case 0x500a:
    1160             :         case 0x807:
    1161             :         case 0x809:
    1162             :         case 0x80a:
    1163             :         case 0x80c:
    1164             :         case 0x810:
    1165             :         case 0x813:
    1166             :         case 0x814:
    1167             :         case 0x816:
    1168             :         case 0x81d:
    1169             :         case 0x83b:
    1170             :         case 0x83e:
    1171             :         case 0x85f:
    1172             :         case 0xc07:
    1173             :         case 0xc09:
    1174             :         case 0xc0a:
    1175             :         case 0xc0c:
    1176       20403 :                 cp = TDS_CHARSET_CP1252;
    1177       20403 :                 break;
    1178           0 :         case 0x408:
    1179           0 :                 cp = TDS_CHARSET_CP1253;
    1180           0 :                 break;
    1181           0 :         case 0x41f:
    1182             :         case 0x42c:
    1183             :         case 0x443:
    1184           0 :                 cp = TDS_CHARSET_CP1254;
    1185           0 :                 break;
    1186           6 :         case 0x40d:
    1187           6 :                 cp = TDS_CHARSET_CP1255;
    1188           6 :                 break;
    1189           0 :         case 0x1001:
    1190             :         case 0x1401:
    1191             :         case 0x1801:
    1192             :         case 0x1c01:
    1193             :         case 0x2001:
    1194             :         case 0x2401:
    1195             :         case 0x2801:
    1196             :         case 0x2c01:
    1197             :         case 0x3001:
    1198             :         case 0x3401:
    1199             :         case 0x3801:
    1200             :         case 0x3c01:
    1201             :         case 0x4001:
    1202             :         case 0x401:
    1203             :         case 0x480:
    1204             :         case 0x420:
    1205             :         case 0x429:
    1206             :         case 0x48c:
    1207             :         case 0x801:
    1208             :         case 0xc01:
    1209           0 :                 cp = TDS_CHARSET_CP1256;
    1210           0 :                 break;
    1211           0 :         case 0x425:
    1212             :         case 0x426:
    1213             :         case 0x427:
    1214             :         case 0x827:             /* ?? */
    1215           0 :                 cp = TDS_CHARSET_CP1257;
    1216           0 :                 break;
    1217           0 :         case 0x42a:
    1218           0 :                 cp = TDS_CHARSET_CP1258;
    1219           0 :                 break;
    1220           0 :         case 0x41e:
    1221           0 :                 cp = TDS_CHARSET_CP874;
    1222           0 :                 break;
    1223           0 :         case 0x411:             /* 0x10411 */
    1224           0 :                 cp = TDS_CHARSET_CP932;
    1225           0 :                 break;
    1226          12 :         case 0x1004:
    1227             :         case 0x804:             /* 0x20804 */
    1228          12 :                 cp = TDS_CHARSET_GB18030;
    1229          12 :                 break;
    1230           0 :         case 0x412:             /* 0x10412 */
    1231           0 :                 cp = TDS_CHARSET_CP949;
    1232           0 :                 break;
    1233           0 :         case 0x1404:
    1234             :         case 0x404:             /* 0x30404 */
    1235             :         case 0xc04:
    1236           0 :                 cp = TDS_CHARSET_CP950;
    1237           0 :                 break;
    1238           0 :         default:
    1239           0 :                 cp = TDS_CHARSET_CP1252;
    1240             :         }
    1241             : 
    1242             :         return cp;
    1243             : }
    1244             : 
    1245             : /**
    1246             :  * Get the iconv conversion to use for a given collation
                     :  * (to support different column encoding under MSSQL2K).
                     :  *
                     :  * The collation is mapped to a canonical charset by collate2charset().
                     :  * If that charset equals the "to" charset of the connection's
                     :  * client2server_chardata conversion, that existing conversion is
                     :  * returned as is. Otherwise the result of tds_iconv_get_info() is
                     :  * returned, called with the "from" charset of the client2ucs2
                     :  * conversion (presumably the client charset -- verify) and the
                     :  * charset derived from the collation.
                     :  *
                     :  * \param conn     connection whose char_convs table is consulted
                     :  * \param collate  5-element collation buffer (presumably the raw
                     :  *                 collation as sent by the server -- verify against callers)
                     :  * \return the conversion selected as described above; on the slow path
                     :  *         this is whatever tds_iconv_get_info() returns
    1247             :  */
    1248             : TDSICONV *
    1249       16278 : tds_iconv_from_collate(TDSCONNECTION * conn, TDS_UCHAR collate[5])
    1250             : {
    1251       16278 :         int canonic_charset = collate2charset(conn, collate);
    1252             : 
    1253             :         /* Same charset as the client2server_chardata conversion? This is usually
                     :          * true, so reusing that conversion improves performance. */
    1254       16278 :         if (conn->char_convs[client2server_chardata]->to.charset.canonic == canonic_charset)
    1255             :                 return conn->char_convs[client2server_chardata];
    1256             : 
    1257        2328 :         return tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset);
    1258             : }
    1259             : 
    1260             : /** @} */

Generated by: LCOV version 1.13