Line data Source code
1 : /* FreeTDS - Library of routines accessing Sybase and Microsoft databases
2 : * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Brian Bruns
3 : * Copyright (C) 2010 Frediano Ziglio
4 : *
5 : * This library is free software; you can redistribute it and/or
6 : * modify it under the terms of the GNU Library General Public
7 : * License as published by the Free Software Foundation; either
8 : * version 2 of the License, or (at your option) any later version.
9 : *
10 : * This library is distributed in the hope that it will be useful,
11 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 : * Library General Public License for more details.
14 : *
15 : * You should have received a copy of the GNU Library General Public
16 : * License along with this library; if not, write to the
17 : * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 : * Boston, MA 02111-1307, USA.
19 : */
20 :
21 : /**
22 : * \file
23 : * \brief Handle character conversions to/from server
24 : */
25 :
26 : #include <config.h>
27 :
28 : #include <stdarg.h>
29 : #include <stdio.h>
30 : #include <assert.h>
31 :
32 : #if HAVE_STRING_H
33 : #include <string.h>
34 : #endif /* HAVE_STRING_H */
35 : #if HAVE_ERRNO_H
36 : #include <errno.h>
37 : #endif
38 :
39 : #include <freetds/tds.h>
40 : #include <freetds/iconv.h>
41 : #include <freetds/bool.h>
42 : #include <freetds/bytes.h>
43 : #if HAVE_ICONV
44 : #include <iconv.h>
45 : #endif
46 :
47 : #define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \
48 : (charset)->min_bytes_per_char : 0 )
49 :
50 :
51 : static int collate2charset(TDSCONNECTION * conn, const TDS_UCHAR collate[5]);
52 : static size_t skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);
53 : static bool tds_iconv_info_init(TDSICONV * char_conv, int client_canonic, int server_canonic);
54 : static bool tds_iconv_init(void);
55 : static void _iconv_close(iconv_t * cd);
56 : static void tds_iconv_info_close(TDSICONV * char_conv);
57 :
58 :
59 : /**
60 : * \ingroup libtds
61 : * \defgroup conv Charset conversion
62 : * Convert between different charsets.
63 : */
64 :
65 : #define TDS_ICONV_ENCODING_TABLES
66 : #include <freetds/encodings.h>
67 :
68 : /* this will contain real iconv names */
69 : static const char *iconv_names[TDS_VECTOR_SIZE(canonic_charsets)];
70 : static bool iconv_initialized = false;
71 : static const char *ucs2name;
72 :
73 : enum
74 : { POS_ISO1, POS_UTF8, POS_UCS2LE, POS_UCS2BE };
75 :
/*
 * Reference data used by tds_iconv_init() to verify conversions.
 * The table is indexed with the POS_* constants above, so entry N holds
 * the test text encoded in the charset whose name is iconv_names[N].
 * len is the number of meaningful bytes in data.
 */
76 : static const struct {
77 : uint32_t len;
78 : /* this field must be aligned at least to 2 bytes */
79 : char data[12];
80 : } test_strings[4] = {
81 : /* same string in required charsets */
82 : { 4, "Ao\xD3\xE5" },
83 : { 6, "Ao\xC3\x93\xC3\xA5" },
/*
 * In the two UCS-2 entries "\x000" is a single NUL byte: a hexadecimal
 * escape consumes every hex digit that follows it, so the extra '0'
 * belongs to the escape and also stops it before the next "\xD3".
 */
84 : { 8, "A\x00o\x000\xD3\x00\xE5\x00" },
85 : { 8, "\x00" "A\x00o\x000\xD3\x00\xE5" },
86 : };
87 :
88 : /**
89 : * Initialize charset searching for UTF-8, UCS-2 and ISO8859-1
90 : */
91 : static bool
92 2004 : tds_iconv_init(void)
93 : {
/*
 * Fills iconv_names[POS_ISO1 .. POS_UCS2BE] with names accepted by
 * tds_sys_iconv_open() and sets ucs2name.
 * Returns false when no ISO-8859-1/UTF-8 pair can be opened, when neither
 * UCS-2 variant is available, or when a test conversion of test_strings
 * gives an unexpected result; returns true otherwise.
 */
94 : int i;
95 : iconv_t cd;
96 :
97 : /* first entries should be constants */
98 : assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0);
99 : assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0);
100 : assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0);
101 : assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0);
102 :
103 : /* fast tests for GNU-iconv */
104 2004 : cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8");
105 2004 : if (cd != (iconv_t) -1) {
106 2004 : iconv_names[POS_ISO1] = "ISO-8859-1";
107 2004 : iconv_names[POS_UTF8] = "UTF-8";
108 2004 : tds_sys_iconv_close(cd);
109 : } else {
110 :
111 : /* search names for ISO8859-1 and UTF-8 */
/* tries every ISO-8859-1 alias against every UTF-8 alias until one pair opens */
112 0 : for (i = 0; iconv_aliases[i].alias; ++i) {
113 : int j;
114 :
115 0 : if (iconv_aliases[i].canonic != POS_ISO1)
116 0 : continue;
117 0 : for (j = 0; iconv_aliases[j].alias; ++j) {
118 0 : if (iconv_aliases[j].canonic != POS_UTF8)
119 0 : continue;
120 :
121 0 : cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias);
122 0 : if (cd != (iconv_t) -1) {
123 0 : iconv_names[POS_ISO1] = iconv_aliases[i].alias;
124 0 : iconv_names[POS_UTF8] = iconv_aliases[j].alias;
125 0 : tds_sys_iconv_close(cd);
126 0 : break;
127 : }
128 : }
/* the inner break only leaves the UTF-8 loop; stop the outer one as well */
129 0 : if (iconv_names[POS_ISO1])
130 : break;
131 : }
132 : /* required characters not found !!! */
133 0 : if (!iconv_names[POS_ISO1]) {
134 0 : tdsdump_log(TDS_DBG_ERROR, "iconv name for ISO-8859-1 not found\n");
135 : return false;
136 : }
137 : }
138 :
139 : /* now search for UCS-2 */
140 2004 : cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE");
141 2004 : if (cd != (iconv_t) -1) {
142 2004 : iconv_names[POS_UCS2LE] = "UCS-2LE";
143 2004 : tds_sys_iconv_close(cd);
144 : }
145 2004 : cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE");
146 2004 : if (cd != (iconv_t) -1) {
147 2004 : iconv_names[POS_UCS2BE] = "UCS-2BE";
148 2004 : tds_sys_iconv_close(cd);
149 : }
150 :
151 : /* long search needed ?? */
152 2004 : if (!iconv_names[POS_UCS2LE] || !iconv_names[POS_UCS2BE]) {
153 0 : for (i = 0; iconv_aliases[i].alias; ++i) {
154 0 : if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0)
155 0 : continue;
156 :
157 0 : cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]);
158 0 : if (cd != (iconv_t) -1) {
159 : char ib[1];
160 : char ob[4];
161 : size_t il, ol;
162 : ICONV_CONST char *pib;
163 : char *pob;
164 0 : int byte_sequence = 0;
165 :
166 : /* try to convert 'A' and check result */
167 0 : ib[0] = 0x41;
168 0 : pib = ib;
169 0 : pob = ob;
170 0 : il = 1;
171 0 : ol = 4;
172 0 : ob[0] = ob[1] = 0;
173 0 : if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) {
174 : /* byte order sequence ?? */
/* all 4 output bytes used for one character: the first 2 are taken to be a byte order mark */
175 0 : if (ol == 0) {
176 0 : ob[0] = ob[2];
177 0 : byte_sequence = 1;
178 : /* TODO save somewhere */
179 : }
180 :
181 : /* save name without sequence (if present) */
/* a non-zero first byte means 'A' came first, i.e. little endian; il is reused as the table index */
182 0 : if (ob[0])
183 0 : il = POS_UCS2LE;
184 : else
185 0 : il = POS_UCS2BE;
/* an alias that emits no byte order mark replaces any name stored earlier */
186 0 : if (!iconv_names[il] || !byte_sequence)
187 0 : iconv_names[il] = iconv_aliases[i].alias;
188 : }
189 0 : tds_sys_iconv_close(cd);
190 : }
191 : }
192 : }
193 : /* we need a UCS-2 (big endian or little endian) */
194 2004 : if (!iconv_names[POS_UCS2LE] && !iconv_names[POS_UCS2BE]) {
195 0 : tdsdump_log(TDS_DBG_ERROR, "iconv name for UCS-2 not found\n");
196 : return false;
197 : }
198 :
/* little endian is preferred when both variants are available */
199 2004 : ucs2name = iconv_names[POS_UCS2LE] ? iconv_names[POS_UCS2LE] : iconv_names[POS_UCS2BE];
200 :
201 10020 : for (i = 0; i < 4; ++i)
202 8016 : tdsdump_log(TDS_DBG_INFO1, "local name for %s is %s\n", canonic_charsets[i].name,
203 : iconv_names[i] ? iconv_names[i] : "(null)");
204 :
205 : /* base conversions checks */
/* i enumerates all 16 (from, to) pairs of the four base charsets, including from == to */
206 32064 : for (i = 0; i < 4 * 4; ++i) {
207 32064 : const int from = i / 4;
208 32064 : const int to = i % 4;
209 : char ob[16];
210 : size_t il, ol;
211 : ICONV_CONST char *pib;
212 : char *pob;
213 : size_t res;
214 :
215 32064 : if (!iconv_names[from] || !iconv_names[to])
216 0 : continue;
217 32064 : cd = tds_sys_iconv_open(iconv_names[to], iconv_names[from]);
218 32064 : if (cd == (iconv_t) -1) {
219 0 : tdsdump_log(TDS_DBG_ERROR, "iconv_open(%s, %s) failed\n", iconv_names[to], iconv_names[from]);
220 0 : return false;
221 : }
222 :
223 32064 : pib = (ICONV_CONST char *) test_strings[from].data;
224 32064 : il = test_strings[from].len;
225 32064 : pob = ob;
226 32064 : ol = sizeof(ob);
227 32064 : res = tds_sys_iconv(cd, &pib, &il, &pob, &ol);
228 32064 : tds_sys_iconv_close(cd);
229 :
/* the output must match the reference encoding exactly, in both length and bytes */
230 32064 : if (res != 0
231 32064 : || sizeof(ob) - ol != test_strings[to].len
232 32064 : || memcmp(ob, test_strings[to].data, test_strings[to].len) != 0) {
233 0 : tdsdump_log(TDS_DBG_ERROR, "iconv(%s, %s) failed res %d\n", iconv_names[to], iconv_names[from], (int) res);
234 0 : tdsdump_log(TDS_DBG_ERROR, "len %d\n", (int) (sizeof(ob) - ol));
235 : return false;
236 : }
237 : }
238 :
239 : /* success (it should always occur) */
240 : return true;
241 : }
242 :
243 : /**
244 : * Get iconv name given canonic
245 : */
246 : static const char *
247 3550 : tds_set_iconv_name(int charset)
248 : {
249 : int i;
250 : iconv_t cd;
251 : const char *name;
252 :
253 3550 : assert(iconv_initialized);
254 :
255 : /* try using canonic name and UTF-8 and UCS2 */
256 3550 : name = canonic_charsets[charset].name;
257 3550 : cd = tds_sys_iconv_open(iconv_names[POS_UTF8], name);
258 3550 : if (cd != (iconv_t) -1)
259 : goto found;
260 0 : cd = tds_sys_iconv_open(ucs2name, name);
261 0 : if (cd != (iconv_t) -1)
262 : goto found;
263 :
264 : /* try all alternatives */
265 0 : for (i = 0; iconv_aliases[i].alias; ++i) {
266 0 : if (iconv_aliases[i].canonic != charset)
267 0 : continue;
268 :
269 0 : name = iconv_aliases[i].alias;
270 0 : cd = tds_sys_iconv_open(iconv_names[POS_UTF8], name);
271 0 : if (cd != (iconv_t) -1)
272 : goto found;
273 0 : cd = tds_sys_iconv_open(ucs2name, name);
274 0 : if (cd != (iconv_t) -1)
275 : goto found;
276 : }
277 :
278 : /* charset not found, pretend it's ISO 8859-1 */
279 0 : iconv_names[charset] = canonic_charsets[POS_ISO1].name;
280 0 : return NULL;
281 :
282 3550 : found:
283 3550 : iconv_names[charset] = name;
284 3550 : tds_sys_iconv_close(cd);
285 3550 : return name;
286 : }
287 :
288 : static void
289 : tds_iconv_reset(TDSICONV *conv)
290 : {
291 : /*
292 : * (min|max)_bytes_per_char can be used to divide
293 : * so init to safe values
294 : */
295 24258 : conv->to.charset.min_bytes_per_char = 1;
296 24258 : conv->to.charset.max_bytes_per_char = 1;
297 24258 : conv->from.charset.min_bytes_per_char = 1;
298 24258 : conv->from.charset.max_bytes_per_char = 1;
299 :
300 24258 : conv->to.charset.name = conv->from.charset.name = "";
301 24258 : conv->to.charset.canonic = conv->from.charset.canonic = 0;
302 24258 : conv->to.cd = (iconv_t) -1;
303 24258 : conv->from.cd = (iconv_t) -1;
304 : }
305 :
306 : /**
307 : * Allocate iconv stuff
308 : * \return 0 for success
309 : */
310 : int
311 4637 : tds_iconv_alloc(TDSCONNECTION * conn)
312 : {
313 : int i;
314 : TDSICONV *char_conv;
315 :
316 4637 : assert(!conn->char_convs);
317 4637 : if (!(conn->char_convs = tds_new(TDSICONV *, initial_char_conv_count + 1)))
318 : return 1;
319 4637 : char_conv = tds_new0(TDSICONV, initial_char_conv_count);
320 4637 : if (!char_conv) {
321 0 : TDS_ZERO_FREE(conn->char_convs);
322 0 : return 1;
323 : }
324 4637 : conn->char_conv_count = initial_char_conv_count + 1;
325 :
326 13911 : for (i = 0; i < initial_char_conv_count; ++i) {
327 9274 : conn->char_convs[i] = &char_conv[i];
328 18548 : tds_iconv_reset(&char_conv[i]);
329 : }
330 :
331 : /* chardata is just a pointer to another iconv info */
332 4637 : conn->char_convs[initial_char_conv_count] = conn->char_convs[client2server_chardata];
333 :
334 4637 : return 0;
335 : }
336 :
337 : /**
338 : * \addtogroup conv
339 : * @{
340 : * Set up the initial iconv conversion descriptors.
341 : * When the socket is allocated, two TDSICONV structures are attached to the connection.
342 : * They have fixed meanings:
343 : * \li 0. Client <-> UCS-2 (client2ucs2)
344 : * \li 1. Client <-> server single-byte charset (client2server_chardata)
345 : *
346 : * Other designs that use less data are possible, but these two conversions are
347 : * very often needed. By reserving them, we avoid searching the array for our most common purposes.
348 : *
349 : * To solve different iconv names and portability problems FreeTDS maintains
350 : * a list of aliases for each charset.
351 : *
352 : * First we discover the names of our minimum required charsets (UTF-8, ISO8859-1 and UCS2).
353 : * Later, as and when it's needed, we try to discover others.
354 : *
355 : * There is one list of canonic names (GNU iconv names) and two sets of aliases
356 : * (one for other iconv implementations and another for Sybase). For every
357 : * canonic charset name we cache the iconv name found during discovery.
358 : */
359 : TDSRET
360 4472 : tds_iconv_open(TDSCONNECTION * conn, const char *charset, int use_utf16)
361 : {
362 : static const char UCS_2LE[] = "UCS-2LE";
363 : int canonic;
364 4472 : int canonic_charset = tds_canonical_charset(charset);
365 4472 : int canonic_env_charset = conn->env.charset ? tds_canonical_charset(conn->env.charset) : -1;
366 : bool ok;
367 :
368 4472 : TDS_ENCODING *client = &conn->char_convs[client2ucs2]->from.charset;
369 4472 : TDS_ENCODING *server = &conn->char_convs[client2ucs2]->to.charset;
370 :
371 4472 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open(%p, %s, %d)\n", conn, charset, use_utf16);
372 :
373 : /* TDS 5.0 support only UTF-16 encodings */
374 4472 : if (IS_TDS50(conn))
375 720 : use_utf16 = true;
376 :
377 : /* initialize */
378 4472 : if (!iconv_initialized) {
379 2004 : if (!tds_iconv_init()) {
380 0 : tdsdump_log(TDS_DBG_ERROR, "error: tds_iconv_init() failed; "
381 : "try using GNU libiconv library\n");
382 : return TDS_FAIL;
383 : }
384 2004 : iconv_initialized = true;
385 : }
386 :
387 : /*
388 : * Client <-> UCS-2 (client2ucs2)
389 : */
390 4472 : tdsdump_log(TDS_DBG_FUNC, "setting up conversions for client charset \"%s\"\n", charset);
391 :
392 4472 : tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, UCS_2LE);
393 :
394 4472 : ok = false;
395 4472 : if (use_utf16) {
396 3706 : canonic = TDS_CHARSET_UTF_16LE;
397 3706 : ok = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
398 : }
399 3706 : if (!ok) {
400 766 : canonic = TDS_CHARSET_UCS_2LE;
401 766 : ok = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
402 : }
403 4472 : if (!ok)
404 : return TDS_FAIL;
405 :
406 : /*
407 : * How many UTF-8 bytes we need is a function of what the input character set is.
408 : * TODO This could definitely be more sophisticated, but it deals with the common case.
409 : */
410 4472 : if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) {
411 : /* ie client is UTF-8 and server is ISO-8859-1 or variant. */
412 0 : client->max_bytes_per_char = 3;
413 : }
414 :
415 : /*
416 : * Client <-> server single-byte charset
417 : * TODO: the server hasn't reported its charset yet, so this logic can't work here.
418 : * not sure what to do about that yet.
419 : */
420 4472 : conn->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY;
421 4472 : if (canonic_env_charset >= 0) {
422 0 : tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, conn->env.charset);
423 0 : ok = tds_iconv_info_init(conn->char_convs[client2server_chardata], canonic_charset, canonic_env_charset);
424 0 : if (!ok)
425 : return TDS_FAIL;
426 : } else {
427 4472 : conn->char_convs[client2server_chardata]->from.charset = canonic_charsets[canonic_charset];
428 4472 : conn->char_convs[client2server_chardata]->to.charset = canonic_charsets[canonic_charset];
429 : }
430 :
431 4472 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open: done\n");
432 : return TDS_SUCCESS;
433 : }
434 :
435 : /**
436 : * Open iconv descriptors to convert between character sets (both directions).
437 : * 1. Look up the canonical names of the character sets.
438 : * 2. Look up their widths.
439 : * 3. Ask iconv to open a conversion descriptor.
440 : * 4. Fail if any of the above offer any resistance.
441 : * \remarks The charset names written to \a iconv will be the canonical names,
442 : * not necessarily the names passed in.
443 : */
444 : static bool
445 10210 : tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canonical)
446 : {
447 10210 : TDS_ENCODING *client = &char_conv->from.charset;
448 10210 : TDS_ENCODING *server = &char_conv->to.charset;
449 :
450 10210 : assert(char_conv->to.cd == (iconv_t) -1);
451 10210 : assert(char_conv->from.cd == (iconv_t) -1);
452 :
453 10210 : if (client_canonical < 0) {
454 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%d\" invalid\n", client_canonical);
455 : return false;
456 : }
457 :
458 10210 : if (server_canonical < 0) {
459 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%d\" invalid\n", server_canonical);
460 : return false;
461 : }
462 :
463 10210 : *client = canonic_charsets[client_canonical];
464 10210 : *server = canonic_charsets[server_canonical];
465 :
466 : /* special case, same charset, no conversion */
467 10210 : if (client_canonical == server_canonical) {
468 104 : char_conv->to.cd = (iconv_t) -1;
469 104 : char_conv->from.cd = (iconv_t) -1;
470 104 : char_conv->flags = TDS_ENCODING_MEMCPY;
471 104 : return true;
472 : }
473 :
474 10106 : char_conv->flags = 0;
475 :
476 : /* get iconv names */
477 10106 : if (!iconv_names[client_canonical]) {
478 0 : if (!tds_set_iconv_name(client_canonical)) {
479 0 : tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
480 : client_canonical, iconv_names[client_canonical]);
481 : }
482 : }
483 :
484 10106 : if (!iconv_names[server_canonical]) {
485 3550 : if (!tds_set_iconv_name(server_canonical)) {
486 0 : tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
487 : server_canonical, iconv_names[server_canonical]);
488 : }
489 : }
490 :
491 10106 : char_conv->to.cd = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]);
492 10106 : if (char_conv->to.cd == (iconv_t) -1) {
493 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name);
494 : }
495 :
496 10106 : char_conv->from.cd = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);
497 10106 : if (char_conv->from.cd == (iconv_t) -1) {
498 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);
499 : }
500 :
501 : /* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */
502 :
503 : /* tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name); */
504 :
505 : return true;
506 : }
507 :
508 :
509 : static void
510 : _iconv_close(iconv_t * cd)
511 : {
512 : static const iconv_t invalid = (iconv_t) -1;
513 :
514 39058 : if (*cd != invalid) {
515 20092 : tds_sys_iconv_close(*cd);
516 20092 : *cd = invalid;
517 : }
518 : }
519 :
520 : static void
521 19529 : tds_iconv_info_close(TDSICONV * char_conv)
522 : {
523 39058 : _iconv_close(&char_conv->to.cd);
524 39058 : _iconv_close(&char_conv->from.cd);
525 19529 : }
526 :
527 : void
528 0 : tds_iconv_close(TDSCONNECTION * conn)
529 : {
530 : int i;
531 :
532 19529 : for (i = 0; i < conn->char_conv_count; ++i)
533 19529 : tds_iconv_info_close(conn->char_convs[i]);
534 0 : }
535 :
536 : #define CHUNK_ALLOC 4
537 :
538 : void
539 4607 : tds_iconv_free(TDSCONNECTION * conn)
540 : {
541 : int i;
542 :
543 4607 : if (!conn->char_convs)
544 : return;
545 4607 : tds_iconv_close(conn);
546 :
547 4607 : free(conn->char_convs[0]);
548 8323 : for (i = initial_char_conv_count + 1; i < conn->char_conv_count; i += CHUNK_ALLOC)
549 3716 : free(conn->char_convs[i]);
550 4607 : TDS_ZERO_FREE(conn->char_convs);
551 4607 : conn->char_conv_count = 0;
552 : }
553 :
554 : static void
555 : tds_iconv_err(TDSSOCKET *tds, int err)
556 : {
557 6351 : if (tds)
558 4141 : tdserror(tds_get_ctx(tds), tds, err, 0);
559 : }
560 :
561 : /**
562 : * Wrapper around iconv(3). Same parameters, with slightly different behavior.
563 : * \param tds state information for the socket and the TDS protocol
564 : * \param io Enumerated value indicating whether the data are being sent to or received from the server.
565 : * \param conv information about the encodings involved, including the iconv(3) conversion descriptors.
566 : * \param inbuf address of pointer to the input buffer of data to be converted.
567 : * \param inbytesleft address of count of bytes in \a inbuf.
568 : * \param outbuf address of pointer to the output buffer.
569 : * \param outbytesleft address of count of bytes in \a outbuf.
570 : * \retval number of irreversible conversions performed. -1 on error, see iconv(3) documentation for
571 : * a description of the possible values of \e errno.
572 : * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL. Like iconv(3), all pointers will
573 : * be updated. Success is signified by a nonnegative return code and \a *inbytesleft == 0.
574 : * If the conversion descriptor in \a iconv is -1 or NULL, \a inbuf is copied to \a outbuf,
575 : * and all parameters updated accordingly.
576 : *
577 : * If a character in \a inbuf cannot be converted because no such character exists in the
578 : * \a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion.
579 : * The message varies depending on the direction of the data.
580 : * On a read error, we emit Msg 2403, Severity 16 (EX_INFO):
581 : * "WARNING! Some character(s) could not be converted into client's character set.
582 : * Unconverted bytes were changed to question marks ('?')."
583 : * On a write error we emit Msg 2402, Severity 16 (EX_USER):
584 : * "Error converting client characters into server's character set. Some character(s) could not be converted."
585 : * and return an error code. Client libraries relying on this routine should reflect an error back to the application.
586 : *
587 : * \todo Check for variable multibyte non-UTF-8 input character set.
588 : * \todo Use more robust error message generation.
589 : * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'.
590 : * \todo Support alternative to '?' for the replacement character.
591 : */
592 : size_t
593 1011532 : tds_iconv(TDSSOCKET * tds, TDSICONV * conv, TDS_ICONV_DIRECTION io,
594 : const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft)
595 : {
/*
 * Outline: select the direction, copy bytes verbatim when no conversion
 * is configured, otherwise call iconv in a loop. For data read from the
 * server an unconvertible input sequence is skipped and '?' is written
 * in its place. Each kind of error is reported through tds_iconv_err()
 * only while the matching flag in conv->suppress is clear.
 */
596 : static const iconv_t invalid = (iconv_t) -1;
597 1011532 : TDSICONVDIR *from = NULL;
598 1011532 : TDSICONVDIR *to = NULL;
599 :
600 1011532 : iconv_t error_cd = invalid;
601 :
602 1011532 : char quest_mark[] = "?"; /* best to leave non-const; implementations vary */
603 : ICONV_CONST char *pquest_mark;
604 : size_t lquest_mark;
605 : size_t irreversible;
606 : size_t one_character;
607 1011532 : bool eilseq_raised = false;
608 : int conv_errno;
609 : /* cast away const-ness */
610 1011532 : TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress;
611 :
612 1011532 : assert(inbuf && inbytesleft && outbuf && outbytesleft);
613 :
614 : /* if empty there's nothing to return.
615 : * This fixes the case of some iconv implementations that do
616 : * not handle *inbuf == NULL and *inbytesleft == 0 as
617 : * empty strings
618 : */
619 1011532 : if (*inbytesleft == 0)
620 : return 0;
621 :
/* conv->from is the client side and conv->to the server side, so reads swap them */
622 1011180 : switch (io) {
623 170482 : case to_server:
624 170482 : from = &conv->from;
625 170482 : to = &conv->to;
626 170482 : break;
627 840698 : case to_client:
628 840698 : from = &conv->to;
629 840698 : to = &conv->from;
630 840698 : break;
631 0 : default:
632 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out. \n", io);
633 0 : assert(io == to_server || io == to_client);
634 : /* the rest of this function assumes from & to are not null */
635 0 : errno = EINVAL;
636 0 : return -1;
637 : }
638 :
639 : /* silly case, memcpy */
/* copies as many bytes as fit; leftover input is reported as E2BIG with a -1 return */
640 1011180 : if (conv->flags & TDS_ENCODING_MEMCPY || to->cd == invalid) {
641 159477 : size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;
642 :
643 159477 : memcpy(*outbuf, *inbuf, len);
644 159477 : conv_errno = *inbytesleft > *outbytesleft ? E2BIG : 0;
645 159477 : *inbytesleft -= len;
646 159477 : *outbytesleft -= len;
647 159477 : *inbuf += len;
648 159477 : *outbuf += len;
649 159477 : errno = conv_errno;
650 159477 : return conv_errno ? (size_t) -1 : 0;
651 : }
652 :
653 : /*
654 : * Call iconv() as many times as necessary, until we reach the end of input or exhaust output.
655 : */
656 : for (;;) {
657 1477989 : conv_errno = 0;
658 1477989 : irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);
659 :
660 : /* iconv success, return */
661 1477989 : if (irreversible != (size_t) - 1) {
/* a positive count means some characters were converted irreversibly; handled like EILSEQ below */
662 1244836 : if (irreversible > 0)
663 0 : eilseq_raised = true;
664 :
665 : /* here we detect end of conversion and try to reset shift state */
666 1244836 : if (inbuf) {
667 : /*
668 : * if inbuf or *inbuf is NULL iconv reset the shift state.
669 : * Note that setting inbytesleft to NULL can cause core so don't do it!
670 : */
671 622418 : inbuf = NULL;
672 622418 : continue;
673 : }
/* reached after the reset call made with inbuf == NULL */
674 : break;
675 : }
676 :
677 : /* save errno, other function could change its value */
678 233153 : conv_errno = errno;
679 :
680 233153 : if (conv_errno == EILSEQ)
681 11170 : eilseq_raised = true;
682 :
/* recovery is only attempted for reads; eilseq_raised also stays set from earlier iterations */
683 233153 : if (!eilseq_raised || io != to_client || !inbuf)
684 : break;
685 : /*
686 : * Invalid input sequence encountered reading from server.
687 : * Skip one input sequence, adjusting pointers.
688 : */
689 3876 : one_character = skip_one_input_sequence(to->cd, &from->charset, inbuf, inbytesleft);
690 :
691 3876 : if (!one_character)
692 : break;
693 :
694 : /*
695 : * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set.
696 : * In unimaginably weird circumstances, this might be impossible.
697 : * We use UTF-8 instead of ASCII because some implementations
698 : * do not convert singlebyte <-> singlebyte.
699 : */
/* opened at most once per call and closed before returning */
700 3876 : if (error_cd == invalid) {
701 1938 : error_cd = tds_sys_iconv_open(to->charset.name, iconv_names[POS_UTF8]);
702 1938 : if (error_cd == invalid) {
703 : break; /* what to do? */
704 : }
705 : }
706 :
707 3876 : lquest_mark = 1;
708 3876 : pquest_mark = quest_mark;
709 :
710 3876 : irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);
711 :
712 3876 : if (irreversible == (size_t) - 1)
713 : break;
714 :
715 3868 : if (!*inbytesleft)
716 : break;
717 : }
718 :
/* report the first invalid-sequence event only; the flag then silences later ones */
719 851703 : if (eilseq_raised && !suppress->eilseq) {
720 : /* invalid multibyte input sequence encountered */
721 6351 : if (io == to_client) {
722 1930 : if (irreversible == (size_t) - 1) {
723 : tds_iconv_err(tds, TDSEICONV2BIG);
724 : } else {
/* the last call succeeded, so errno is cleared for the caller */
725 1930 : tds_iconv_err(tds, TDSEICONVI);
726 1930 : conv_errno = 0;
727 : }
728 : } else {
729 : tds_iconv_err(tds, TDSEICONVO);
730 : }
731 6351 : suppress->eilseq = 1;
732 : }
733 :
734 849773 : switch (conv_errno) {
735 2970 : case EINVAL: /* incomplete multibyte sequence is encountered */
736 2970 : if (suppress->einval)
737 : break;
738 : /* in chunk conversion this can mean we end a chunk inside a character */
739 0 : tds_iconv_err(tds, TDSEICONVAVAIL);
740 0 : suppress->einval = 1;
741 0 : break;
742 219013 : case E2BIG: /* output buffer has no more room */
743 219013 : if (suppress->e2big)
744 : break;
745 0 : tds_iconv_err(tds, TDSEICONVIU);
746 0 : suppress->e2big = 1;
747 0 : break;
748 : default:
749 : break;
750 : }
751 :
752 1073686 : if (error_cd != invalid) {
753 1938 : tds_sys_iconv_close(error_cd);
754 : }
755 :
/* restore the saved value: the calls above may have changed errno */
756 851703 : errno = conv_errno;
757 851703 : return irreversible;
758 : }
759 :
760 : /**
761 : * Get a iconv info structure, allocate and initialize if needed
762 : */
763 : TDSICONV *
764 28630 : tds_iconv_get_info(TDSCONNECTION * conn, int canonic_client, int canonic_server)
765 : {
766 : TDSICONV *info;
767 : int i;
768 :
769 : /* search a charset from already allocated charsets */
770 87242 : for (i = conn->char_conv_count; --i >= initial_char_conv_count;)
771 52874 : if (canonic_client == conn->char_convs[i]->from.charset.canonic
772 41300 : && canonic_server == conn->char_convs[i]->to.charset.canonic)
773 : return conn->char_convs[i];
774 :
775 : /* allocate a new iconv structure */
776 5738 : if (conn->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {
777 : TDSICONV **p;
778 : TDSICONV *infos;
779 :
780 3746 : infos = tds_new(TDSICONV, CHUNK_ALLOC);
781 3746 : if (!infos)
782 : return NULL;
783 3746 : p = (TDSICONV **) realloc(conn->char_convs, sizeof(TDSICONV *) * (conn->char_conv_count + CHUNK_ALLOC));
784 3746 : if (!p) {
785 0 : free(infos);
786 0 : return NULL;
787 : }
788 3746 : conn->char_convs = p;
789 3746 : memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);
790 18730 : for (i = 0; i < CHUNK_ALLOC; ++i) {
791 14984 : conn->char_convs[i + conn->char_conv_count] = &infos[i];
792 29968 : tds_iconv_reset(&infos[i]);
793 : }
794 : }
795 5738 : info = conn->char_convs[conn->char_conv_count++];
796 :
797 : /* init */
798 5738 : if (tds_iconv_info_init(info, canonic_client, canonic_server))
799 : return info;
800 :
801 0 : tds_iconv_info_close(info);
802 0 : --conn->char_conv_count;
803 0 : return NULL;
804 : }
805 :
806 : TDSICONV *
807 20 : tds_iconv_get(TDSCONNECTION * conn, const char *client_charset, const char *server_charset)
808 : {
809 20 : int canonic_client_charset_num = tds_canonical_charset(client_charset);
810 20 : int canonic_server_charset_num = tds_canonical_charset(server_charset);
811 :
812 20 : if (canonic_client_charset_num < 0) {
813 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", client_charset);
814 : return NULL;
815 : }
816 20 : if (canonic_server_charset_num < 0) {
817 0 : tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", server_charset);
818 : return NULL;
819 : }
820 :
821 20 : return tds_iconv_get_info(conn, canonic_client_charset_num, canonic_server_charset_num);
822 : }
823 :
824 : /* change singlebyte conversions according to server */
825 : static void
826 9968 : tds_srv_charset_changed_num(TDSCONNECTION * conn, int canonic_charset_num)
827 : {
828 9968 : TDSICONV *char_conv = conn->char_convs[client2server_chardata];
829 :
830 9968 : if (IS_TDS7_PLUS(conn) && canonic_charset_num == TDS_CHARSET_ISO_8859_1)
831 0 : canonic_charset_num = TDS_CHARSET_CP1252;
832 :
833 9968 : tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charsets[canonic_charset_num].name);
834 :
835 9968 : if (canonic_charset_num == char_conv->to.charset.canonic)
836 : return;
837 :
838 : /* find and set conversion */
839 3178 : char_conv = tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset_num);
840 3178 : if (char_conv)
841 3178 : conn->char_convs[client2server_chardata] = char_conv;
842 : }
843 :
844 : void
845 4406 : tds_srv_charset_changed(TDSCONNECTION * conn, const char *charset)
846 : {
847 4406 : int n = tds_canonical_charset(charset);
848 :
849 : /* ignore request to change to unknown charset */
850 4406 : if (n < 0) {
851 0 : tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);
852 : return;
853 : }
854 :
855 4406 : tds_srv_charset_changed_num(conn, n);
856 : }
857 :
858 : /* change singlebyte conversions according to server */
859 : void
860 5562 : tds7_srv_charset_changed(TDSCONNECTION * conn, TDS_UCHAR collation[5])
861 : {
862 5562 : tds_srv_charset_changed_num(conn, collate2charset(conn, collation));
863 5562 : }
864 :
865 : /**
866 : * Move the input sequence pointer to the next valid position.
867 : * Used when an input character cannot be converted.
868 : * \returns number of bytes to skip.
869 : */
870 : /* FIXME possible buffer reading overflow ?? */
871 : static size_t
872 3876 : skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size)
873 : {
874 3876 : unsigned charsize = CHARSIZE(charset);
875 : char ib[16];
876 : char ob[16];
877 : ICONV_CONST char *pib;
878 : char *pob;
879 : size_t il, ol, l;
880 : iconv_t cd2;
881 :
882 :
883 : /* usually fixed size and UTF-8 do not have state, so do not reset it */
884 0 : if (charsize)
885 : goto skip_charsize;
886 :
887 3876 : if (0 == strcmp(charset->name, "UTF-8")) {
888 : /*
889 : * Deal with UTF-8.
890 : * bytes | bits | representation
891 : * 1 | 7 | 0vvvvvvv
892 : * 2 | 11 | 110vvvvv 10vvvvvv
893 : * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv
894 : * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
895 : */
896 3860 : int c = **input;
897 :
898 3860 : c = c & (c >> 1);
899 : do {
900 7720 : ++charsize;
901 7720 : } while ((c <<= 1) & 0x80);
902 : goto skip_charsize;
903 : }
904 :
905 : /* handle state encoding */
906 :
907 : /* extract state from iconv */
908 16 : pob = ib;
909 16 : ol = sizeof(ib);
910 16 : tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
911 :
912 : /* init destination conversion */
913 : /* TODO use largest fixed size for this platform */
914 16 : cd2 = tds_sys_iconv_open("UCS-4", charset->name);
915 16 : if (cd2 == (iconv_t) -1)
916 : return 0;
917 :
918 : /* add part of input */
919 16 : il = ol;
920 16 : if (il > *input_size)
921 0 : il = *input_size;
922 16 : l = sizeof(ib) - ol;
923 16 : memcpy(ib + l, *input, il);
924 16 : il += l;
925 :
926 : /* translate a single character */
927 16 : pib = ib;
928 16 : pob = ob;
929 : /* TODO use size of largest fixed charset */
930 16 : ol = 4;
931 16 : tds_sys_iconv(cd2, &pib, &il, &pob, &ol);
932 :
933 : /* adjust input */
934 16 : l = (pib - ib) - l;
935 16 : *input += l;
936 16 : *input_size -= l;
937 :
938 : /* extract state */
939 16 : pob = ib;
940 16 : ol = sizeof(ib);
941 16 : tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
942 :
943 : /* set input state */
944 16 : pib = ib;
945 16 : il = sizeof(ib) - ol;
946 16 : pob = ob;
947 16 : ol = sizeof(ob);
948 16 : tds_sys_iconv(cd, &pib, &il, &pob, &ol);
949 :
950 16 : tds_sys_iconv_close(cd2);
951 :
952 16 : if (l != 0)
953 : return l;
954 :
955 : /* last blindly attempt, skip minimum bytes */
956 8 : charsize = charset->min_bytes_per_char;
957 :
958 : /* fall through */
959 :
960 3868 : skip_charsize:
961 3868 : if (charsize > *input_size)
962 : return 0;
963 3868 : *input += charsize;
964 3868 : *input_size -= charsize;
965 3868 : return charsize;
966 : }
967 :
968 : #include <freetds/charset_lookup.h>
969 :
970 : /**
971 : * Determine canonical iconv character set.
972 : * \returns canonical position, or -1 if lookup failed.
973 : * \remarks Returned name can be used in bytes_per_char(), above.
974 : */
975 : int
976 13910 : tds_canonical_charset(const char *charset_name)
977 : {
978 13910 : const struct charset_alias *c = charset_lookup(charset_name, strlen(charset_name));
979 13910 : return c ? c->canonic : -1;
980 : }
981 :
982 : /**
983 : * Determine canonical iconv character set name.
984 : * \returns canonical name, or NULL if lookup failed.
985 : * \remarks Returned name can be used in bytes_per_char(), above.
986 : */
987 : const char *
988 3826 : tds_canonical_charset_name(const char *charset_name)
989 : {
990 : int res;
991 :
992 : /* get numeric pos */
993 3826 : res = tds_canonical_charset(charset_name);
994 3826 : if (res >= 0)
995 3826 : return canonic_charsets[res].name;
996 :
997 : return charset_name; /* hope for the best */
998 : }
999 :
1000 : static int
1001 27705 : collate2charset(TDSCONNECTION * conn, const TDS_UCHAR collate[5])
1002 : {
1003 27705 : int cp = 0;
1004 27705 : const int sql_collate = collate[4];
1005 : /* extract 16 bit of LCID (it's 20 bits but higher 4 are just variations) */
1006 27705 : const int lcid = TDS_GET_UA2LE(collate);
1007 :
1008 : /* starting with bit 20 (little endian, so 3rd byte bit 4) there are 8 bits:
1009 : * fIgnoreCase fIgnoreAccent fIgnoreKana fIgnoreWidth fBinary fBinary2 fUTF8 FRESERVEDBIT
1010 : * so fUTF8 is on the 4th byte bit 2 */
1011 27705 : if ((collate[3] & 0x4) != 0 && IS_TDS74_PLUS(conn))
1012 : return TDS_CHARSET_UTF_8;
1013 :
1014 : /*
1015 : * The table from the MSQLServer reference "Windows Collation Designators"
1016 : * and from " NLS Information for Microsoft Windows XP".
1017 : *
1018 : * See also https://go.microsoft.com/fwlink/?LinkId=119987 [MSDN-SQLCollation]
1019 : */
1020 :
1021 27705 : switch (sql_collate) {
1022 : case 30: /* SQL_Latin1_General_CP437_BIN */
1023 : case 31: /* SQL_Latin1_General_CP437_CS_AS */
1024 : case 32: /* SQL_Latin1_General_CP437_CI_AS */
1025 : case 33: /* SQL_Latin1_General_Pref_CP437_CI_AS */
1026 : case 34: /* SQL_Latin1_General_CP437_CI_AI */
1027 : return TDS_CHARSET_CP437;
1028 8 : case 40: /* SQL_Latin1_General_CP850_BIN */
1029 : case 41: /* SQL_Latin1_General_CP850_CS_AS */
1030 : case 42: /* SQL_Latin1_General_CP850_CI_AS */
1031 : case 43: /* SQL_Latin1_General_Pref_CP850_CI_AS */
1032 : case 44: /* SQL_Latin1_General_CP850_CI_AI */
1033 : case 49: /* SQL_1xCompat_CP850_CI_AS */
1034 8 : return TDS_CHARSET_CP850;
1035 27197 : case 51: /* SQL_Latin1_General_Cp1_CS_AS_KI_WI */
1036 : case 52: /* SQL_Latin1_General_Cp1_CI_AS_KI_WI */
1037 : case 53: /* SQL_Latin1_General_Pref_Cp1_CI_AS_KI_WI */
1038 : case 54: /* SQL_Latin1_General_Cp1_CI_AI_KI_WI */
1039 27197 : return TDS_CHARSET_CP1252;
1040 0 : case 55: /* SQL_AltDiction_CP850_CS_AS */
1041 : case 56: /* SQL_AltDiction_Pref_CP850_CI_AS */
1042 : case 57: /* SQL_AltDiction_CP850_CI_AI */
1043 : case 58: /* SQL_Scandinavian_Pref_CP850_CI_AS */
1044 : case 59: /* SQL_Scandinavian_CP850_CS_AS */
1045 : case 60: /* SQL_Scandinavian_CP850_CI_AS */
1046 : case 61: /* SQL_AltDiction_CP850_CI_AS */
1047 0 : return TDS_CHARSET_CP850;
1048 0 : case 80: /* SQL_Latin1_General_1250_BIN */
1049 : case 81: /* SQL_Latin1_General_CP1250_CS_AS */
1050 : case 82: /* SQL_Latin1_General_CP1250_CI_AS */
1051 : case 83: /* SQL_Czech_Cp1250_CS_AS_KI_WI */
1052 : case 84: /* SQL_Czech_Cp1250_CI_AS_KI_WI */
1053 : case 85: /* SQL_Hungarian_Cp1250_CS_AS_KI_WI */
1054 : case 86: /* SQL_Hungarian_Cp1250_CI_AS_KI_WI */
1055 : case 87: /* SQL_Polish_Cp1250_CS_AS_KI_WI */
1056 : case 88: /* SQL_Polish_Cp1250_CI_AS_KI_WI */
1057 : case 89: /* SQL_Romanian_Cp1250_CS_AS_KI_WI */
1058 : case 90: /* SQL_Romanian_Cp1250_CI_AS_KI_WI */
1059 : case 91: /* SQL_Croatian_Cp1250_CS_AS_KI_WI */
1060 : case 92: /* SQL_Croatian_Cp1250_CI_AS_KI_WI */
1061 : case 93: /* SQL_Slovak_Cp1250_CS_AS_KI_WI */
1062 : case 94: /* SQL_Slovak_Cp1250_CI_AS_KI_WI */
1063 : case 95: /* SQL_Slovenian_Cp1250_CS_AS_KI_WI */
1064 : case 96: /* SQL_Slovenian_Cp1250_CI_AS_KI_WI */
1065 0 : return TDS_CHARSET_CP1250;
1066 0 : case 104: /* SQL_Latin1_General_1251_BIN */
1067 : case 105: /* SQL_Latin1_General_CP1251_CS_AS */
1068 : case 106: /* SQL_Latin1_General_CP1251_CI_AS */
1069 : case 107: /* SQL_Ukrainian_Cp1251_CS_AS_KI_WI */
1070 : case 108: /* SQL_Ukrainian_Cp1251_CI_AS_KI_WI */
1071 0 : return TDS_CHARSET_CP1251;
1072 0 : case 112: /* SQL_Latin1_General_1253_BIN */
1073 : case 113: /* SQL_Latin1_General_CP1253_CS_AS */
1074 : case 114: /* SQL_Latin1_General_CP1253_CI_AS */
1075 : case 120: /* SQL_MixDiction_CP1253_CS_AS */
1076 : case 121: /* SQL_AltDiction_CP1253_CS_AS */
1077 : case 122: /* SQL_AltDiction2_CP1253_CS_AS */
1078 : case 124: /* SQL_Latin1_General_CP1253_CI_AI */
1079 0 : return TDS_CHARSET_CP1253;
1080 0 : case 128: /* SQL_Latin1_General_1254_BIN */
1081 : case 129: /* SQL_Latin1_General_Cp1254_CS_AS_KI_WI */
1082 : case 130: /* SQL_Latin1_General_Cp1254_CI_AS_KI_WI */
1083 0 : return TDS_CHARSET_CP1254;
1084 0 : case 136: /* SQL_Latin1_General_1255_BIN */
1085 : case 137: /* SQL_Latin1_General_CP1255_CS_AS */
1086 : case 138: /* SQL_Latin1_General_CP1255_CI_AS */
1087 0 : return TDS_CHARSET_CP1255;
1088 0 : case 144: /* SQL_Latin1_General_1256_BIN */
1089 : case 145: /* SQL_Latin1_General_CP1256_CS_AS */
1090 : case 146: /* SQL_Latin1_General_CP1256_CI_AS */
1091 0 : return TDS_CHARSET_CP1256;
1092 0 : case 152: /* SQL_Latin1_General_1257_BIN */
1093 : case 153: /* SQL_Latin1_General_CP1257_CS_AS */
1094 : case 154: /* SQL_Latin1_General_CP1257_CI_AS */
1095 : case 155: /* SQL_Estonian_Cp1257_CS_AS_KI_WI */
1096 : case 156: /* SQL_Estonian_Cp1257_CI_AS_KI_WI */
1097 : case 157: /* SQL_Latvian_Cp1257_CS_AS_KI_WI */
1098 : case 158: /* SQL_Latvian_Cp1257_CI_AS_KI_WI */
1099 : case 159: /* SQL_Lithuanian_Cp1257_CS_AS_KI_WI */
1100 : case 160: /* SQL_Lithuanian_Cp1257_CI_AS_KI_WI */
1101 0 : return TDS_CHARSET_CP1257;
1102 0 : case 183: /* SQL_Danish_Pref_CP1_CI_AS */
1103 : case 184: /* SQL_SwedishPhone_Pref_CP1_CI_AS */
1104 : case 185: /* SQL_SwedishStd_Pref_CP1_CI_AS */
1105 : case 186: /* SQL_Icelandic_Pref_CP1_CI_AS */
1106 0 : return TDS_CHARSET_CP1252;
1107 : }
1108 :
1109 500 : switch (lcid) {
1110 : case 0x405:
1111 : case 0x40e: /* 0x1040e */
1112 : case 0x415:
1113 : case 0x418:
1114 : case 0x41a:
1115 : case 0x41b:
1116 : case 0x41c:
1117 : case 0x424:
1118 : case 0x442:
1119 : case 0x81a:
1120 : case 0x104e: /* ?? */
1121 : case 0x141a:
1122 : cp = TDS_CHARSET_CP1250;
1123 : break;
1124 0 : case 0x402:
1125 : case 0x419:
1126 : case 0x422:
1127 : case 0x423:
1128 : case 0x42f:
1129 : case 0x43f:
1130 : case 0x440:
1131 : case 0x444:
1132 : case 0x450:
1133 : case 0x82c:
1134 : case 0x843:
1135 : case 0xc1a:
1136 : case 0x46d:
1137 : case 0x201a:
1138 : case 0x485:
1139 0 : cp = TDS_CHARSET_CP1251;
1140 0 : break;
1141 476 : case 0x1007:
1142 : case 0x1009:
1143 : case 0x100a:
1144 : case 0x100c:
1145 : case 0x1407:
1146 : case 0x1409:
1147 : case 0x140a:
1148 : case 0x140c:
1149 : case 0x1809:
1150 : case 0x180a:
1151 : case 0x180c:
1152 : case 0x1c09:
1153 : case 0x1c0a:
1154 : case 0x2009:
1155 : case 0x200a:
1156 : case 0x2409:
1157 : case 0x240a:
1158 : case 0x2809:
1159 : case 0x280a:
1160 : case 0x2c09:
1161 : case 0x2c0a:
1162 : case 0x3009:
1163 : case 0x300a:
1164 : case 0x3409:
1165 : case 0x340a:
1166 : case 0x380a:
1167 : case 0x3c0a:
1168 : case 0x400a:
1169 : case 0x403:
1170 : case 0x406:
1171 : case 0x417:
1172 : case 0x42e:
1173 : case 0x43b:
1174 : case 0x452:
1175 : case 0x462:
1176 : case 0x47a:
1177 : case 0x47c:
1178 : case 0x47e:
1179 : case 0x483:
1180 : case 0x407: /* 0x10407 */
1181 : case 0x409:
1182 : case 0x40a:
1183 : case 0x40b:
1184 : case 0x40c:
1185 : case 0x40f:
1186 : case 0x410:
1187 : case 0x413:
1188 : case 0x414:
1189 : case 0x416:
1190 : case 0x41d:
1191 : case 0x421:
1192 : case 0x42d:
1193 : case 0x436:
1194 : case 0x437: /* 0x10437 */
1195 : case 0x438:
1196 : /*case 0x439: ??? Unicode only */
1197 : case 0x43e:
1198 : case 0x440a:
1199 : case 0x441:
1200 : case 0x456:
1201 : case 0x480a:
1202 : case 0x4c0a:
1203 : case 0x500a:
1204 : case 0x807:
1205 : case 0x809:
1206 : case 0x80a:
1207 : case 0x80c:
1208 : case 0x810:
1209 : case 0x813:
1210 : case 0x814:
1211 : case 0x816:
1212 : case 0x81d:
1213 : case 0x83b:
1214 : case 0x83e:
1215 : case 0x85f:
1216 : case 0xc07:
1217 : case 0xc09:
1218 : case 0xc0a:
1219 : case 0xc0c:
1220 476 : cp = TDS_CHARSET_CP1252;
1221 476 : break;
1222 0 : case 0x408:
1223 0 : cp = TDS_CHARSET_CP1253;
1224 0 : break;
1225 0 : case 0x41f:
1226 : case 0x42c:
1227 : case 0x443:
1228 0 : cp = TDS_CHARSET_CP1254;
1229 0 : break;
1230 8 : case 0x40d:
1231 8 : cp = TDS_CHARSET_CP1255;
1232 8 : break;
1233 0 : case 0x1001:
1234 : case 0x1401:
1235 : case 0x1801:
1236 : case 0x1c01:
1237 : case 0x2001:
1238 : case 0x2401:
1239 : case 0x2801:
1240 : case 0x2c01:
1241 : case 0x3001:
1242 : case 0x3401:
1243 : case 0x3801:
1244 : case 0x3c01:
1245 : case 0x4001:
1246 : case 0x401:
1247 : case 0x480:
1248 : case 0x420:
1249 : case 0x429:
1250 : case 0x48c:
1251 : case 0x801:
1252 : case 0xc01:
1253 0 : cp = TDS_CHARSET_CP1256;
1254 0 : break;
1255 0 : case 0x425:
1256 : case 0x426:
1257 : case 0x427:
1258 : case 0x827: /* ?? */
1259 0 : cp = TDS_CHARSET_CP1257;
1260 0 : break;
1261 0 : case 0x42a:
1262 0 : cp = TDS_CHARSET_CP1258;
1263 0 : break;
1264 0 : case 0x41e:
1265 0 : cp = TDS_CHARSET_CP874;
1266 0 : break;
1267 0 : case 0x411: /* 0x10411 */
1268 0 : cp = TDS_CHARSET_CP932;
1269 0 : break;
1270 16 : case 0x1004:
1271 : case 0x804: /* 0x20804 */
1272 16 : cp = TDS_CHARSET_GB18030;
1273 16 : break;
1274 0 : case 0x412: /* 0x10412 */
1275 0 : cp = TDS_CHARSET_CP949;
1276 0 : break;
1277 0 : case 0x1404:
1278 : case 0x404: /* 0x30404 */
1279 : case 0xc04:
1280 0 : cp = TDS_CHARSET_CP950;
1281 0 : break;
1282 0 : default:
1283 0 : cp = TDS_CHARSET_CP1252;
1284 : }
1285 :
1286 : return cp;
1287 : }
1288 :
1289 : /**
1290 : * Get iconv information from a LCID (to support different column encoding under MSSQL2K)
1291 : */
1292 : TDSICONV *
1293 22143 : tds_iconv_from_collate(TDSCONNECTION * conn, const TDS_UCHAR collate[5])
1294 : {
1295 22143 : int canonic_charset = collate2charset(conn, collate);
1296 :
1297 : /* same as client (usually this is true, so this improve performance) ? */
1298 22143 : if (conn->char_convs[client2server_chardata]->to.charset.canonic == canonic_charset)
1299 : return conn->char_convs[client2server_chardata];
1300 :
1301 3112 : return tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset);
1302 : }
1303 :
1304 : /**
1305 : * Returns a collation name for the given charset.
1306 : * It's used more to specify the encoding, the collation is usually case
1307 : * insensitive and accent sensitive.
1308 : */
1309 : const char *
1310 628 : tds_canonical_collate_name(int canonic_charset)
1311 : {
1312 : /*
1313 : * The name returned is chosen for maximum compatibility,
1314 : * most are supported by MSSQL 2000.
1315 : */
1316 628 : switch (canonic_charset) {
1317 : case TDS_CHARSET_CP437:
1318 : return "SQL_Latin1_General_CP437_CI_AS";
1319 4 : case TDS_CHARSET_CP850:
1320 4 : return "SQL_Latin1_General_CP850_CI_AS";
1321 0 : case TDS_CHARSET_CP874:
1322 0 : return "Thai_CI_AS";
1323 0 : case TDS_CHARSET_CP932:
1324 0 : return "Japanese_CI_AS";
1325 0 : case TDS_CHARSET_CP949:
1326 0 : return "Korean_Wansung_CI_AS";
1327 0 : case TDS_CHARSET_CP950:
1328 0 : return "Chinese_Taiwan_Bopomofo_CI_AS";
1329 0 : case TDS_CHARSET_CP1250:
1330 0 : return "SQL_Latin1_General_CP1250_CI_AS";
1331 0 : case TDS_CHARSET_CP1251:
1332 0 : return "SQL_Latin1_General_CP1251_CI_AS";
1333 : case TDS_CHARSET_CP1252:
1334 : break;
1335 0 : case TDS_CHARSET_CP1253:
1336 0 : return "SQL_Latin1_General_CP1253_CI_AS";
1337 0 : case TDS_CHARSET_CP1254:
1338 0 : return "SQL_Latin1_General_CP1254_CI_AS";
1339 0 : case TDS_CHARSET_CP1255:
1340 0 : return "SQL_Latin1_General_CP1255_CI_AS";
1341 0 : case TDS_CHARSET_CP1256:
1342 0 : return "SQL_Latin1_General_CP1256_CI_AS";
1343 0 : case TDS_CHARSET_CP1257:
1344 0 : return "SQL_Latin1_General_CP1257_CI_AS";
1345 0 : case TDS_CHARSET_CP1258:
1346 0 : return "Vietnamese_CI_AS";
1347 0 : case TDS_CHARSET_GB18030:
1348 0 : return "Chinese_PRC_CI_AS";
1349 0 : case TDS_CHARSET_UTF_8:
1350 0 : return "Chinese_PRC_90_CI_AS_SC_UTF8";
1351 : }
1352 624 : return "SQL_Latin1_General_CP1_CI_AS";
1353 : }
1354 :
1355 : /** @} */
|