/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- iconv_untestable
- gen_codepoint_utf16
- get_codepoint
- show_buf
- test_buffer
- test_codepoint
- test_next_codepoint
- test_first_1m
- test_random_5m
- test_string2key
- torture_local_iconv
- torture_local_iconv
1 /*
2 Unix SMB/CIFS implementation.
3
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
6
7 Copyright (C) Andrew Tridgell 2004
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "includes.h"
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
30
31 #if HAVE_NATIVE_ICONV
32
33 static bool iconv_untestable(struct torture_context *tctx)
/* [<][>][^][v][top][bottom][index][help] */
34 {
35 iconv_t cd;
36
37 if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
38 torture_skip(tctx, "system iconv disabled - skipping test");
39
40 cd = iconv_open("UTF-16LE", "UCS-4LE");
41 if (cd == (iconv_t)-1)
42 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
43 iconv_close(cd);
44
45 cd = iconv_open("UTF-16LE", "CP850");
46 if (cd == (iconv_t)-1)
47 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
48 iconv_close(cd);
49
50 return false;
51 }
52
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint: value to encode; handed to iconv as 4 little-endian bytes
  buf:       output buffer, at most 8 bytes are written to it
  size:      receives the number of bytes written to buf (0 when the
             converter could not be opened)

  Returns -1 if the converter cannot be opened or iconv() fails,
  otherwise the value iconv() returned (0 for a clean conversion).
  The conversion descriptor is opened on first use and kept in a
  function-level static for all later calls.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	enum { OUT_CAPACITY = 8 };
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			cd = NULL;
			*size = 0;
			return -1;
		}
	}

	/* serialise the codepoint as UCS-4LE */
	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = OUT_CAPACITY;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = OUT_CAPACITY - size_out;

	/* make the size_t -> int narrowing of the error value explicit */
	if (ret == (size_t)-1) {
		return -1;
	}
	return (int)ret;
}
86
87
/*
  work out the unicode codepoint of the first character in the buffer

  buf:     bytes to decode
  size:    number of bytes available in buf
  charset: iconv name of the encoding buf is in

  Returns the codepoint assembled from the 4 bytes of UCS-4LE output.
  The output bytes start out zeroed, so 0 is returned when the
  converter cannot be opened or when iconv() converts nothing.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4] = { 0, 0, 0, 0 };
	char *ptr_out = (char *)out;
	size_t size_out = sizeof(out);
	size_t size_in = size;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	/*
	  The result is deliberately ignored: the output only has room
	  for one character, so an error return is expected whenever
	  buf holds more than one.
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so a high byte never overflows a signed int */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
111
/*
  display a buffer with name prefix

  Prints "name", then each of the "size" bytes of "buf" as two hex
  digits followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	size_t i;	/* same type as the bound: no signed/unsigned compare */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
124
125 /*
126 given a UTF-16LE buffer, test the system and built-in iconv code to
127 make sure they do exactly the same thing in converting the buffer to
128 "charset", then convert it back again and ensure we get the same
129 buffer back
130 */
131 static bool test_buffer(struct torture_context *test,
/* [<][>][^][v][top][bottom][index][help] */
132 uint8_t *inbuf, size_t size, const char *charset)
133 {
134 uint8_t buf1[1000], buf2[1000], buf3[1000];
135 size_t outsize1, outsize2, outsize3;
136 const char *ptr_in;
137 char *ptr_out;
138 size_t size_in1, size_in2, size_in3;
139 size_t ret1, ret2, ret3, len1, len2;
140 int errno1, errno2;
141 static iconv_t cd;
142 static smb_iconv_t cd2, cd3;
143 static const char *last_charset;
144
145 if (cd && last_charset) {
146 iconv_close(cd);
147 smb_iconv_close(cd2);
148 smb_iconv_close(cd3);
149 cd = NULL;
150 }
151
152 if (!cd) {
153 cd = iconv_open(charset, "UTF-16LE");
154 if (cd == (iconv_t)-1) {
155 torture_fail(test,
156 talloc_asprintf(test,
157 "failed to open %s to UTF-16LE",
158 charset));
159 }
160 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
161 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162 last_charset = charset;
163 }
164
165 /* internal convert to charset - placing result in buf1 */
166 ptr_in = (const char *)inbuf;
167 ptr_out = (char *)buf1;
168 size_in1 = size;
169 outsize1 = sizeof(buf1);
170
171 memset(ptr_out, 0, outsize1);
172 errno = 0;
173 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
174 errno1 = errno;
175
176 /* system convert to charset - placing result in buf2 */
177 ptr_in = (const char *)inbuf;
178 ptr_out = (char *)buf2;
179 size_in2 = size;
180 outsize2 = sizeof(buf2);
181
182 memset(ptr_out, 0, outsize2);
183 errno = 0;
184 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
185 errno2 = errno;
186
187 len1 = sizeof(buf1) - outsize1;
188 len2 = sizeof(buf2) - outsize2;
189
190 /* codepoints above 1M are not interesting for now */
191 if (len2 > len1 &&
192 memcmp(buf1, buf2, len1) == 0 &&
193 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
194 return true;
195 }
196 if (len1 > len2 &&
197 memcmp(buf1, buf2, len2) == 0 &&
198 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
199 return true;
200 }
201
202 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
203
204 if (errno1 != errno2) {
205 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
206 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
207 torture_fail(test, talloc_asprintf(test,
208 "e1=%d/%s e2=%d/%s",
209 errno1, strerror(errno1),
210 errno2, strerror(errno2)));
211 }
212
213 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
214
215 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
216
217 if (len1 != len2 ||
218 memcmp(buf1, buf2, len1) != 0) {
219 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
220 show_buf(" IN1:", inbuf, size-size_in1);
221 show_buf(" IN2:", inbuf, size-size_in2);
222 show_buf("OUT1:", buf1, len1);
223 show_buf("OUT2:", buf2, len2);
224 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
225 torture_comment(test, "next codepoint is %u",
226 get_codepoint((char *)(buf2+len1), len2-len1, charset));
227 }
228 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
229 torture_comment(test, "next codepoint is %u",
230 get_codepoint((char *)(buf1+len2),len1-len2, charset));
231 }
232
233 torture_fail(test, "failed");
234 }
235
236 /* convert back to UTF-16, putting result in buf3 */
237 size = size - size_in1;
238 ptr_in = (const char *)buf1;
239 ptr_out = (char *)buf3;
240 size_in3 = len1;
241 outsize3 = sizeof(buf3);
242
243 memset(ptr_out, 0, outsize3);
244 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
245
246 /* we only internally support the first 1M codepoints */
247 if (outsize3 != sizeof(buf3) - size &&
248 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
249 size - (sizeof(buf3) - outsize3),
250 "UTF-16LE") >= (1<<20)) {
251 return true;
252 }
253
254 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
255 "pull failed - %s", strerror(errno)));
256
257 if (strncmp(charset, "UTF", 3) != 0) {
258 /* don't expect perfect mappings for non UTF charsets */
259 return true;
260 }
261
262
263 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
264 "wrong outsize3");
265
266 if (memcmp(buf3, inbuf, size) != 0) {
267 torture_comment(test, "pull bytes mismatch:");
268 show_buf("inbuf", inbuf, size);
269 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
270 torture_comment(test, "next codepoint is %u\n",
271 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
272 size - (sizeof(buf3) - outsize3),
273 "UTF-16LE"));
274 torture_fail(test, "");
275 }
276
277 return true;
278 }
279
280
281 /*
282 test the push_codepoint() and next_codepoint() functions for a given
283 codepoint
284 */
285 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
/* [<][>][^][v][top][bottom][index][help] */
286 {
287 uint8_t buf[10];
288 size_t size, size2;
289 codepoint_t c;
290
291 size = push_codepoint(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
292 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
293 "Invalid Codepoint range");
294
295 if (size == -1) return true;
296
297 buf[size] = random();
298 buf[size+1] = random();
299 buf[size+2] = random();
300 buf[size+3] = random();
301
302 c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
303
304 torture_assert(tctx, c == codepoint,
305 talloc_asprintf(tctx,
306 "next_codepoint(%u) failed - gave %u", codepoint, c));
307
308 torture_assert(tctx, size2 == size,
309 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
310 codepoint, (int)size2, (int)size));
311
312 return true;
313 }
314
/*
  run test_codepoint() over every codepoint below 1<<20, stopping at
  the first failure; returns true straight away when
  iconv_untestable() reports the tests cannot run
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int cp = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (cp < (1<<20)) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
327
/*
  for every codepoint below 1<<20 that gen_codepoint_utf16() can
  encode, check the UTF-16LE form with test_buffer() against "UTF-8"

  Codepoints for which gen_codepoint_utf16() returns non-zero are
  skipped. A progress line is written for every 1000th codepoint when
  the "progress" torture setting is on.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char utf16[1000];
	size_t utf16_len;
	unsigned int cp;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (cp = 0; cp < (1<<20); cp++) {
		if (gen_codepoint_utf16(cp, (char *)utf16, &utf16_len) != 0) {
			continue;
		}

		if (cp % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u \r", cp);
			fflush(stdout);
		}

		if (!test_buffer(tctx, utf16, utf16_len, "UTF-8")) {
			return false;
		}
	}

	return true;
}
354
/*
  feed random byte buffers through test_buffer() for both "UTF-8" and
  "CP850"

  Each of the 500000 iterations (note: not 5 million, despite the
  function name) builds a buffer of 0..99 bytes. Most bytes are below
  128, and some get the bits of 0xd8 and/or 0xdc or-ed in - presumably
  so that UTF-16 surrogate values show up often. Stops at the first
  buffer that fails.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx))
		return true;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		/* i is unsigned, so print it with %u like the progress line */
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
400
401
402 static bool test_string2key(struct torture_context *tctx)
/* [<][>][^][v][top][bottom][index][help] */
403 {
404 uint16_t *buf;
405 char *dest = NULL;
406 TALLOC_CTX *mem_ctx = talloc_new(tctx);
407 size_t len = (random()%1000)+1;
408 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
409 uint8_t le1[20];
410 uint8_t *munged1;
411 uint8_t *out1;
412 size_t ret;
413 int i;
414 const char *correct = "a\357\277\275b\357\277\275c\001defg";
415
416 buf = talloc_size(mem_ctx, len*2);
417 generate_random_buffer((uint8_t *)buf, len*2);
418
419 torture_comment(tctx, "converting random buffer\n");
420
421 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
422 torture_fail(tctx, "Failed to convert random buffer\n");
423 }
424
425 for (i=0;i<10;i++) {
426 SSVAL(&le1[2*i], 0, in1[i]);
427 }
428
429 torture_comment(tctx, "converting fixed buffer to UTF16\n");
430
431 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
432 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
433 }
434
435 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
436
437 torture_comment(tctx, "converting fixed buffer to UTF8\n");
438
439 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
440 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
441 }
442
443 torture_assert(tctx, strcmp(correct, out1) == 0, "conversion gave incorrect result\n");
444
445 talloc_free(mem_ctx);
446
447 return true;
448 }
449
450 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
/* [<][>][^][v][top][bottom][index][help] */
451 {
452 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
453
454 torture_suite_add_simple_test(suite, "string2key",
455 test_string2key);
456
457 torture_suite_add_simple_test(suite, "next_codepoint()",
458 test_next_codepoint);
459
460 torture_suite_add_simple_test(suite, "first 1M codepoints",
461 test_first_1m);
462
463 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
464 test_random_5m);
465
466 torture_suite_add_simple_test(suite, "string2key",
467 test_string2key);
468 return suite;
469 }
470
471 #else
472
473 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
/* [<][>][^][v][top][bottom][index][help] */
474 {
475 printf("No native iconv library - can't run iconv test\n");
476 return NULL;
477 }
478
479 #endif