Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)kiconv.c	1.1	07/10/08 SMI"
     27 
     28 /*
     29  * Kernel iconv code conversion functions (PSARC/2007/173).
     30  *
     31  * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
     32  * Interface stability: Committed.
     33  */
     34 
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/sysmacros.h>
     38 #include <sys/systm.h>
     39 #include <sys/debug.h>
     40 #include <sys/kmem.h>
     41 #include <sys/sunddi.h>
     42 #include <sys/ksynch.h>
     43 #include <sys/modctl.h>
     44 #include <sys/byteorder.h>
     45 #include <sys/errno.h>
     46 #include <sys/kiconv.h>
     47 #include <sys/kiconv_latin1.h>
     48 
     49 
     50 /*
     51  * The following macros indicate ids to the correct code conversion mapping
     52  * data tables to use. The actual tables are coming from <sys/kiconv_latin1.h>.
     53  */
     54 #define	KICONV_TBLID_1252		(0x00)
     55 #define	KICONV_TBLID_8859_1		(0x01)
     56 #define	KICONV_TBLID_8859_15		(0x02)
     57 #define	KICONV_TBLID_850		(0x03)
     58 
     59 #define	KICONV_MAX_MAPPING_TBLID	(0x03)
     60 
     61 /*
     62  * The following tables are coming from u8_textprep.c. We use them to
     63  * check on validity of UTF-8 characters and their bytes.
     64  */
/*
 * Indexed by the first byte of a character. In this file, a value of zero or
 * less is treated as an illegal byte and a positive value as the number of
 * bytes of the character.
 */
extern const int8_t u8_number_of_bytes[];
/*
 * Both indexed by the first byte of a multi-byte character; they give the
 * smallest and the biggest values accepted for the second byte.
 */
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];
     68 
     69 
     70 /*
     71  * The following four functions, open_to_1252(), open_to_88591(),
     72  * open_to_885915(), and open_to_850(), are kiconv_open functions from
     73  * UTF-8 to corresponding single byte codesets.
     74  */
     75 static void *
     76 open_to_1252()
     77 {
     78 	kiconv_state_t s;
     79 
     80 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
     81 	s->id = KICONV_TBLID_1252;
     82 	s->bom_processed = 0;
     83 
     84 	return ((void *)s);
     85 }
     86 
     87 static void *
     88 open_to_88591()
     89 {
     90 	kiconv_state_t s;
     91 
     92 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
     93 	s->id = KICONV_TBLID_8859_1;
     94 	s->bom_processed = 0;
     95 
     96 	return ((void *)s);
     97 }
     98 
     99 static void *
    100 open_to_885915()
    101 {
    102 	kiconv_state_t s;
    103 
    104 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
    105 	s->id = KICONV_TBLID_8859_15;
    106 	s->bom_processed = 0;
    107 
    108 	return ((void *)s);
    109 }
    110 
    111 static void *
    112 open_to_850()
    113 {
    114 	kiconv_state_t s;
    115 
    116 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
    117 	s->id = KICONV_TBLID_850;
    118 	s->bom_processed = 0;
    119 
    120 	return ((void *)s);
    121 }
    122 
    123 /*
    124  * The following four functions, open_fr_1252(), open_fr_88591(),
    125  * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
    126  * corresponding single byte codesets to UTF-8.
    127  */
    128 static void *
    129 open_fr_1252()
    130 {
    131 	return ((void *)KICONV_TBLID_1252);
    132 }
    133 
    134 static void *
    135 open_fr_88591()
    136 {
    137 	return ((void *)KICONV_TBLID_8859_1);
    138 }
    139 
    140 static void *
    141 open_fr_885915()
    142 {
    143 	return ((void *)KICONV_TBLID_8859_15);
    144 }
    145 
    146 static void *
    147 open_fr_850()
    148 {
    149 	return ((void *)KICONV_TBLID_850);
    150 }
    151 
    152 /*
    153  * The following close_to_sb() function is kiconv_close function for
    154  * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
    155  * is kiconv_close function for the conversions from single byte codesets to
    156  * UTF-8.
    157  */
    158 static int
    159 close_to_sb(void *s)
    160 {
    161 	if (! s || s == (void *)-1)
    162 		return (EBADF);
    163 
    164 	kmem_free(s, sizeof (kiconv_state_data_t));
    165 
    166 	return (0);
    167 }
    168 
    169 static int
    170 close_fr_sb(void *s)
    171 {
    172 	if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
    173 		return (EBADF);
    174 
    175 	return (0);
    176 }
    177 
    178 /*
    179  * The following is the common kiconv function for conversions from UTF-8
    180  * to single byte codesets.
    181  */
    182 static size_t
    183 kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
    184 	size_t *outbytesleft, int *errno)
    185 {
    186 	size_t id;
    187 	size_t ret_val;
    188 	uchar_t *ib;
    189 	uchar_t *oldib;
    190 	uchar_t *ob;
    191 	uchar_t *ibtail;
    192 	uchar_t *obtail;
    193 	uint32_t u8;
    194 	size_t i;
    195 	size_t l;
    196 	size_t h;
    197 	size_t init_h;
    198 	int8_t sz;
    199 	boolean_t second;
    200 
    201 	/* Check on the kiconv code conversion descriptor. */
    202 	if (! kcd || kcd == (void *)-1) {
    203 		*errno = EBADF;
    204 		return ((size_t)-1);
    205 	}
    206 
    207 	/*
    208 	 * Get the table id we are going to use for the code conversion
    209 	 * and let's double check on it.
    210 	 */
    211 	id = ((kiconv_state_t)kcd)->id;
    212 	if (id > KICONV_MAX_MAPPING_TBLID) {
    213 		*errno = EBADF;
    214 		return ((size_t)-1);
    215 	}
    216 
    217 	/* If this is a state reset request, process and return. */
    218 	if (! inbuf || ! (*inbuf)) {
    219 		((kiconv_state_t)kcd)->bom_processed = 0;
    220 		return ((size_t)0);
    221 	}
    222 
    223 	ret_val = 0;
    224 	ib = (uchar_t *)*inbuf;
    225 	ob = (uchar_t *)*outbuf;
    226 	ibtail = ib + *inbytesleft;
    227 	obtail = ob + *outbytesleft;
    228 
    229 	/*
    230 	 * The inital high value for the binary search we will be using
    231 	 * shortly is a literal constant as of today but to be future proof,
    232 	 * let's calculate it like the following at here.
    233 	 */
    234 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
    235 
    236 	/*
    237 	 * If we haven't checked on the UTF-8 signature BOM character in
    238 	 * the beginning of the conversion data stream, we check it and if
    239 	 * find one, we skip it since we have no use for it.
    240 	 */
    241 	if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
    242 	    *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
    243 			ib += 3;
    244 	((kiconv_state_t)kcd)->bom_processed = 1;
    245 
    246 	while (ib < ibtail) {
    247 		sz = u8_number_of_bytes[*ib];
    248 		if (sz <= 0) {
    249 			*errno = EILSEQ;
    250 			ret_val = (size_t)-1;
    251 			break;
    252 		}
    253 
    254 		/*
    255 		 * If there is no room to write at the output buffer,
    256 		 * issue E2BIG error.
    257 		 */
    258 		if (ob >= obtail) {
    259 			*errno = E2BIG;
    260 			ret_val = (size_t)-1;
    261 			break;
    262 		}
    263 
    264 		/*
    265 		 * If it is a 7-bit ASCII character, we don't need to
    266 		 * process further and we just copy the character over.
    267 		 *
    268 		 * If not, we collect the character bytes up to four bytes,
    269 		 * validate the bytes, and binary search for the corresponding
    270 		 * single byte codeset character byte. If we find it from
    271 		 * the mapping table, we put that into the output buffer;
    272 		 * otherwise, we put a replacement character instead as
    273 		 * a non-identical conversion.
    274 		 */
    275 		if (sz == 1) {
    276 			*ob++ = *ib++;
    277 			continue;
    278 		}
    279 
    280 		/*
    281 		 * Issue EINVAL error if input buffer has an incomplete
    282 		 * character at the end of the buffer.
    283 		 */
    284 		if ((ibtail - ib) < sz) {
    285 			*errno = EINVAL;
    286 			ret_val = (size_t)-1;
    287 			break;
    288 		}
    289 
    290 		/*
    291 		 * We collect UTF-8 character bytes and also check if
    292 		 * this is a valid UTF-8 character without any bogus bytes
    293 		 * based on the latest UTF-8 binary representation.
    294 		 */
    295 		oldib = ib;
    296 		u8 = *ib++;
    297 		second = B_TRUE;
    298 		for (i = 1; i < sz; i++) {
    299 			if (second) {
    300 				if (*ib < u8_valid_min_2nd_byte[u8] ||
    301 				    *ib > u8_valid_max_2nd_byte[u8]) {
    302 					*errno = EILSEQ;
    303 					ret_val = (size_t)-1;
    304 					ib = oldib;
    305 					goto TO_SB_ILLEGAL_CHAR_ERR;
    306 				}
    307 				second = B_FALSE;
    308 			} else if (*ib < 0x80 || *ib > 0xbf) {
    309 				*errno = EILSEQ;
    310 				ret_val = (size_t)-1;
    311 				ib = oldib;
    312 				goto TO_SB_ILLEGAL_CHAR_ERR;
    313 			}
    314 			u8 = (u8 << 8) | ((uint32_t)*ib);
    315 			ib++;
    316 		}
    317 
    318 		i = l = 0;
    319 		h = init_h;
    320 		while (l <= h) {
    321 			i = (l + h) / 2;
    322 			if (to_sb_tbl[id][i].u8 == u8)
    323 				break;
    324 			else if (to_sb_tbl[id][i].u8 < u8)
    325 				l = i + 1;
    326 			else
    327 				h = i - 1;
    328 		}
    329 
    330 		if (to_sb_tbl[id][i].u8 == u8) {
    331 			*ob++ = to_sb_tbl[id][i].sb;
    332 		} else {
    333 			/*
    334 			 * If we don't find a character in the target
    335 			 * codeset, we insert an ASCII replacement character
    336 			 * at the output buffer and indicate such
    337 			 * "non-identical" conversion by increasing the
    338 			 * return value which is the non-identical conversion
    339 			 * counter if bigger than 0.
    340 			 */
    341 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
    342 			ret_val++;
    343 		}
    344 	}
    345 
    346 TO_SB_ILLEGAL_CHAR_ERR:
    347 	*inbuf = (char *)ib;
    348 	*inbytesleft = ibtail - ib;
    349 	*outbuf = (char *)ob;
    350 	*outbytesleft = obtail - ob;
    351 
    352 	return (ret_val);
    353 }
    354 
    355 /*
    356  * The following is the common kiconv function from single byte codesets to
    357  * UTF-8.
    358  */
    359 static size_t
    360 kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
    361 	size_t *outbytesleft, int *errno)
    362 {
    363 	size_t ret_val;
    364 	uchar_t *ib;
    365 	uchar_t *ob;
    366 	uchar_t *ibtail;
    367 	uchar_t *obtail;
    368 	size_t i;
    369 	size_t k;
    370 	int8_t sz;
    371 
    372 	/* Check on the kiconv code conversion descriptor validity. */
    373 	if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
    374 		*errno = EBADF;
    375 		return ((size_t)-1);
    376 	}
    377 
    378 	/*
    379 	 * If this is a state reset request, there is nothing to do and so
    380 	 * we just return.
    381 	 */
    382 	if (! inbuf || ! (*inbuf))
    383 		return ((size_t)0);
    384 
    385 	ret_val = 0;
    386 	ib = (uchar_t *)*inbuf;
    387 	ob = (uchar_t *)*outbuf;
    388 	ibtail = ib + *inbytesleft;
    389 	obtail = ob + *outbytesleft;
    390 
    391 	while (ib < ibtail) {
    392 		/*
    393 		 * If this is a 7-bit ASCII character, we just copy over and
    394 		 * that's all we need to do for this character.
    395 		 */
    396 		if (*ib < 0x80) {
    397 			if (ob >= obtail) {
    398 				*errno = E2BIG;
    399 				ret_val = (size_t)-1;
    400 				break;
    401 			}
    402 
    403 			*ob++ = *ib++;
    404 			continue;
    405 		}
    406 
    407 		/*
    408 		 * Otherwise, we get the corresponding UTF-8 character bytes
    409 		 * from the mapping table and copy them over.
    410 		 *
    411 		 * We don't need to worry about if the UTF-8 character bytes
    412 		 * at the mapping tables are valid or not since they are good.
    413 		 */
    414 		k = *ib - 0x80;
    415 		sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
    416 
    417 		/*
    418 		 * If sz <= 0, that means we don't have any assigned character
    419 		 * at the code point, k + 0x80, of the single byte codeset
    420 		 * which is the fromcode. In other words, the input buffer
    421 		 * has an illegal character.
    422 		 */
    423 		if (sz <= 0) {
    424 			*errno = EILSEQ;
    425 			ret_val = (size_t)-1;
    426 			break;
    427 		}
    428 
    429 		if ((obtail - ob) < sz) {
    430 			*errno = E2BIG;
    431 			ret_val = (size_t)-1;
    432 			break;
    433 		}
    434 
    435 		for (i = 0; i < sz; i++)
    436 			*ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
    437 
    438 		ib++;
    439 	}
    440 
    441 	*inbuf = (char *)ib;
    442 	*inbytesleft = ibtail - ib;
    443 	*outbuf = (char *)ob;
    444 	*outbytesleft = obtail - ob;
    445 
    446 	return (ret_val);
    447 }
    448 
    449 /*
    450  * The following is the common kiconvstr function from UTF-8 to single byte
    451  * codesets.
    452  */
    453 static size_t
    454 kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
    455 	size_t *outlen, int flag, int *errno)
    456 {
    457 	size_t ret_val;
    458 	uchar_t *oldib;
    459 	uchar_t *ibtail;
    460 	uchar_t *obtail;
    461 	uint32_t u8;
    462 	size_t i;
    463 	size_t l;
    464 	size_t h;
    465 	size_t init_h;
    466 	int8_t sz;
    467 	boolean_t second;
    468 	boolean_t do_not_ignore_null;
    469 
    470 	/* Let's make sure that the table id is within the valid boundary. */
    471 	if (id > KICONV_MAX_MAPPING_TBLID) {
    472 		*errno = EBADF;
    473 		return ((size_t)-1);
    474 	}
    475 
    476 	ret_val = 0;
    477 	ibtail = ib + *inlen;
    478 	obtail = ob + *outlen;
    479 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
    480 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
    481 
    482 	/* Skip any UTF-8 signature BOM character in the beginning. */
    483 	if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
    484 	    *(ib + 2) == 0xbf)
    485 			ib += 3;
    486 
    487 	/*
    488 	 * Basically this is pretty much the same as kiconv_to_sb() except
    489 	 * that we are now accepting two flag values and doing the processing
    490 	 * accordingly.
    491 	 */
    492 	while (ib < ibtail) {
    493 		sz = u8_number_of_bytes[*ib];
    494 		if (sz <= 0) {
    495 			if (flag & KICONV_REPLACE_INVALID) {
    496 				if (ob >= obtail) {
    497 					*errno = E2BIG;
    498 					ret_val = (size_t)-1;
    499 					break;
    500 				}
    501 
    502 				ib++;
    503 				goto STR_TO_SB_REPLACE_INVALID;
    504 			}
    505 
    506 			*errno = EILSEQ;
    507 			ret_val = (size_t)-1;
    508 			break;
    509 		}
    510 
    511 		if (*ib == '\0' && do_not_ignore_null)
    512 			break;
    513 
    514 		if (ob >= obtail) {
    515 			*errno = E2BIG;
    516 			ret_val = (size_t)-1;
    517 			break;
    518 		}
    519 
    520 		if (sz == 1) {
    521 			*ob++ = *ib++;
    522 			continue;
    523 		}
    524 
    525 		if ((ibtail - ib) < sz) {
    526 			if (flag & KICONV_REPLACE_INVALID) {
    527 				ib = ibtail;
    528 				goto STR_TO_SB_REPLACE_INVALID;
    529 			}
    530 
    531 			*errno = EINVAL;
    532 			ret_val = (size_t)-1;
    533 			break;
    534 		}
    535 
    536 		oldib = ib;
    537 		u8 = *ib++;
    538 		second = B_TRUE;
    539 		for (i = 1; i < sz; i++) {
    540 			if (second) {
    541 				if (*ib < u8_valid_min_2nd_byte[u8] ||
    542 				    *ib > u8_valid_max_2nd_byte[u8]) {
    543 					if (flag & KICONV_REPLACE_INVALID) {
    544 						ib = oldib + sz;
    545 						goto STR_TO_SB_REPLACE_INVALID;
    546 					}
    547 
    548 					*errno = EILSEQ;
    549 					ret_val = (size_t)-1;
    550 					ib = oldib;
    551 					goto STR_TO_SB_ILLEGAL_CHAR_ERR;
    552 				}
    553 				second = B_FALSE;
    554 			} else if (*ib < 0x80 || *ib > 0xbf) {
    555 				if (flag & KICONV_REPLACE_INVALID) {
    556 					ib = oldib + sz;
    557 					goto STR_TO_SB_REPLACE_INVALID;
    558 				}
    559 
    560 				*errno = EILSEQ;
    561 				ret_val = (size_t)-1;
    562 				ib = oldib;
    563 				goto STR_TO_SB_ILLEGAL_CHAR_ERR;
    564 			}
    565 			u8 = (u8 << 8) | ((uint32_t)*ib);
    566 			ib++;
    567 		}
    568 
    569 		i = l = 0;
    570 		h = init_h;
    571 		while (l <= h) {
    572 			i = (l + h) / 2;
    573 			if (to_sb_tbl[id][i].u8 == u8)
    574 				break;
    575 			else if (to_sb_tbl[id][i].u8 < u8)
    576 				l = i + 1;
    577 			else
    578 				h = i - 1;
    579 		}
    580 
    581 		if (to_sb_tbl[id][i].u8 == u8) {
    582 			*ob++ = to_sb_tbl[id][i].sb;
    583 		} else {
    584 STR_TO_SB_REPLACE_INVALID:
    585 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
    586 			ret_val++;
    587 		}
    588 	}
    589 
    590 STR_TO_SB_ILLEGAL_CHAR_ERR:
    591 	*inlen = ibtail - ib;
    592 	*outlen = obtail - ob;
    593 
    594 	return (ret_val);
    595 }
    596 
    597 /*
 * The following four functions are entry points recorded in the conv_list[]
 * defined below.
    600  */
    601 static size_t
    602 kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
    603 	size_t *outlen, int flag, int *errno)
    604 {
    605 	return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
    606 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    607 }
    608 
    609 static size_t
    610 kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
    611 	size_t *outlen, int flag, int *errno)
    612 {
    613 	return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
    614 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    615 }
    616 
    617 static size_t
    618 kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
    619 	size_t *outlen, int flag, int *errno)
    620 {
    621 	return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
    622 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    623 }
    624 
    625 static size_t
    626 kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
    627 	size_t *outlen, int flag, int *errno)
    628 {
    629 	return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
    630 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    631 }
    632 
    633 /*
    634  * The following is the common kiconvstr function for conversions from
    635  * single byte codesets to UTF-8.
    636  */
    637 static size_t
    638 kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
    639 	size_t *outlen, int flag, int *errno)
    640 {
    641 	size_t ret_val;
    642 	uchar_t *ibtail;
    643 	uchar_t *obtail;
    644 	size_t i;
    645 	size_t k;
    646 	int8_t sz;
    647 	boolean_t do_not_ignore_null;
    648 
    649 	ret_val = 0;
    650 	ibtail = ib + *inlen;
    651 	obtail = ob + *outlen;
    652 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
    653 
    654 	while (ib < ibtail) {
    655 		if (*ib == '\0' && do_not_ignore_null)
    656 			break;
    657 
    658 		if (*ib < 0x80) {
    659 			if (ob >= obtail) {
    660 				*errno = E2BIG;
    661 				ret_val = (size_t)-1;
    662 				break;
    663 			}
    664 			*ob++ = *ib++;
    665 			continue;
    666 		}
    667 
    668 		k = *ib - 0x80;
    669 		sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
    670 
    671 		if (sz <= 0) {
    672 			if (flag & KICONV_REPLACE_INVALID) {
    673 				if ((obtail - ob) < 3) {
    674 					*errno = E2BIG;
    675 					ret_val = (size_t)-1;
    676 					break;
    677 				}
    678 
    679 				/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
    680 				*ob++ = 0xef;
    681 				*ob++ = 0xbf;
    682 				*ob++ = 0xbd;
    683 				ret_val++;
    684 				ib++;
    685 
    686 				continue;
    687 			}
    688 
    689 			*errno = EILSEQ;
    690 			ret_val = (size_t)-1;
    691 			break;
    692 		}
    693 
    694 		if ((obtail - ob) < sz) {
    695 			*errno = E2BIG;
    696 			ret_val = (size_t)-1;
    697 			break;
    698 		}
    699 
    700 		for (i = 0; i < sz; i++)
    701 			*ob++ = to_u8_tbl[id][k].u8[i];
    702 
    703 		ib++;
    704 	}
    705 
    706 	*inlen = ibtail - ib;
    707 	*outlen = obtail - ob;
    708 
    709 	return (ret_val);
    710 }
    711 
    712 /*
 * The following four functions are also entry points recorded in
 * the conv_list[] below.
    715  */
    716 static size_t
    717 kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
    718 	size_t *outlen, int flag, int *errno)
    719 {
    720 	return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
    721 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    722 }
    723 
    724 static size_t
    725 kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
    726 	size_t *outlen, int flag, int *errno)
    727 {
    728 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
    729 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    730 }
    731 
    732 static size_t
    733 kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
    734 	size_t *outlen, int flag, int *errno)
    735 {
    736 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
    737 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    738 }
    739 
    740 static size_t
    741 kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
    742 	size_t *outlen, int flag, int *errno)
    743 {
    744 	return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
    745 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
    746 }
    747 
    748 /*
    749  * The following static vector contains the normalized code names
    750  * and their corresponding code ids. They are somewhat arbitrarily ordered
    751  * based on marketing data available. A code id could repeat for aliases.
    752  *
    753  * The vector was generated by using a small utility program called
    754  * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
    755  *
    756  * The code ids must be portable, i.e., if needed, you can always generate
    757  * the code_list[] again with different code ids. You'll also need to
    758  * update the conv_list[] at below.
    759  */
    760 #define	KICONV_MAX_CODEID_ENTRY		68
    761 #define	KICONV_MAX_CODEID		42
    762 
    763 static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
    764 	{ "utf8", 0 },
    765 	{ "cp1252", 1 },
    766 	{ "1252", 1 },
    767 	{ "iso88591", 2 },
    768 	{ "iso885915", 3 },
    769 	{ "cp850", 4 },
    770 	{ "850", 4 },
    771 	{ "eucjp", 5 },
    772 	{ "eucjpms", 6 },
    773 	{ "cp932", 7 },
    774 	{ "932", 7 },
    775 	{ "shiftjis", 8 },
    776 	{ "pck", 8 },
    777 	{ "sjis", 8 },
    778 	{ "gb18030", 9 },
    779 	{ "gbk", 10 },
    780 	{ "cp936", 10 },
    781 	{ "936", 10 },
    782 	{ "euccn", 11 },
    783 	{ "euckr", 12 },
    784 	{ "unifiedhangul", 13 },
    785 	{ "cp949", 13 },
    786 	{ "949", 13 },
    787 	{ "big5", 14 },
    788 	{ "cp950", 14 },
    789 	{ "950", 14 },
    790 	{ "big5hkscs", 15 },
    791 	{ "euctw", 16 },
    792 	{ "cp950hkscs", 17 },
    793 	{ "cp1250", 18 },
    794 	{ "1250", 18 },
    795 	{ "iso88592", 19 },
    796 	{ "cp852", 20 },
    797 	{ "852", 20 },
    798 	{ "cp1251", 21 },
    799 	{ "1251", 21 },
    800 	{ "iso88595", 22 },
    801 	{ "koi8r", 23 },
    802 	{ "cp866", 24 },
    803 	{ "866", 24 },
    804 	{ "cp1253", 25 },
    805 	{ "1253", 25 },
    806 	{ "iso88597", 26 },
    807 	{ "cp737", 27 },
    808 	{ "737", 27 },
    809 	{ "cp1254", 28 },
    810 	{ "1254", 28 },
    811 	{ "iso88599", 29 },
    812 	{ "cp857", 30 },
    813 	{ "857", 30 },
    814 	{ "cp1256", 31 },
    815 	{ "1256", 31 },
    816 	{ "iso88596", 32 },
    817 	{ "cp720", 33 },
    818 	{ "720", 33 },
    819 	{ "cp1255", 34 },
    820 	{ "1255", 34 },
    821 	{ "iso88598", 35 },
    822 	{ "cp862", 36 },
    823 	{ "862", 36 },
    824 	{ "cp1257", 37 },
    825 	{ "1257", 37 },
    826 	{ "iso885913", 38 },
    827 	{ "iso885910", 39 },
    828 	{ "iso885911", 40 },
    829 	{ "tis620", 40 },
    830 	{ "iso88593", 41 },
    831 	{ "iso88594", 42 },
    832 };
    833 
    834 /*
    835  * The list of code conversions supported are grouped together per
    836  * module which will be loaded as needed.
    837  */
    838 #define	KICONV_MAX_CONVERSIONS		84
    839 
    840 static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
    841 	/* Embedded code conversions: */
    842 	{
    843 		1, 0, KICONV_EMBEDDED,
    844 		open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
    845 	},
    846 	{
    847 		0, 1, KICONV_EMBEDDED,
    848 		open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
    849 	},
    850 	{
    851 		2, 0, KICONV_EMBEDDED,
    852 		open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
    853 	},
    854 	{
    855 		0, 2, KICONV_EMBEDDED,
    856 		open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
    857 	},
    858 	{
    859 		3, 0, KICONV_EMBEDDED,
    860 		open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
    861 	},
    862 	{
    863 		0, 3, KICONV_EMBEDDED,
    864 		open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
    865 	},
    866 	{
    867 		4, 0, KICONV_EMBEDDED,
    868 		open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
    869 	},
    870 	{
    871 		0, 4, KICONV_EMBEDDED,
    872 		open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
    873 	},
    874 
    875 	/* kiconv_ja module conversions: */
    876 	{ 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    877 	{ 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    878 	{ 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    879 	{ 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    880 	{ 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    881 	{ 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    882 	{ 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    883 	{ 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
    884 
    885 	/* kiconv_sc module conversions: */
    886 	{ 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    887 	{ 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    888 	{ 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    889 	{ 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    890 	{ 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    891 	{ 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
    892 
    893 	/* kiconv_ko module conversions: */
    894 	{ 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
    895 	{ 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
    896 	{ 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
    897 	{ 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
    898 
    899 	/* kiconv_tc module conversions: */
    900 	{ 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    901 	{ 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    902 	{ 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    903 	{ 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    904 	{ 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    905 	{ 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    906 	{ 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    907 	{ 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
    908 
    909 	/* kiconv_emea module conversions: */
    910 	{ 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    911 	{ 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    912 	{ 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    913 	{ 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    914 	{ 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    915 	{ 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    916 	{ 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    917 	{ 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    918 	{ 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    919 	{ 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    920 	{ 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    921 	{ 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    922 	{ 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    923 	{ 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    924 	{ 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    925 	{ 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    926 	{ 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    927 	{ 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    928 	{ 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    929 	{ 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    930 	{ 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    931 	{ 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    932 	{ 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    933 	{ 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    934 	{ 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    935 	{ 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    936 	{ 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    937 	{ 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    938 	{ 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    939 	{ 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    940 	{ 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    941 	{ 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    942 	{ 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    943 	{ 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    944 	{ 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    945 	{ 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    946 	{ 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    947 	{ 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    948 	{ 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    949 	{ 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    950 	{ 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    951 	{ 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    952 	{ 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    953 	{ 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    954 	{ 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    955 	{ 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    956 	{ 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    957 	{ 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    958 	{ 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    959 	{ 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
    960 };
    961 
    962 /* The list of implemeted and supported modules. */
    963 static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
    964 	"kiconv_embedded", 0,
    965 	"kiconv_ja", 0,
    966 	"kiconv_sc", 0,
    967 	"kiconv_ko", 0,
    968 	"kiconv_tc", 0,
    969 	"kiconv_emea", 0,
    970 };
    971 
    972 /*
    973  * We use conv_list_lock to restrict data access of both conv_list[] and
    974  * module_list[] as they are tightly coupled critical sections that need to be
    975  * dealt together as a unit.
    976  */
    977 static kmutex_t conv_list_lock;
    978 
    979 void
    980 kiconv_init()
    981 {
    982 	mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
    983 }
    984 
    985 /*
    986  * The following is used to check on whether a kiconv module is being
    987  * used or not at the _fini() of the module.
    988  */
    989 size_t
    990 kiconv_module_ref_count(size_t mid)
    991 {
    992 	int count;
    993 
    994 	if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
    995 		return (0);
    996 
    997 	mutex_enter(&conv_list_lock);
    998 
    999 	count = module_list[mid].refcount;
   1000 
   1001 	mutex_exit(&conv_list_lock);
   1002 
   1003 	return (count);
   1004 }
   1005 
   1006 /*
   1007  * This function "normalizes" a given code name, n, by not including skippable
   1008  * characters and folding uppercase letters to corresponding lowercase letters.
   1009  * We only fold 7-bit ASCII uppercase characters since the names should be in
   1010  * Portable Character Set of 7-bit ASCII.
   1011  *
   1012  * By doing this, we will be able to maximize the code name matches.
   1013  */
   1014 static size_t
   1015 normalize_codename(const char *n)
   1016 {
   1017 	char s[KICONV_MAX_CODENAME_LEN + 1];
   1018 	size_t i;
   1019 
   1020 	if (n == NULL)
   1021 		return ((size_t)-1);
   1022 
   1023 	for (i = 0; *n; n++) {
   1024 		if (KICONV_SKIPPABLE_CHAR(*n))
   1025 			continue;
   1026 
   1027 		/* If unreasonably lengthy, we don't support such names. */
   1028 		if (i >= KICONV_MAX_CODENAME_LEN)
   1029 			return ((size_t)-1);
   1030 
   1031 		s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
   1032 	}
   1033 	s[i] = '\0';
   1034 
   1035 	/* With the normalized name, find the corresponding codeset id. */
   1036 	for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
   1037 		if (strcmp(s, code_list[i].name) == 0)
   1038 			return (code_list[i].id);
   1039 
   1040 	/*
   1041 	 * In future time, we will also have a few more lines of code at below
   1042 	 * that will deal with other user-created modules' fromcodes and
   1043 	 * tocodes including aliases in a different vector. For now, we don't
   1044 	 * support that but only the known names to this project at this time.
   1045 	 */
   1046 
   1047 	return ((size_t)-1);
   1048 }
   1049 
   1050 /*
   1051  * This function called from mod_install() registers supplied code
   1052  * conversions. At this point, it does not honor aliases and hence does not
   1053  * use nowait data field from the kiconv module info data structure.
   1054  */
   1055 int
   1056 kiconv_register_module(kiconv_module_info_t *info)
   1057 {
   1058 	size_t mid;
   1059 	size_t fid;
   1060 	size_t tid;
   1061 	size_t i;
   1062 	size_t j;
   1063 	kiconv_ops_t *op;
   1064 
   1065 	/* Validate the given kiconv module info. */
   1066 	if (info == NULL || info->module_name == NULL ||
   1067 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
   1068 		return (EINVAL);