Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  *
     21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     22  * Use is subject to license terms.
     23  */
     24 
     25 #include <errno.h>
     26 #include <locale.h>
     27 #include <langinfo.h>
     28 #include <iconv.h>
     29 #include <ctype.h>
     30 #include <wctype.h>
     31 #include <strings.h>
     32 #include <string.h>
     33 #include <stdio.h>
     34 #include <stdlib.h>
     35 #include "includes.h"
     36 #include "xmalloc.h"
     37 #include "xlist.h"
     38 #include "compat.h"
     39 #include "log.h"
     40 
#ifdef MIN
#undef MIN
#endif /* MIN */

/* Smaller of x and y; note that each argument may be evaluated twice. */
#define	MIN(x, y)	((x) < (y) ? (x) : (y))

/* Program run with "-a" by g11n_getlocales() to list the available locales. */
#define	LOCALE_PATH	"/usr/bin/locale"

/* two-char language code, '-' and two-char country code (e.g. "cs-CZ") */
#define	LANGTAG_MAX	5

static int locale_cmp(const void *d1, const void *d2);
static char *g11n_locale2langtag(char *locale);

static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);

/*
 * native_codeset records the codeset of the default system locale.
 * It is used to convert the contents of a file (e.g. /etc/issue) which is
 * supposed to be in the codeset of the default system locale.
 *
 * It is set once, by the first call to g11n_setlocale(), and is read by
 * g11n_convert_to_utf8() when its "native" argument is non-zero.
 */
static char *native_codeset;
     63 
     64 /*
     65  * Convert locale string name into a language tag. The caller is responsible for
     66  * freeing the memory allocated for the result.
     67  */
     68 static char *
     69 g11n_locale2langtag(char *locale)
     70 {
     71 	char *langtag;
     72 
     73 	/* base cases */
     74 	if (!locale || !*locale)
     75 		return (NULL);
     76 
     77 	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
     78 		return (xstrdup("i-default"));
     79 
     80 	/* punt for language codes which are not exactly 2 letters */
     81 	if (strlen(locale) < 2 ||
     82 	    !isalpha(locale[0]) ||
     83 	    !isalpha(locale[1]) ||
     84 	    (locale[2] != '\0' &&
     85 	    locale[2] != '_' &&
     86 	    locale[2] != '.' &&
     87 	    locale[2] != '@'))
     88 		return (NULL);
     89 
     90 
     91 	/* we have a primary language sub-tag */
     92 	langtag = (char *)xmalloc(LANGTAG_MAX + 1);
     93 
     94 	strncpy(langtag, locale, 2);
     95 	langtag[2] = '\0';
     96 
     97 	/* do we have country sub-tag? For example: cs_CZ */
     98 	if (locale[2] == '_') {
     99 		if (strlen(locale) < 5 ||
    100 		    !isalpha(locale[3]) ||
    101 		    !isalpha(locale[4]) ||
    102 		    (locale[5] != '\0' && (locale[5] != '.' &&
    103 		    locale[5] != '@'))) {
    104 			return (langtag);
    105 		}
    106 
    107 		/* example: create cs-CZ from cs_CZ */
    108 		if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
    109 		    locale + 3) == 5)
    110 			return (langtag);
    111 	}
    112 
    113 	/* in all other cases we just use the primary language sub-tag */
    114 	return (langtag);
    115 }
    116 
    117 uint_t
    118 g11n_langtag_is_default(char *langtag)
    119 {
    120 	return (strcmp(langtag, "i-default") == 0);
    121 }
    122 
    123 /*
    124  * This lang tag / locale matching function works only for two-character
    125  * language primary sub-tags and two-character country sub-tags.
    126  */
    127 uint_t
    128 g11n_langtag_matches_locale(char *langtag, char *locale)
    129 {
    130 	/* match "i-default" to the process' current locale if possible */
    131 	if (g11n_langtag_is_default(langtag)) {
    132 		if (strcasecmp(locale, "POSIX") == 0 ||
    133 		    strcasecmp(locale, "C") == 0)
    134 			return (1);
    135 		else
    136 			return (0);
    137 	}
    138 
    139 	/*
    140 	 * locale must be at least 2 chars long and the lang part must be
    141 	 * exactly two characters
    142 	 */
    143 	if (strlen(locale) < 2 ||
    144 	    (!isalpha(locale[0]) || !isalpha(locale[1]) ||
    145 	    (locale[2] != '\0' && locale[2] != '_' &&
    146 	    locale[2] != '.' && locale[2] != '@')))
    147 		return (0);
    148 
    149 	/* same thing with the langtag */
    150 	if (strlen(langtag) < 2 ||
    151 	    (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
    152 	    (langtag[2] != '\0' && langtag[2] != '-')))
    153 		return (0);
    154 
    155 	/* primary language sub-tag and the locale's language part must match */
    156 	if (strncasecmp(langtag, locale, 2) != 0)
    157 		return (0);
    158 
    159 	/*
    160 	 * primary language sub-tag and the locale's language match, now
    161 	 * fuzzy check country part
    162 	 */
    163 
    164 	/* neither langtag nor locale have more than one component */
    165 	if (langtag[2] == '\0' &&
    166 	    (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
    167 		return (2);
    168 
    169 	/* langtag has only one sub-tag... */
    170 	if (langtag[2] == '\0')
    171 		return (1);
    172 
    173 	/* locale has no country code... */
    174 	if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
    175 		return (1);
    176 
    177 	/* langtag has more than one subtag and the locale has a country code */
    178 
    179 	/* ignore second subtag if not two chars */
    180 	if (strlen(langtag) < 5)
    181 		return (1);
    182 
    183 	if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
    184 	    (langtag[5] != '\0' && langtag[5] != '-'))
    185 		return (1);
    186 
    187 	/* ignore rest of locale if there is no two-character country part */
    188 	if (strlen(locale) < 5)
    189 		return (1);
    190 
    191 	if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
    192 	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
    193 		return (1);
    194 
    195 	/* if the country part matches, return 2 */
    196 	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
    197 		return (2);
    198 
    199 	return (1);
    200 }
    201 
    202 char *
    203 g11n_getlocale()
    204 {
    205 	/* we have one text domain - always set it */
    206 	(void) textdomain(TEXT_DOMAIN);
    207 
    208 	/* if the locale is not set, set it from the env vars */
    209 	if (!setlocale(LC_MESSAGES, NULL))
    210 		(void) setlocale(LC_MESSAGES, "");
    211 
    212 	return (setlocale(LC_MESSAGES, NULL));
    213 }
    214 
    215 void
    216 g11n_setlocale(int category, const char *locale)
    217 {
    218 	char *curr;
    219 
    220 	if (native_codeset == NULL) {
    221 		/* set default locale, and record current codeset */
    222 		(void) setlocale(LC_ALL, "");
    223 		curr = nl_langinfo(CODESET);
    224 		native_codeset = xstrdup(curr);
    225 	}
    226 
    227 	/* we have one text domain - always set it */
    228 	(void) textdomain(TEXT_DOMAIN);
    229 
    230 	if (!locale)
    231 		return;
    232 
    233 	if (*locale && ((curr = setlocale(category, NULL))) &&
    234 	    strcmp(curr, locale) == 0)
    235 		return;
    236 
    237 	/* if <category> is bogus, setlocale() will do nothing */
    238 	(void) setlocale(category, locale);
    239 }
    240 
    241 char **
    242 g11n_getlocales()
    243 {
    244 	FILE *locale_out;
    245 	uint_t n_elems, list_size, long_line = 0;
    246 	char **list;
    247 	char locale[64];	/* 64 bytes is plenty for locale names */
    248 
    249 	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
    250 		return (NULL);
    251 
    252 	/*
    253 	 * start with enough room for 65 locales - that's a lot fewer than
    254 	 * all the locales available for installation, but a lot more than
    255 	 * what most users will need and install
    256 	 */
    257 	n_elems = 0;
    258 	list_size = 192;
    259 	list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
    260 	memset(list, 0, sizeof (char *) * (list_size + 1));
    261 
    262 	while (fgets(locale, sizeof (locale), locale_out)) {
    263 		/* skip long locale names (if any) */
    264 		if (!strchr(locale, '\n')) {
    265 			long_line = 1;
    266 			continue;
    267 		} else if (long_line) {
    268 			long_line = 0;
    269 			continue;
    270 		}
    271 
    272 		if (strncmp(locale, "iso_8859", 8) == 0)
    273 			/* ignore locale names like "iso_8859-1" */
    274 			continue;
    275 
    276 		if (n_elems == list_size) {
    277 			list_size *= 2;
    278 			list = (char **)xrealloc((void *) list,
    279 			    (list_size + 1) * sizeof (char *));
    280 			memset(&list[n_elems + 1], 0,
    281 			    sizeof (char *) * (list_size - n_elems + 1));
    282 		}
    283 
    284 		*(strchr(locale, '\n')) = '\0';	/* remove the trailing \n */
    285 		list[n_elems++] = xstrdup(locale);
    286 	}
    287 
    288 	(void) pclose(locale_out);
    289 
    290 	if (n_elems == 0) {
    291 		xfree(list);
    292 		return (NULL);
    293 	}
    294 
    295 	list[n_elems] = NULL;
    296 
    297 	qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
    298 	return (list);
    299 }
    300 
    301 char *
    302 g11n_getlangs()
    303 {
    304 	char *locale;
    305 
    306 	if (getenv("SSH_LANGS"))
    307 		return (xstrdup(getenv("SSH_LANGS")));
    308 
    309 	locale = g11n_getlocale();
    310 
    311 	if (!locale || !*locale)
    312 		return (xstrdup("i-default"));
    313 
    314 	return (g11n_locale2langtag(locale));
    315 }
    316 
    317 char *
    318 g11n_locales2langs(char **locale_set)
    319 {
    320 	char **p, **r, **q;
    321 	char *langtag, *langs;
    322 	int locales, skip;
    323 
    324 	for (locales = 0, p = locale_set; p && *p; p++)
    325 		locales++;
    326 
    327 	r = (char **)xmalloc((locales + 1) * sizeof (char *));
    328 	memset(r, 0, (locales + 1) * sizeof (char *));
    329 
    330 	for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) {
    331 		skip = 0;
    332 		if ((langtag = g11n_locale2langtag(*p)) == NULL)
    333 			continue;
    334 		for (q = r; (q - r) < locales; q++) {
    335 			if (!*q)
    336 				break;
    337 			if (*q && strcmp(*q, langtag) == 0)
    338 				skip = 1;
    339 		}
    340 		if (!skip)
    341 			*(q++) = langtag;
    342 		else
    343 			xfree(langtag);
    344 		*q = NULL;
    345 	}
    346 
    347 	langs = xjoin(r, ',');
    348 	g11n_freelist(r);
    349 
    350 	return (langs);
    351 }
    352 
    353 static int
    354 sortcmp(const void *d1, const void *d2)
    355 {
    356 	char *s1 = *(char **)d1;
    357 	char *s2 = *(char **)d2;
    358 
    359 	return (strcmp(s1, s2));
    360 }
    361 
    362 int
    363 g11n_langtag_match(char *langtag1, char *langtag2)
    364 {
    365 	int len1, len2;
    366 	char c1, c2;
    367 
    368 	len1 = (strchr(langtag1, '-')) ?
    369 	    (strchr(langtag1, '-') - langtag1)
    370 	    : strlen(langtag1);
    371 
    372 	len2 = (strchr(langtag2, '-')) ?
    373 	    (strchr(langtag2, '-') - langtag2)
    374 	    : strlen(langtag2);
    375 
    376 	/* no match */
    377 	if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
    378 		return (0);
    379 
    380 	c1 = *(langtag1 + len1);
    381 	c2 = *(langtag2 + len2);
    382 
    383 	/* no country sub-tags - exact match */
    384 	if (c1 == '\0' && c2 == '\0')
    385 		return (2);
    386 
    387 	/* one langtag has a country sub-tag, the other doesn't */
    388 	if (c1 == '\0' || c2 == '\0')
    389 		return (1);
    390 
    391 	/* can't happen - both langtags have a country sub-tag */
    392 	if (c1 != '-' || c2 != '-')
    393 		return (1);
    394 
    395 	/* compare country subtags */
    396 	langtag1 = langtag1 + len1 + 1;
    397 	langtag2 = langtag2 + len2 + 1;
    398 
    399 	len1 = (strchr(langtag1, '-')) ?
    400 	    (strchr(langtag1, '-') - langtag1) : strlen(langtag1);
    401 
    402 	len2 = (strchr(langtag2, '-')) ?
    403 	    (strchr(langtag2, '-') - langtag2) : strlen(langtag2);
    404 
    405 	if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
    406 		return (1);
    407 
    408 	/* country tags matched - exact match */
    409 	return (2);
    410 }
    411 
    412 char *
    413 g11n_langtag_set_intersect(char *set1, char *set2)
    414 {
    415 	char **list1, **list2, **list3, **p, **q, **r;
    416 	char *set3, *lang_subtag;
    417 	uint_t n1, n2, n3;
    418 	uint_t do_append;
    419 
    420 	list1 = xsplit(set1, ',');
    421 	list2 = xsplit(set2, ',');
    422 
    423 	for (n1 = 0, p = list1; p && *p; p++, n1++)
    424 		;
    425 	for (n2 = 0, p = list2; p && *p; p++, n2++)
    426 		;
    427 
    428 	list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
    429 	*list3 = NULL;
    430 
    431 	/*
    432 	 * we must not sort the user langtags - sorting or not the server's
    433 	 * should not affect the outcome
    434 	 */
    435 	qsort(list2, n2, sizeof (char *), sortcmp);
    436 
    437 	for (n3 = 0, p = list1; p && *p; p++) {
    438 		do_append = 0;
    439 		for (q = list2; q && *q; q++) {
    440 			if (g11n_langtag_match(*p, *q) != 2) continue;
    441 			/* append element */
    442 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
    443 				do_append = 1;
    444 				if (!*r)
    445 					break;
    446 				if (strcmp(*p, *r) == 0) {
    447 					do_append = 0;
    448 					break;
    449 				}
    450 			}
    451 			if (do_append && n3 <= (n1 + n2)) {
    452 				list3[n3++] = xstrdup(*p);
    453 				list3[n3] = NULL;
    454 			}
    455 		}
    456 	}
    457 
    458 	for (p = list1; p && *p; p++) {
    459 		do_append = 0;
    460 		for (q = list2; q && *q; q++) {
    461 			if (g11n_langtag_match(*p, *q) != 1)
    462 				continue;
    463 
    464 			/* append element */
    465 			lang_subtag = xstrdup(*p);
    466 			if (strchr(lang_subtag, '-'))
    467 				*(strchr(lang_subtag, '-')) = '\0';
    468 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
    469 				do_append = 1;
    470 				if (!*r)
    471 					break;
    472 				if (strcmp(lang_subtag, *r) == 0) {
    473 					do_append = 0;
    474 					break;
    475 				}
    476 			}
    477 			if (do_append && n3 <= (n1 + n2)) {
    478 				list3[n3++] = lang_subtag;
    479 				list3[n3] = NULL;
    480 			} else
    481 				xfree(lang_subtag);
    482 		}
    483 	}
    484 
    485 	set3 = xjoin(list3, ',');
    486 	xfree_split_list(list1);
    487 	xfree_split_list(list2);
    488 	xfree_split_list(list3);
    489 
    490 	return (set3);
    491 }
    492 
    493 char *
    494 g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
    495 {
    496 	char *list, *result;
    497 	char **xlist;
    498 
    499 	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
    500 	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
    501 
    502 	if (!list)
    503 		return (NULL);
    504 
    505 	xlist = xsplit(list, ',');
    506 
    507 	xfree(list);
    508 
    509 	if (!xlist || !*xlist)
    510 		return (NULL);
    511 
    512 	result = xstrdup(*xlist);
    513 	xfree_split_list(xlist);
    514 
    515 	return (result);
    516 }
    517 
    518 /*
    519  * Compare locales, preferring UTF-8 codesets to others, otherwise doing
    520  * a stright strcmp()
    521  */
    522 static int
    523 locale_cmp(const void *d1, const void *d2)
    524 {
    525 	char *dot_ptr;
    526 	char *s1 = *(char **)d1;
    527 	char *s2 = *(char **)d2;
    528 	int s1_is_utf8 = 0;
    529 	int s2_is_utf8 = 0;
    530 
    531 	/* check if s1 is a UTF-8 locale */
    532 	if (((dot_ptr = strchr((char *)s1, '.')) != NULL) &&
    533 	    (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
    534 	    (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
    535 		s1_is_utf8++;
    536 	}
    537 
    538 	/* check if s2 is a UTF-8 locale */
    539 	if (((dot_ptr = strchr((char *)s2, '.')) != NULL) &&
    540 	    (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
    541 	    (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
    542 		s2_is_utf8++;
    543 	}
    544 
    545 	/* prefer UTF-8 locales */
    546 	if (s1_is_utf8 && !s2_is_utf8)
    547 		return (-1);
    548 
    549 	if (s2_is_utf8 && !s1_is_utf8)
    550 		return (1);
    551 
    552 	/* prefer any locale over the default locales */
    553 	if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 ||
    554 	    strcmp(s1, "common") == 0) {
    555 		if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 &&
    556 		    strcmp(s2, "common") != 0)
    557 			return (1);
    558 	}
    559 
    560 	if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 ||
    561 	    strcmp(s2, "common") == 0) {
    562 		if (strcmp(s1, "C") != 0 &&
    563 		    strcmp(s1, "POSIX") != 0 &&
    564 		    strcmp(s1, "common") != 0)
    565 			return (-1);
    566 	}
    567 
    568 	return (strcmp(s1, s2));
    569 }
    570 
    571 
    572 char **
    573 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
    574 {
    575 	char **langtag_list, **result, **p, **q, **r;
    576 	char *s;
    577 	uint_t do_append, n_langtags, n_locales, n_results, max_results;
    578 
    579 	if (locale_set == NULL)
    580 		return (NULL);
    581 
    582 	/* count lang tags and locales */
    583 	for (n_locales = 0, p = locale_set; p && *p; p++)
    584 		n_locales++;
    585 
    586 	n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
    587 	/* count the number of langtags */
    588 	for (; s = strchr(s, ','); s++, n_langtags++)
    589 		;
    590 
    591 	qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
    592 
    593 	langtag_list = xsplit(langtag_set, ',');
    594 	for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
    595 		;
    596 
    597 	max_results = MIN(n_locales, n_langtags) * 2;
    598 	result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
    599 	*result = NULL;
    600 	n_results = 0;
    601 
    602 	/* more specific matches first */
    603 	for (p = langtag_list; p && *p; p++) {
    604 		do_append = 0;
    605 		for (q = locale_set; q && *q; q++) {
    606 			if (g11n_langtag_matches_locale(*p, *q) == 2) {
    607 				do_append = 1;
    608 				for (r = result; (r - result) <=
    609 				    MIN(n_locales, n_langtags); r++) {
    610 					if (!*r)
    611 						break;
    612 					if (strcmp(*q, *r) == 0) {
    613 						do_append = 0;
    614 						break;
    615 					}
    616 				}
    617 				if (do_append && n_results < max_results) {
    618 					result[n_results++] = xstrdup(*q);
    619 					result[n_results] = NULL;
    620 				}
    621 				break;
    622 			}
    623 		}
    624 	}
    625 
    626 	for (p = langtag_list; p && *p; p++) {
    627 		do_append = 0;
    628 		for (q = locale_set; q && *q; q++) {
    629 			if (g11n_langtag_matches_locale(*p, *q) == 1) {
    630 				do_append = 1;
    631 				for (r = result; (r - result) <=
    632 				    MIN(n_locales, n_langtags); r++) {
    633 					if (!*r)
    634 						break;
    635 					if (strcmp(*q, *r) == 0) {
    636 						do_append = 0;
    637 						break;
    638 					}
    639 				}
    640 				if (do_append && n_results < max_results) {
    641 					result[n_results++] = xstrdup(*q);
    642 					result[n_results] = NULL;
    643 				}
    644 				break;
    645 			}
    646 		}
    647 	}
    648 
    649 	xfree_split_list(langtag_list);
    650 
    651 	return (result);
    652 }
    653 
    654 char *
    655 g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
    656 {
    657 	char **results, **locales, *result = NULL;
    658 
    659 	if (srvr_locales == NULL)
    660 		locales = g11n_getlocales();
    661 	else
    662 		locales = srvr_locales;
    663 
    664 	if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
    665 	    locales)) == NULL)
    666 		goto err;
    667 
    668 	if (*results != NULL)
    669 		result = xstrdup(*results);
    670 
    671 	xfree_split_list(results);
    672 
    673 err:
    674 	if (locales != NULL && locales != srvr_locales)
    675 		g11n_freelist(locales);
    676 	return (result);
    677 }
    678 
/*
 * Functions for converting to UTF-8 from the local codeset and
 * converting from UTF-8 to the local codeset.
 *
 * The error_str parameter is a pointer to a char pointer variable in which
 * to store a string suitable for use with error() or fatal() or friends.
 * It is also used as an error indicator when NULL is returned.
 *
 * If conversion isn't necessary, *error_str is set to NULL, and
 * NULL is returned.
 * If a conversion error occurred, *error_str points to an error message,
 * and NULL is returned.
 */
    692 char *
    693 g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
    694 {
    695 	static char *last_codeset;
    696 	static iconv_t cd = (iconv_t)-1;
    697 	char	*codeset;
    698 
    699 	*error_str = NULL;
    700 
    701 	codeset = nl_langinfo(CODESET);
    702 
    703 	if (strcmp(codeset, "UTF-8") == 0)
    704 		return (NULL);
    705 
    706 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
    707 		if (last_codeset != NULL) {
    708 			xfree(last_codeset);
    709 			last_codeset = NULL;
    710 		}
    711 		if (cd != (iconv_t)-1)
    712 			(void) iconv_close(cd);
    713 
    714 		if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
    715 			*error_str = gettext("Cannot convert UTF-8 "
    716 			    "strings to the local codeset");
    717 			return (NULL);
    718 		}
    719 		last_codeset = xstrdup(codeset);
    720 	}
    721 	return (do_iconv(cd, str, lenp, error_str));
    722 }
    723 
    724 char *
    725 g11n_convert_to_utf8(const char *str, uint_t *lenp,
    726     int native, char **error_str)
    727 {
    728 	static char *last_codeset;
    729 	static iconv_t cd = (iconv_t)-1;
    730 	char	*codeset;
    731 
    732 	*error_str = NULL;
    733 
    734 	if (native)
    735 		codeset = native_codeset;
    736 	else
    737 		codeset = nl_langinfo(CODESET);
    738 
    739 	if (strcmp(codeset, "UTF-8") == 0)
    740 		return (NULL);
    741 
    742 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
    743 		if (last_codeset != NULL) {
    744 			xfree(last_codeset);
    745 			last_codeset = NULL;
    746 		}
    747 		if (cd != (iconv_t)-1)
    748 			(void) iconv_close(cd);
    749 
    750 		if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
    751 			*error_str = gettext("Cannot convert the "
    752 			    "local codeset strings to UTF-8");
    753 			return (NULL);
    754 		}
    755 		last_codeset = xstrdup(codeset);
    756 	}
    757 	return (do_iconv(cd, str, lenp, error_str));
    758 }
    759 
    760 /*
    761  * Wrapper around iconv()
    762  *
    763  * The caller is responsible for freeing the result. NULL is returned when
    764  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
    765  * The caller must ensure that the input string isn't NULL pointer.
    766  */
    767 static char *
    768 do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
    769 {
    770 	int	ilen, olen;
    771 	size_t	ileft, oleft;
    772 	char	*ostr, *optr;
    773 	const char *istr;
    774 
    775 	ilen = *lenp;
    776 	olen = ilen + 1;
    777 
    778 	ostr = NULL;
    779 	for (;;) {
    780 		olen *= 2;
    781 		oleft = olen;
    782 		ostr = optr = xrealloc(ostr, olen);
    783 		istr = (const char *)str;
    784 		if ((ileft = ilen) == 0)
    785 			break;
    786 
    787 		if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
    788 			/* success: generate reset sequence */
    789 			if (iconv(cd, NULL, NULL,
    790 			    &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
    791 				continue;
    792 			}
    793 			break;
    794 		}
    795 		/* failed */
    796 		if (errno != E2BIG) {
    797 			oleft = olen;
    798 			(void) iconv(cd, NULL, NULL, &ostr, &oleft);
    799 			xfree(ostr);
    800 			*err_str = gettext("Codeset conversion failed");
    801 			return (NULL);
    802 		}
    803 	}
    804 	olen = optr - ostr;
    805 	optr = xmalloc(olen + 1);
    806 	(void) memcpy(optr, ostr, olen);
    807 	xfree(ostr);
    808 
    809 	optr[olen] = '\0';
    810 	*lenp = olen;
    811 
    812 	return (optr);
    813 }
    814 
    815 /*
    816  * A filter for output string. Control and unprintable characters
    817  * are converted into visible form (eg "\ooo").
    818  */
    819 char *
    820 g11n_filter_string(char *s)
    821 {
    822 	int	mb_cur_max = MB_CUR_MAX;
    823 	int	mblen, len;
    824 	char	*os = s;
    825 	wchar_t	wc;
    826 	char	*obuf, *op;
    827 
    828 	/* all character may be converted into the form of \ooo */
    829 	obuf = op = xmalloc(strlen(s) * 4 + 1);
    830 
    831 	while (*s != '\0') {
    832 		mblen = mbtowc(&wc, s, mb_cur_max);
    833 		if (mblen <= 0) {
    834 			mblen = 1;
    835 			wc = (unsigned char)*s;
    836 		}
    837 		if (!iswprint(wc) &&
    838 		    wc != L'\n' && wc != L'\r' && wc != L'\t') {
    839 			/*
    840 			 * control chars which need to be replaced
    841 			 * with safe character sequence.
    842 			 */
    843 			while (mblen != 0) {
    844 				op += sprintf(op, "\\%03o",
    845 				    (unsigned char)*s++);
    846 				mblen--;
    847 			}
    848 		} else {
    849 			while (mblen != 0) {
    850 				*op++ = *s++;
    851 				mblen--;
    852 			}
    853 		}
    854 	}
    855 	*op = '\0';
    856 	len = op - obuf + 1;
    857 	op = xrealloc(os, len);
    858 	(void) memcpy(op, obuf, len);
    859 	xfree(obuf);
    860 	return (op);
    861 }
    862 
    863 /*
    864  * Once we negotiated with a langtag, server need to map it to a system
    865  * locale. That is done based on the locale supported on the server side.
    866  * We know (with the locale supported on Solaris) how the langtag is
    867  * mapped to. However, from the client point of view, there is no way to
    868  * know exactly what locale(encoding) will be used.
    869  *
    870  * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that the
    871  * UTF-8 characters always come over the wire, so it is no longer the problem
    872  * as long as both side has the bug fix. However if the server side doesn't
    873  * have the fix, client can't safely perform the code conversion since the
    874  * incoming character encoding is unknown.
    875  *
    876  * To alleviate this situation, we take an empirical approach to find
    877  * encoding from langtag.
    878  *
    879  * If langtag has a subtag, we can directly map the langtag to UTF-8 locale
    880  * (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
    881  * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
    882  * of L10N support ..). Those are:
    883  *
    884  * 	no_NO, no_NY, sr_SP, sr_YU
    885  *
    886  * They all use ISO8859-X encoding.
    887  *
    888  * For those "xx" langtags, some of them can be mapped to "xx.UTF-8",
    889  * but others cannot. So we need to use the "xx" as the locale name.
    890  * Those locales are:
    891  *
    892  * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
    893  *
    894  * Their encoding vary. They could be ISO8859-X or EUC or something else.
    895  * So we don't perform code conversion for these langtags.
    896  */
    897 static const char *non_utf8_langtag[] = {
    898 	"no-NO", "no-NY", "sr-SP", "sr-YU",
    899 	"ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja",
    900 	"lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL};
    901 
    902 void
    903 g11n_test_langtag(const char *lang, int server)
    904 {
    905 	const char	**lp;
    906 
    907 	if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
    908 		/*
    909 		 * We negotiated with real locale name (not lang tag).
    910 		 * We shouldn't expect UTF-8, thus shouldn't do code
    911 		 * conversion.
    912 		 */
    913 		datafellows |= SSH_BUG_STRING_ENCODING;
    914 		return;
    915 	}
    916 
    917 	if (datafellows & SSH_BUG_STRING_ENCODING) {
    918 		if (server) {
    919 			/*
    920 			 * Whatever bug exists in the client side, server
    921 			 * side has nothing to do, since server has no way
    922 			 * to know what actual encoding is used on the client
    923 			 * side. For example, even if we negotiated with
    924 			 * en_US, client locale could be en_US.ISO8859-X or
    925 			 * en_US.UTF-8.
    926 			 */
    927 			return;
    928 		}
    929 		/*
    930 		 * We are on the client side. We'll check with known
    931 		 * locales to see if non-UTF8 characters could come in.
    932 		 */
    933 		for (lp = non_utf8_langtag; *lp != NULL; lp++) {
    934 			if (strcmp(lang, *lp) == 0)
    935 				break;
    936 		}
    937 		if (*lp == NULL) {
    938 			debug2("Server is expected to use UTF-8 locale");
    939 			datafellows &= ~SSH_BUG_STRING_ENCODING;
    940 		} else {
    941 			/*
    942 			 * Server is expected to use non-UTF8 encoding.
    943 			 */
    944 			debug2("Enforcing no code conversion: %s", lang);
    945 		}
    946 	}
    947 }
    948 
    949 /*
    950  * Free all strings in the list and then free the list itself. We know that the
    951  * list ends with a NULL pointer.
    952  */
    953 void
    954 g11n_freelist(char **list)
    955 {
    956 	int i = 0;
    957 
    958 	while (list[i] != NULL) {
    959 		xfree(list[i]);
    960 		i++;
    961 	}
    962 
    963 	xfree(list);
    964 }
    965