1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 2628 jp161948 * Common Development and Distribution License (the "License"). 6 2628 jp161948 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel * 21 9600 Nobutomo * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 22 0 stevel * Use is subject to license terms. 23 0 stevel */ 24 0 stevel 25 0 stevel #include <errno.h> 26 0 stevel #include <locale.h> 27 0 stevel #include <langinfo.h> 28 0 stevel #include <iconv.h> 29 0 stevel #include <ctype.h> 30 9600 Nobutomo #include <wctype.h> 31 0 stevel #include <strings.h> 32 0 stevel #include <string.h> 33 0 stevel #include <stdio.h> 34 0 stevel #include <stdlib.h> 35 0 stevel #include "includes.h" 36 0 stevel #include "xmalloc.h" 37 0 stevel #include "xlist.h" 38 9600 Nobutomo #include "compat.h" 39 9600 Nobutomo #include "log.h" 40 0 stevel 41 0 stevel #ifdef MIN 42 0 stevel #undef MIN 43 0 stevel #endif /* MIN */ 44 0 stevel 45 2705 jp161948 #define MIN(x, y) ((x) < (y) ? 
(x) : (y)) 46 0 stevel 47 2705 jp161948 #define LOCALE_PATH "/usr/bin/locale" 48 0 stevel 49 2705 jp161948 /* two-char country code, '-' and two-char region code */ 50 2705 jp161948 #define LANGTAG_MAX 5 51 0 stevel 52 0 stevel static int locale_cmp(const void *d1, const void *d2); 53 0 stevel static char *g11n_locale2langtag(char *locale); 54 0 stevel 55 9600 Nobutomo static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str); 56 9600 Nobutomo 57 9600 Nobutomo /* 58 9600 Nobutomo * native_codeset records the codeset of the default system locale. 59 9600 Nobutomo * It is used to convert the contents of file (eg /etc/issue) which is 60 9600 Nobutomo * supposed to be in the codeset of default system locale. 61 9600 Nobutomo */ 62 9600 Nobutomo static char *native_codeset; 63 0 stevel 64 5562 jp161948 /* 65 5562 jp161948 * Convert locale string name into a language tag. The caller is responsible for 66 5562 jp161948 * freeing the memory allocated for the result. 67 5562 jp161948 */ 68 2705 jp161948 static char * 69 0 stevel g11n_locale2langtag(char *locale) 70 0 stevel { 71 2705 jp161948 char *langtag; 72 0 stevel 73 2705 jp161948 /* base cases */ 74 2705 jp161948 if (!locale || !*locale) 75 2705 jp161948 return (NULL); 76 0 stevel 77 2705 jp161948 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 78 5562 jp161948 return (xstrdup("i-default")); 79 0 stevel 80 2705 jp161948 /* punt for language codes which are not exactly 2 letters */ 81 2705 jp161948 if (strlen(locale) < 2 || 82 2705 jp161948 !isalpha(locale[0]) || 83 2705 jp161948 !isalpha(locale[1]) || 84 2705 jp161948 (locale[2] != '\0' && 85 2705 jp161948 locale[2] != '_' && 86 2705 jp161948 locale[2] != '.' 
&& 87 2705 jp161948 locale[2] != '@')) 88 2705 jp161948 return (NULL); 89 0 stevel 90 0 stevel 91 2705 jp161948 /* we have a primary language sub-tag */ 92 2705 jp161948 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 93 0 stevel 94 2705 jp161948 strncpy(langtag, locale, 2); 95 2705 jp161948 langtag[2] = '\0'; 96 0 stevel 97 2705 jp161948 /* do we have country sub-tag? For example: cs_CZ */ 98 2705 jp161948 if (locale[2] == '_') { 99 2705 jp161948 if (strlen(locale) < 5 || 100 2705 jp161948 !isalpha(locale[3]) || 101 2705 jp161948 !isalpha(locale[4]) || 102 2705 jp161948 (locale[5] != '\0' && (locale[5] != '.' && 103 2705 jp161948 locale[5] != '@'))) { 104 2705 jp161948 return (langtag); 105 2705 jp161948 } 106 2705 jp161948 107 2705 jp161948 /* example: create cs-CZ from cs_CZ */ 108 2705 jp161948 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 109 2705 jp161948 locale + 3) == 5) 110 2705 jp161948 return (langtag); 111 0 stevel } 112 0 stevel 113 2705 jp161948 /* in all other cases we just use the primary language sub-tag */ 114 2705 jp161948 return (langtag); 115 0 stevel } 116 0 stevel 117 2705 jp161948 uint_t 118 0 stevel g11n_langtag_is_default(char *langtag) 119 0 stevel { 120 2705 jp161948 return (strcmp(langtag, "i-default") == 0); 121 0 stevel } 122 0 stevel 123 0 stevel /* 124 0 stevel * This lang tag / locale matching function works only for two-character 125 0 stevel * language primary sub-tags and two-character country sub-tags. 
126 0 stevel */ 127 2705 jp161948 uint_t 128 0 stevel g11n_langtag_matches_locale(char *langtag, char *locale) 129 0 stevel { 130 2705 jp161948 /* match "i-default" to the process' current locale if possible */ 131 2705 jp161948 if (g11n_langtag_is_default(langtag)) { 132 2705 jp161948 if (strcasecmp(locale, "POSIX") == 0 || 133 2705 jp161948 strcasecmp(locale, "C") == 0) 134 2705 jp161948 return (1); 135 2705 jp161948 else 136 2705 jp161948 return (0); 137 2705 jp161948 } 138 0 stevel 139 2705 jp161948 /* 140 2705 jp161948 * locale must be at least 2 chars long and the lang part must be 141 2705 jp161948 * exactly two characters 142 2705 jp161948 */ 143 2705 jp161948 if (strlen(locale) < 2 || 144 2705 jp161948 (!isalpha(locale[0]) || !isalpha(locale[1]) || 145 2705 jp161948 (locale[2] != '\0' && locale[2] != '_' && 146 2705 jp161948 locale[2] != '.' && locale[2] != '@'))) 147 2705 jp161948 return (0); 148 0 stevel 149 2705 jp161948 /* same thing with the langtag */ 150 2705 jp161948 if (strlen(langtag) < 2 || 151 2705 jp161948 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 152 2705 jp161948 (langtag[2] != '\0' && langtag[2] != '-'))) 153 2705 jp161948 return (0); 154 0 stevel 155 2705 jp161948 /* primary language sub-tag and the locale's language part must match */ 156 2705 jp161948 if (strncasecmp(langtag, locale, 2) != 0) 157 2705 jp161948 return (0); 158 0 stevel 159 2705 jp161948 /* 160 2705 jp161948 * primary language sub-tag and the locale's language match, now 161 2705 jp161948 * fuzzy check country part 162 2705 jp161948 */ 163 0 stevel 164 2705 jp161948 /* neither langtag nor locale have more than one component */ 165 2705 jp161948 if (langtag[2] == '\0' && 166 2705 jp161948 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 167 2705 jp161948 return (2); 168 0 stevel 169 2705 jp161948 /* langtag has only one sub-tag... 
*/ 170 2705 jp161948 if (langtag[2] == '\0') 171 2705 jp161948 return (1); 172 0 stevel 173 2705 jp161948 /* locale has no country code... */ 174 2705 jp161948 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 175 2705 jp161948 return (1); 176 0 stevel 177 2705 jp161948 /* langtag has more than one subtag and the locale has a country code */ 178 0 stevel 179 2705 jp161948 /* ignore second subtag if not two chars */ 180 2705 jp161948 if (strlen(langtag) < 5) 181 2705 jp161948 return (1); 182 0 stevel 183 2705 jp161948 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 184 2705 jp161948 (langtag[5] != '\0' && langtag[5] != '-')) 185 2705 jp161948 return (1); 186 0 stevel 187 2705 jp161948 /* ignore rest of locale if there is no two-character country part */ 188 2705 jp161948 if (strlen(locale) < 5) 189 2705 jp161948 return (1); 190 0 stevel 191 2705 jp161948 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 192 2705 jp161948 (locale[5] != '\0' && locale[5] != '.' 
&& locale[5] != '@')) 193 2705 jp161948 return (1); 194 0 stevel 195 2705 jp161948 /* if the country part matches, return 2 */ 196 2705 jp161948 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 197 2705 jp161948 return (2); 198 2705 jp161948 199 2705 jp161948 return (1); 200 0 stevel } 201 0 stevel 202 0 stevel char * 203 0 stevel g11n_getlocale() 204 0 stevel { 205 2705 jp161948 /* we have one text domain - always set it */ 206 2705 jp161948 (void) textdomain(TEXT_DOMAIN); 207 0 stevel 208 2705 jp161948 /* if the locale is not set, set it from the env vars */ 209 2705 jp161948 if (!setlocale(LC_MESSAGES, NULL)) 210 2705 jp161948 (void) setlocale(LC_MESSAGES, ""); 211 0 stevel 212 2705 jp161948 return (setlocale(LC_MESSAGES, NULL)); 213 0 stevel } 214 0 stevel 215 0 stevel void 216 0 stevel g11n_setlocale(int category, const char *locale) 217 0 stevel { 218 2705 jp161948 char *curr; 219 9600 Nobutomo 220 9600 Nobutomo if (native_codeset == NULL) { 221 9600 Nobutomo /* set default locale, and record current codeset */ 222 9600 Nobutomo (void) setlocale(LC_ALL, ""); 223 9600 Nobutomo curr = nl_langinfo(CODESET); 224 9600 Nobutomo native_codeset = xstrdup(curr); 225 9600 Nobutomo } 226 0 stevel 227 2705 jp161948 /* we have one text domain - always set it */ 228 2705 jp161948 (void) textdomain(TEXT_DOMAIN); 229 0 stevel 230 2705 jp161948 if (!locale) 231 2705 jp161948 return; 232 0 stevel 233 2705 jp161948 if (*locale && ((curr = setlocale(category, NULL))) && 234 2705 jp161948 strcmp(curr, locale) == 0) 235 2705 jp161948 return; 236 0 stevel 237 2705 jp161948 /* if <category> is bogus, setlocale() will do nothing */ 238 2705 jp161948 (void) setlocale(category, locale); 239 0 stevel } 240 0 stevel 241 0 stevel char ** 242 0 stevel g11n_getlocales() 243 0 stevel { 244 2705 jp161948 FILE *locale_out; 245 2705 jp161948 uint_t n_elems, list_size, long_line = 0; 246 2705 jp161948 char **list; 247 2705 jp161948 char locale[64]; /* 64 bytes is plenty for locale names */ 248 
0 stevel 249 2705 jp161948 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 250 2705 jp161948 return (NULL); 251 0 stevel 252 2705 jp161948 /* 253 2705 jp161948 * start with enough room for 65 locales - that's a lot fewer than 254 2705 jp161948 * all the locales available for installation, but a lot more than 255 2705 jp161948 * what most users will need and install 256 2705 jp161948 */ 257 2705 jp161948 n_elems = 0; 258 2705 jp161948 list_size = 192; 259 2705 jp161948 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 260 2705 jp161948 memset(list, 0, sizeof (char *) * (list_size + 1)); 261 0 stevel 262 2705 jp161948 while (fgets(locale, sizeof (locale), locale_out)) { 263 2705 jp161948 /* skip long locale names (if any) */ 264 2705 jp161948 if (!strchr(locale, '\n')) { 265 2705 jp161948 long_line = 1; 266 2705 jp161948 continue; 267 2705 jp161948 } else if (long_line) { 268 2705 jp161948 long_line = 0; 269 2705 jp161948 continue; 270 2705 jp161948 } 271 0 stevel 272 2705 jp161948 if (strncmp(locale, "iso_8859", 8) == 0) 273 2705 jp161948 /* ignore locale names like "iso_8859-1" */ 274 2705 jp161948 continue; 275 2705 jp161948 276 2705 jp161948 if (n_elems == list_size) { 277 2705 jp161948 list_size *= 2; 278 2705 jp161948 list = (char **)xrealloc((void *) list, 279 2705 jp161948 (list_size + 1) * sizeof (char *)); 280 2705 jp161948 memset(&list[n_elems + 1], 0, 281 2705 jp161948 sizeof (char *) * (list_size - n_elems + 1)); 282 2705 jp161948 } 283 2705 jp161948 284 2705 jp161948 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 285 2705 jp161948 list[n_elems++] = xstrdup(locale); 286 0 stevel } 287 3109 jp161948 288 6288 jp161948 (void) pclose(locale_out); 289 6288 jp161948 290 5562 jp161948 if (n_elems == 0) { 291 5562 jp161948 xfree(list); 292 3109 jp161948 return (NULL); 293 5562 jp161948 } 294 0 stevel 295 2705 jp161948 list[n_elems] = NULL; 296 0 stevel 297 2705 jp161948 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 298 
2705 jp161948 return (list); 299 0 stevel } 300 0 stevel 301 0 stevel char * 302 0 stevel g11n_getlangs() 303 0 stevel { 304 2705 jp161948 char *locale; 305 0 stevel 306 2705 jp161948 if (getenv("SSH_LANGS")) 307 2705 jp161948 return (xstrdup(getenv("SSH_LANGS"))); 308 0 stevel 309 2705 jp161948 locale = g11n_getlocale(); 310 0 stevel 311 2705 jp161948 if (!locale || !*locale) 312 2705 jp161948 return (xstrdup("i-default")); 313 0 stevel 314 2705 jp161948 return (g11n_locale2langtag(locale)); 315 0 stevel } 316 0 stevel 317 0 stevel char * 318 0 stevel g11n_locales2langs(char **locale_set) 319 0 stevel { 320 2705 jp161948 char **p, **r, **q; 321 5562 jp161948 char *langtag, *langs; 322 2705 jp161948 int locales, skip; 323 0 stevel 324 2705 jp161948 for (locales = 0, p = locale_set; p && *p; p++) 325 2705 jp161948 locales++; 326 0 stevel 327 2705 jp161948 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 328 2705 jp161948 memset(r, 0, (locales + 1) * sizeof (char *)); 329 0 stevel 330 2705 jp161948 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 331 2705 jp161948 skip = 0; 332 2705 jp161948 if ((langtag = g11n_locale2langtag(*p)) == NULL) 333 2705 jp161948 continue; 334 2705 jp161948 for (q = r; (q - r) < locales; q++) { 335 2705 jp161948 if (!*q) 336 2705 jp161948 break; 337 2705 jp161948 if (*q && strcmp(*q, langtag) == 0) 338 2705 jp161948 skip = 1; 339 2705 jp161948 } 340 2705 jp161948 if (!skip) 341 2705 jp161948 *(q++) = langtag; 342 5562 jp161948 else 343 5562 jp161948 xfree(langtag); 344 2705 jp161948 *q = NULL; 345 0 stevel } 346 2705 jp161948 347 5562 jp161948 langs = xjoin(r, ','); 348 5562 jp161948 g11n_freelist(r); 349 5562 jp161948 350 5562 jp161948 return (langs); 351 0 stevel } 352 0 stevel 353 2705 jp161948 static int 354 0 stevel sortcmp(const void *d1, const void *d2) 355 0 stevel { 356 2705 jp161948 char *s1 = *(char **)d1; 357 2705 jp161948 char *s2 = *(char **)d2; 358 0 stevel 359 2705 jp161948 return (strcmp(s1, 
s2)); 360 0 stevel } 361 0 stevel 362 0 stevel int 363 0 stevel g11n_langtag_match(char *langtag1, char *langtag2) 364 0 stevel { 365 2705 jp161948 int len1, len2; 366 2705 jp161948 char c1, c2; 367 0 stevel 368 2705 jp161948 len1 = (strchr(langtag1, '-')) ? 369 5562 jp161948 (strchr(langtag1, '-') - langtag1) 370 5562 jp161948 : strlen(langtag1); 371 0 stevel 372 2705 jp161948 len2 = (strchr(langtag2, '-')) ? 373 5562 jp161948 (strchr(langtag2, '-') - langtag2) 374 5562 jp161948 : strlen(langtag2); 375 0 stevel 376 2705 jp161948 /* no match */ 377 2705 jp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 378 2705 jp161948 return (0); 379 0 stevel 380 2705 jp161948 c1 = *(langtag1 + len1); 381 2705 jp161948 c2 = *(langtag2 + len2); 382 0 stevel 383 2705 jp161948 /* no country sub-tags - exact match */ 384 2705 jp161948 if (c1 == '\0' && c2 == '\0') 385 2705 jp161948 return (2); 386 0 stevel 387 2705 jp161948 /* one langtag has a country sub-tag, the other doesn't */ 388 2705 jp161948 if (c1 == '\0' || c2 == '\0') 389 2705 jp161948 return (1); 390 0 stevel 391 2705 jp161948 /* can't happen - both langtags have a country sub-tag */ 392 2705 jp161948 if (c1 != '-' || c2 != '-') 393 2705 jp161948 return (1); 394 0 stevel 395 2705 jp161948 /* compare country subtags */ 396 2705 jp161948 langtag1 = langtag1 + len1 + 1; 397 2705 jp161948 langtag2 = langtag2 + len2 + 1; 398 0 stevel 399 2705 jp161948 len1 = (strchr(langtag1, '-')) ? 400 2705 jp161948 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 401 0 stevel 402 2705 jp161948 len2 = (strchr(langtag2, '-')) ? 
403 2705 jp161948 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 404 0 stevel 405 2705 jp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 406 2705 jp161948 return (1); 407 0 stevel 408 2705 jp161948 /* country tags matched - exact match */ 409 2705 jp161948 return (2); 410 0 stevel } 411 0 stevel 412 0 stevel char * 413 0 stevel g11n_langtag_set_intersect(char *set1, char *set2) 414 0 stevel { 415 2705 jp161948 char **list1, **list2, **list3, **p, **q, **r; 416 2705 jp161948 char *set3, *lang_subtag; 417 2705 jp161948 uint_t n1, n2, n3; 418 2705 jp161948 uint_t do_append; 419 0 stevel 420 2705 jp161948 list1 = xsplit(set1, ','); 421 2705 jp161948 list2 = xsplit(set2, ','); 422 0 stevel 423 2705 jp161948 for (n1 = 0, p = list1; p && *p; p++, n1++) 424 2705 jp161948 ; 425 2705 jp161948 for (n2 = 0, p = list2; p && *p; p++, n2++) 426 2705 jp161948 ; 427 0 stevel 428 2705 jp161948 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 429 2705 jp161948 *list3 = NULL; 430 0 stevel 431 2705 jp161948 /* 432 2705 jp161948 * we must not sort the user langtags - sorting or not the server's 433 2705 jp161948 * should not affect the outcome 434 2705 jp161948 */ 435 2705 jp161948 qsort(list2, n2, sizeof (char *), sortcmp); 436 2705 jp161948 437 2705 jp161948 for (n3 = 0, p = list1; p && *p; p++) { 438 2705 jp161948 do_append = 0; 439 2705 jp161948 for (q = list2; q && *q; q++) { 440 2705 jp161948 if (g11n_langtag_match(*p, *q) != 2) continue; 441 2705 jp161948 /* append element */ 442 2705 jp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 443 2705 jp161948 do_append = 1; 444 2705 jp161948 if (!*r) 445 2705 jp161948 break; 446 2705 jp161948 if (strcmp(*p, *r) == 0) { 447 2705 jp161948 do_append = 0; 448 2705 jp161948 break; 449 2705 jp161948 } 450 2705 jp161948 } 451 2705 jp161948 if (do_append && n3 <= (n1 + n2)) { 452 2705 jp161948 list3[n3++] = xstrdup(*p); 453 2705 jp161948 list3[n3] = NULL; 454 2705 jp161948 } 455 0 stevel } 456 0 
stevel } 457 0 stevel 458 2705 jp161948 for (p = list1; p && *p; p++) { 459 2705 jp161948 do_append = 0; 460 2705 jp161948 for (q = list2; q && *q; q++) { 461 2705 jp161948 if (g11n_langtag_match(*p, *q) != 1) 462 2705 jp161948 continue; 463 2705 jp161948 464 2705 jp161948 /* append element */ 465 2705 jp161948 lang_subtag = xstrdup(*p); 466 2705 jp161948 if (strchr(lang_subtag, '-')) 467 2705 jp161948 *(strchr(lang_subtag, '-')) = '\0'; 468 2705 jp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 469 2705 jp161948 do_append = 1; 470 2705 jp161948 if (!*r) 471 2705 jp161948 break; 472 2705 jp161948 if (strcmp(lang_subtag, *r) == 0) { 473 2705 jp161948 do_append = 0; 474 2705 jp161948 break; 475 2705 jp161948 } 476 2705 jp161948 } 477 2705 jp161948 if (do_append && n3 <= (n1 + n2)) { 478 2705 jp161948 list3[n3++] = lang_subtag; 479 2705 jp161948 list3[n3] = NULL; 480 2705 jp161948 } else 481 2705 jp161948 xfree(lang_subtag); 482 0 stevel } 483 0 stevel } 484 0 stevel 485 2705 jp161948 set3 = xjoin(list3, ','); 486 2705 jp161948 xfree_split_list(list1); 487 2705 jp161948 xfree_split_list(list2); 488 2705 jp161948 xfree_split_list(list3); 489 0 stevel 490 2705 jp161948 return (set3); 491 0 stevel } 492 0 stevel 493 0 stevel char * 494 0 stevel g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 495 0 stevel { 496 2705 jp161948 char *list, *result; 497 2705 jp161948 char **xlist; 498 0 stevel 499 2705 jp161948 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 500 2705 jp161948 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 501 0 stevel 502 2705 jp161948 if (!list) 503 2705 jp161948 return (NULL); 504 0 stevel 505 2705 jp161948 xlist = xsplit(list, ','); 506 0 stevel 507 2705 jp161948 xfree(list); 508 0 stevel 509 2705 jp161948 if (!xlist || !*xlist) 510 2705 jp161948 return (NULL); 511 0 stevel 512 2705 jp161948 result = xstrdup(*xlist); 513 2705 jp161948 xfree_split_list(xlist); 514 0 stevel 515 2705 
jp161948 return (result); 516 0 stevel } 517 0 stevel 518 0 stevel /* 519 0 stevel * Compare locales, preferring UTF-8 codesets to others, otherwise doing 520 0 stevel * a stright strcmp() 521 0 stevel */ 522 2705 jp161948 static int 523 0 stevel locale_cmp(const void *d1, const void *d2) 524 0 stevel { 525 2705 jp161948 char *dot_ptr; 526 2705 jp161948 char *s1 = *(char **)d1; 527 2705 jp161948 char *s2 = *(char **)d2; 528 2705 jp161948 int s1_is_utf8 = 0; 529 2705 jp161948 int s2_is_utf8 = 0; 530 0 stevel 531 2705 jp161948 /* check if s1 is a UTF-8 locale */ 532 2705 jp161948 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 533 2705 jp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 534 2705 jp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 535 2705 jp161948 s1_is_utf8++; 536 2705 jp161948 } 537 0 stevel 538 2705 jp161948 /* check if s2 is a UTF-8 locale */ 539 2705 jp161948 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 540 2705 jp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 541 2705 jp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 542 2705 jp161948 s2_is_utf8++; 543 2705 jp161948 } 544 0 stevel 545 2705 jp161948 /* prefer UTF-8 locales */ 546 2705 jp161948 if (s1_is_utf8 && !s2_is_utf8) 547 2705 jp161948 return (-1); 548 0 stevel 549 2705 jp161948 if (s2_is_utf8 && !s1_is_utf8) 550 2705 jp161948 return (1); 551 0 stevel 552 2705 jp161948 /* prefer any locale over the default locales */ 553 2705 jp161948 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 554 2705 jp161948 strcmp(s1, "common") == 0) { 555 2705 jp161948 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 && 556 2705 jp161948 strcmp(s2, "common") != 0) 557 2705 jp161948 return (1); 558 2705 jp161948 } 559 0 stevel 560 2705 jp161948 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 561 2705 jp161948 strcmp(s2, "common") == 0) { 562 2705 jp161948 if (strcmp(s1, "C") != 0 && 563 2705 jp161948 strcmp(s1, 
"POSIX") != 0 && 564 2705 jp161948 strcmp(s1, "common") != 0) 565 2705 jp161948 return (-1); 566 2705 jp161948 } 567 2705 jp161948 568 2705 jp161948 return (strcmp(s1, s2)); 569 0 stevel } 570 0 stevel 571 0 stevel 572 0 stevel char ** 573 2705 jp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 574 0 stevel { 575 2705 jp161948 char **langtag_list, **result, **p, **q, **r; 576 2705 jp161948 char *s; 577 2705 jp161948 uint_t do_append, n_langtags, n_locales, n_results, max_results; 578 0 stevel 579 9845 Jan if (locale_set == NULL) 580 9845 Jan return (NULL); 581 9845 Jan 582 2705 jp161948 /* count lang tags and locales */ 583 2705 jp161948 for (n_locales = 0, p = locale_set; p && *p; p++) 584 2705 jp161948 n_locales++; 585 0 stevel 586 2705 jp161948 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 587 2705 jp161948 /* count the number of langtags */ 588 2705 jp161948 for (; s = strchr(s, ','); s++, n_langtags++) 589 2705 jp161948 ; 590 0 stevel 591 2705 jp161948 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 592 0 stevel 593 2705 jp161948 langtag_list = xsplit(langtag_set, ','); 594 2705 jp161948 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 595 2705 jp161948 ; 596 0 stevel 597 2705 jp161948 max_results = MIN(n_locales, n_langtags) * 2; 598 2705 jp161948 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 599 2705 jp161948 *result = NULL; 600 2705 jp161948 n_results = 0; 601 2705 jp161948 602 2705 jp161948 /* more specific matches first */ 603 2705 jp161948 for (p = langtag_list; p && *p; p++) { 604 2705 jp161948 do_append = 0; 605 2705 jp161948 for (q = locale_set; q && *q; q++) { 606 2705 jp161948 if (g11n_langtag_matches_locale(*p, *q) == 2) { 607 2705 jp161948 do_append = 1; 608 2705 jp161948 for (r = result; (r - result) <= 609 2705 jp161948 MIN(n_locales, n_langtags); r++) { 610 2705 jp161948 if (!*r) 611 2705 jp161948 break; 612 2705 jp161948 if (strcmp(*q, *r) 
== 0) { 613 2705 jp161948 do_append = 0; 614 2705 jp161948 break; 615 2705 jp161948 } 616 2705 jp161948 } 617 2705 jp161948 if (do_append && n_results < max_results) { 618 2705 jp161948 result[n_results++] = xstrdup(*q); 619 2705 jp161948 result[n_results] = NULL; 620 2705 jp161948 } 621 2705 jp161948 break; 622 2705 jp161948 } 623 0 stevel } 624 2705 jp161948 } 625 2705 jp161948 626 2705 jp161948 for (p = langtag_list; p && *p; p++) { 627 2705 jp161948 do_append = 0; 628 2705 jp161948 for (q = locale_set; q && *q; q++) { 629 2705 jp161948 if (g11n_langtag_matches_locale(*p, *q) == 1) { 630 2705 jp161948 do_append = 1; 631 2705 jp161948 for (r = result; (r - result) <= 632 2705 jp161948 MIN(n_locales, n_langtags); r++) { 633 2705 jp161948 if (!*r) 634 2705 jp161948 break; 635 2705 jp161948 if (strcmp(*q, *r) == 0) { 636 2705 jp161948 do_append = 0; 637 2705 jp161948 break; 638 2705 jp161948 } 639 2705 jp161948 } 640 2705 jp161948 if (do_append && n_results < max_results) { 641 2705 jp161948 result[n_results++] = xstrdup(*q); 642 2705 jp161948 result[n_results] = NULL; 643 2705 jp161948 } 644 2705 jp161948 break; 645 2705 jp161948 } 646 0 stevel } 647 0 stevel } 648 0 stevel 649 2705 jp161948 xfree_split_list(langtag_list); 650 0 stevel 651 2705 jp161948 return (result); 652 0 stevel } 653 0 stevel 654 0 stevel char * 655 0 stevel g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 656 0 stevel { 657 5562 jp161948 char **results, **locales, *result = NULL; 658 5562 jp161948 659 5562 jp161948 if (srvr_locales == NULL) 660 5562 jp161948 locales = g11n_getlocales(); 661 5562 jp161948 else 662 5562 jp161948 locales = srvr_locales; 663 0 stevel 664 2705 jp161948 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 665 5562 jp161948 locales)) == NULL) 666 5562 jp161948 goto err; 667 0 stevel 668 2705 jp161948 if (*results != NULL) 669 2705 jp161948 result = xstrdup(*results); 670 0 stevel 671 2705 jp161948 xfree_split_list(results); 672 0 
stevel 673 5562 jp161948 err: 674 9845 Jan if (locales != NULL && locales != srvr_locales) 675 5562 jp161948 g11n_freelist(locales); 676 2705 jp161948 return (result); 677 0 stevel } 678 0 stevel 679 9600 Nobutomo /* 680 9600 Nobutomo * Functions for converting to UTF-8 from the local codeset and 681 9600 Nobutomo * converting from UTF-8 to the local codeset. 682 9600 Nobutomo * 683 9600 Nobutomo * The error_str parameter is an pointer to a char variable where to 684 9600 Nobutomo * store a string suitable for use with error() or fatal() or friends. 685 9600 Nobutomo * It is also used for an error indicator when NULL is returned. 686 9600 Nobutomo * 687 9600 Nobutomo * If conversion isn't necessary, *error_str is set to NULL, and 688 9600 Nobutomo * NULL is returned. 689 9600 Nobutomo * If conversion error occured, *error_str points to an error message, 690 9600 Nobutomo * and NULL is returned. 691 9600 Nobutomo */ 692 9600 Nobutomo char * 693 9600 Nobutomo g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str) 694 9600 Nobutomo { 695 9600 Nobutomo static char *last_codeset; 696 9600 Nobutomo static iconv_t cd = (iconv_t)-1; 697 9600 Nobutomo char *codeset; 698 0 stevel 699 9600 Nobutomo *error_str = NULL; 700 0 stevel 701 9600 Nobutomo codeset = nl_langinfo(CODESET); 702 0 stevel 703 9600 Nobutomo if (strcmp(codeset, "UTF-8") == 0) 704 9600 Nobutomo return (NULL); 705 0 stevel 706 9600 Nobutomo if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 707 9600 Nobutomo if (last_codeset != NULL) { 708 9600 Nobutomo xfree(last_codeset); 709 9600 Nobutomo last_codeset = NULL; 710 9600 Nobutomo } 711 9600 Nobutomo if (cd != (iconv_t)-1) 712 9600 Nobutomo (void) iconv_close(cd); 713 0 stevel 714 9600 Nobutomo if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) { 715 9600 Nobutomo *error_str = gettext("Cannot convert UTF-8 " 716 9600 Nobutomo "strings to the local codeset"); 717 9600 Nobutomo return (NULL); 718 2705 jp161948 } 719 9600 
Nobutomo last_codeset = xstrdup(codeset); 720 0 stevel } 721 9600 Nobutomo return (do_iconv(cd, str, lenp, error_str)); 722 0 stevel } 723 0 stevel 724 0 stevel char * 725 9600 Nobutomo g11n_convert_to_utf8(const char *str, uint_t *lenp, 726 9600 Nobutomo int native, char **error_str) 727 0 stevel { 728 9600 Nobutomo static char *last_codeset; 729 9600 Nobutomo static iconv_t cd = (iconv_t)-1; 730 9600 Nobutomo char *codeset; 731 0 stevel 732 9600 Nobutomo *error_str = NULL; 733 9600 Nobutomo 734 9600 Nobutomo if (native) 735 9600 Nobutomo codeset = native_codeset; 736 9600 Nobutomo else 737 9600 Nobutomo codeset = nl_langinfo(CODESET); 738 9600 Nobutomo 739 9600 Nobutomo if (strcmp(codeset, "UTF-8") == 0) 740 9600 Nobutomo return (NULL); 741 9600 Nobutomo 742 9600 Nobutomo if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 743 9600 Nobutomo if (last_codeset != NULL) { 744 9600 Nobutomo xfree(last_codeset); 745 9600 Nobutomo last_codeset = NULL; 746 2705 jp161948 } 747 9600 Nobutomo if (cd != (iconv_t)-1) 748 9600 Nobutomo (void) iconv_close(cd); 749 9600 Nobutomo 750 9600 Nobutomo if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) { 751 9600 Nobutomo *error_str = gettext("Cannot convert the " 752 9600 Nobutomo "local codeset strings to UTF-8"); 753 9600 Nobutomo return (NULL); 754 9600 Nobutomo } 755 9600 Nobutomo last_codeset = xstrdup(codeset); 756 0 stevel } 757 9600 Nobutomo return (do_iconv(cd, str, lenp, error_str)); 758 0 stevel } 759 0 stevel 760 0 stevel /* 761 0 stevel * Wrapper around iconv() 762 0 stevel * 763 9600 Nobutomo * The caller is responsible for freeing the result. NULL is returned when 764 0 stevel * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 765 9600 Nobutomo * The caller must ensure that the input string isn't NULL pointer. 
766 0 stevel */ 767 9600 Nobutomo static char * 768 9600 Nobutomo do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str) 769 2705 jp161948 { 770 9600 Nobutomo int ilen, olen; 771 9600 Nobutomo size_t ileft, oleft; 772 9600 Nobutomo char *ostr, *optr; 773 9600 Nobutomo const char *istr; 774 0 stevel 775 9600 Nobutomo ilen = *lenp; 776 9600 Nobutomo olen = ilen + 1; 777 0 stevel 778 9600 Nobutomo ostr = NULL; 779 9600 Nobutomo for (;;) { 780 9600 Nobutomo olen *= 2; 781 9600 Nobutomo oleft = olen; 782 9600 Nobutomo ostr = optr = xrealloc(ostr, olen); 783 9600 Nobutomo istr = (const char *)str; 784 9600 Nobutomo if ((ileft = ilen) == 0) 785 9600 Nobutomo break; 786 2705 jp161948 787 9600 Nobutomo if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) { 788 9600 Nobutomo /* success: generate reset sequence */ 789 9600 Nobutomo if (iconv(cd, NULL, NULL, 790 9600 Nobutomo &optr, &oleft) == (size_t)-1 && errno == E2BIG) { 791 9600 Nobutomo continue; 792 9600 Nobutomo } 793 9600 Nobutomo break; 794 9600 Nobutomo } 795 9600 Nobutomo /* failed */ 796 9600 Nobutomo if (errno != E2BIG) { 797 9600 Nobutomo oleft = olen; 798 9600 Nobutomo (void) iconv(cd, NULL, NULL, &ostr, &oleft); 799 9600 Nobutomo xfree(ostr); 800 9600 Nobutomo *err_str = gettext("Codeset conversion failed"); 801 9600 Nobutomo return (NULL); 802 9600 Nobutomo } 803 9600 Nobutomo } 804 9600 Nobutomo olen = optr - ostr; 805 9600 Nobutomo optr = xmalloc(olen + 1); 806 9600 Nobutomo (void) memcpy(optr, ostr, olen); 807 9600 Nobutomo xfree(ostr); 808 2705 jp161948 809 9600 Nobutomo optr[olen] = '\0'; 810 9600 Nobutomo *lenp = olen; 811 2705 jp161948 812 9600 Nobutomo return (optr); 813 9600 Nobutomo } 814 2705 jp161948 815 9600 Nobutomo /* 816 9600 Nobutomo * A filter for output string. Control and unprintable characters 817 9600 Nobutomo * are converted into visible form (eg "\ooo"). 
/*
 * Make a string safe for output: every byte of a character that is not
 * printable in the current locale - other than newline, carriage return
 * and tab - is replaced by a backslash and three octal digits ("\ooo").
 * Bytes that cannot be decoded as a character in the current locale are
 * always escaped.
 *
 * s is resized with xrealloc() to hold the filtered text, so it must be
 * an allocated buffer and must not be used after the call; the returned
 * pointer replaces it.
 */
char *
g11n_filter_string(char *s)
{
	int mb_cur_max = MB_CUR_MAX;
	int nbytes, undecodable;
	size_t len;
	char *os = s;
	wchar_t wc;
	char *obuf, *op;

	/* all character may be converted into the form of \ooo */
	obuf = op = xmalloc(strlen(s) * 4 + 1);

	while (*s != '\0') {
		undecodable = 0;
		nbytes = mbtowc(&wc, s, mb_cur_max);
		if (nbytes <= 0) {
			/*
			 * Not a valid character here.  Forget any partial
			 * shift state and escape just this one byte rather
			 * than classifying the raw byte as if it were a
			 * wide character.
			 */
			(void) mbtowc(NULL, NULL, 0);
			nbytes = 1;
			undecodable = 1;
		}
		if (undecodable || (!iswprint(wc) &&
		    wc != L'\n' && wc != L'\r' && wc != L'\t')) {
			/*
			 * control chars which need to be replaced
			 * with safe character sequence.
			 */
			while (nbytes != 0) {
				/* 4 characters plus the NUL always fit */
				op += snprintf(op, 5, "\\%03o",
				    (unsigned int)(unsigned char)*s++);
				nbytes--;
			}
		} else {
			while (nbytes != 0) {
				*op++ = *s++;
				nbytes--;
			}
		}
	}
	*op = '\0';

	/* hand the result back in the caller's (resized) buffer */
	len = (size_t)(op - obuf) + 1;
	op = xrealloc(os, len);
	(void) memcpy(op, obuf, len);
	xfree(obuf);
	return (op);
}

/*
 * Once we negotiated with a langtag, server need to map it to a system
 * locale. That is done based on the locale supported on the server side.
 * We know (with the locale supported on Solaris) how the langtag is
 * mapped to. However, from the client point of view, there is no way to
 * know exactly what locale(encoding) will be used.
 *
 * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that
 * UTF-8 characters always come over the wire, so this is no longer a problem
 * as long as both sides have the bug fix. However, if the server side doesn't
 * have the fix, the client can't safely perform the code conversion since the
 * incoming character encoding is unknown.
 *
 * To alleviate this situation, we take an empirical approach to find the
 * encoding from the langtag.
 *
 * If the langtag has a subtag, we can directly map the langtag to a UTF-8
 * locale (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
 * of L10N support ..). Those are:
 *
 *	no_NO, no_NY, sr_SP, sr_YU
 *
 * They all use ISO8859-X encoding.
 *
 * As for the "xx" langtags, some of them can be mapped to "xx.UTF-8",
 * but others cannot. So we need to use "xx" itself as the locale name.
 * Those locales are:
 *
 *	ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
 *
 * Their encodings vary. They could be ISO8859-X or EUC or something else.
 * So we don't perform code conversion for these langtags.
896 9600 Nobutomo */ 897 9600 Nobutomo static const char *non_utf8_langtag[] = { 898 9600 Nobutomo "no-NO", "no-NY", "sr-SP", "sr-YU", 899 9600 Nobutomo "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja", 900 9600 Nobutomo "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL}; 901 2705 jp161948 902 9600 Nobutomo void 903 9600 Nobutomo g11n_test_langtag(const char *lang, int server) 904 9600 Nobutomo { 905 9600 Nobutomo const char **lp; 906 9600 Nobutomo 907 9600 Nobutomo if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) { 908 9600 Nobutomo /* 909 9600 Nobutomo * We negotiated with real locale name (not lang tag). 910 9600 Nobutomo * We shouldn't expect UTF-8, thus shouldn't do code 911 9600 Nobutomo * conversion. 912 9600 Nobutomo */ 913 9600 Nobutomo datafellows |= SSH_BUG_STRING_ENCODING; 914 9600 Nobutomo return; 915 9600 Nobutomo } 916 9600 Nobutomo 917 9600 Nobutomo if (datafellows & SSH_BUG_STRING_ENCODING) { 918 9600 Nobutomo if (server) { 919 9600 Nobutomo /* 920 9600 Nobutomo * Whatever bug exists in the client side, server 921 9600 Nobutomo * side has nothing to do, since server has no way 922 9600 Nobutomo * to know what actual encoding is used on the client 923 9600 Nobutomo * side. For example, even if we negotiated with 924 9600 Nobutomo * en_US, client locale could be en_US.ISO8859-X or 925 9600 Nobutomo * en_US.UTF-8. 926 9600 Nobutomo */ 927 9600 Nobutomo return; 928 9600 Nobutomo } 929 9600 Nobutomo /* 930 9600 Nobutomo * We are on the client side. We'll check with known 931 9600 Nobutomo * locales to see if non-UTF8 characters could come in. 
932 9600 Nobutomo */ 933 9600 Nobutomo for (lp = non_utf8_langtag; *lp != NULL; lp++) { 934 9600 Nobutomo if (strcmp(lang, *lp) == 0) 935 9600 Nobutomo break; 936 9600 Nobutomo } 937 9600 Nobutomo if (*lp == NULL) { 938 9600 Nobutomo debug2("Server is expected to use UTF-8 locale"); 939 9600 Nobutomo datafellows &= ~SSH_BUG_STRING_ENCODING; 940 9600 Nobutomo } else { 941 9600 Nobutomo /* 942 9600 Nobutomo * Server is expected to use non-UTF8 encoding. 943 9600 Nobutomo */ 944 9600 Nobutomo debug2("Enforcing no code conversion: %s", lang); 945 9600 Nobutomo } 946 9600 Nobutomo } 947 0 stevel } 948 5562 jp161948 949 5562 jp161948 /* 950 5562 jp161948 * Free all strings in the list and then free the list itself. We know that the 951 5562 jp161948 * list ends with a NULL pointer. 952 5562 jp161948 */ 953 5562 jp161948 void 954 5562 jp161948 g11n_freelist(char **list) 955 5562 jp161948 { 956 5562 jp161948 int i = 0; 957 5562 jp161948 958 5562 jp161948 while (list[i] != NULL) { 959 5562 jp161948 xfree(list[i]); 960 5562 jp161948 i++; 961 5562 jp161948 } 962 5562 jp161948 963 5562 jp161948 xfree(list); 964 5562 jp161948 } 965