1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Case conversion functions for strings. Originally this module only 30 * dealt with ASCII strings. It has been updated to support European 31 * character set characters. The current implementation is based on 32 * code page table lookup rather than simple character range checks. 33 */ 34 35 #ifdef _KERNEL 36 #include <sys/types.h> 37 #include <sys/sunddi.h> 38 #else 39 #include <stdio.h> 40 #include <string.h> 41 #endif 42 #include <smbsrv/ctype.h> 43 #include <smbsrv/codepage.h> 44 #include <smbsrv/cp_cyrillic.h> 45 #include <smbsrv/cp_latin1.h> 46 #include <smbsrv/cp_latin2.h> 47 #include <smbsrv/cp_latin3.h> 48 #include <smbsrv/cp_latin4.h> 49 #include <smbsrv/cp_latin5.h> 50 #include <smbsrv/cp_latin6.h> 51 #include <smbsrv/cp_usascii.h> 52 53 /* 54 * Global pointer to the current code page. This is 55 * defaulted to a standard ASCII table. 56 */ 57 static codepage_t *current_codepage = usascii_codepage; 58 59 /* 60 * A flag indicating whether the codepage being used is ASCII 61 * When this flag is set, string opeartions can go faster. 62 */ 63 static int is_unicode = 0; 64 65 /* 66 * codepage_isupper 67 * 68 * Determine whether or not a character is an uppercase character. 69 * This function operates on the current codepage table. Returns 70 * non-zero if the character is uppercase. Otherwise returns zero. 71 */ 72 int 73 codepage_isupper(int c) 74 { 75 unsigned short mask = is_unicode ? 0xffff : 0xff; 76 77 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 78 } 79 80 81 /* 82 * codepage_islower 83 * 84 * Determine whether or not a character is an lowercase character. 85 * This function operates on the current codepage table. Returns 86 * non-zero if the character is lowercase. Otherwise returns zero. 87 */ 88 int 89 codepage_islower(int c) 90 { 91 unsigned short mask = is_unicode ? 0xffff : 0xff; 92 93 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 94 } 95 96 97 /* 98 * codepage_toupper 99 * 100 * Convert individual characters to their uppercase equivalent value. 101 * If the specified character is lowercase, the uppercase value will 102 * be returned. Otherwise the original value will be returned. 103 */ 104 int 105 codepage_toupper(int c) 106 { 107 unsigned short mask = is_unicode ? 0xffff : 0xff; 108 109 return (current_codepage[c & mask].upper); 110 } 111 112 113 /* 114 * codepage_tolower 115 * 116 * Convert individual characters to their lowercase equivalent value. 117 * If the specified character is uppercase, the lowercase value will 118 * be returned. Otherwise the original value will be returned. 119 */ 120 int 121 codepage_tolower(int c) 122 { 123 unsigned short mask = is_unicode ? 0xffff : 0xff; 124 125 return (current_codepage[c & mask].lower); 126 } 127 128 129 /* 130 * strupr 131 * 132 * Convert a string to uppercase using the appropriate codepage. The 133 * string is converted in place. A pointer to the string is returned. 134 * There is an assumption here that uppercase and lowercase values 135 * always result encode to the same length. 136 */ 137 char * 138 utf8_strupr(char *s) 139 { 140 mts_wchar_t c; 141 char *p = s; 142 143 while (*p) { 144 if (mts_isascii(*p)) { 145 *p = codepage_toupper(*p); 146 p++; 147 } else { 148 if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 149 return (0); 150 151 if (c == 0) 152 break; 153 154 c = codepage_toupper(c); 155 p += mts_wctomb(p, c); 156 } 157 } 158 159 return (s); 160 } 161 162 163 /* 164 * strlwr 165 * 166 * Convert a string to lowercase using the appropriate codepage. The 167 * string is converted in place. A pointer to the string is returned. 168 * There is an assumption here that uppercase and lowercase values 169 * always result encode to the same length. 170 */ 171 char * 172 utf8_strlwr(char *s) 173 { 174 mts_wchar_t c; 175 char *p = s; 176 177 while (*p) { 178 if (mts_isascii(*p)) { 179 *p = codepage_tolower(*p); 180 p++; 181 } else { 182 if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 183 return (0); 184 185 if (c == 0) 186 break; 187 188 c = codepage_tolower(c); 189 p += mts_wctomb(p, c); 190 } 191 } 192 193 return (s); 194 } 195 196 197 /* 198 * isstrlwr 199 * 200 * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 201 * -1 is returned if "s" is not a valid multi-byte string. 202 */ 203 int 204 utf8_isstrlwr(const char *s) 205 { 206 mts_wchar_t c; 207 int n; 208 const char *p = s; 209 210 while (*p) { 211 if (mts_isascii(*p) && codepage_isupper(*p)) 212 return (0); 213 else { 214 if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 215 return (-1); 216 217 if (c == 0) 218 break; 219 220 if (codepage_isupper(c)) 221 return (0); 222 223 p += n; 224 } 225 } 226 227 return (1); 228 } 229 230 231 /* 232 * isstrupr 233 * 234 * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 235 * -1 is returned if "s" is not a valid multi-byte string. 236 */ 237 int 238 utf8_isstrupr(const char *s) 239 { 240 mts_wchar_t c; 241 int n; 242 const char *p = s; 243 244 while (*p) { 245 if (mts_isascii(*p) && codepage_islower(*p)) 246 return (0); 247 else { 248 if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 249 return (-1); 250 251 if (c == 0) 252 break; 253 254 if (codepage_islower(c)) 255 return (0); 256 257 p += n; 258 } 259 } 260 261 return (1); 262 } 263 264 265 /* 266 * strcasecmp 267 * 268 * Compare the null-terminated strings s1 and s2 and return an integer 269 * greater than, equal to, or less than 0, according as s1 is lexico 270 * graphically greater than, equal to, or less than s2 after translation 271 * of each corresponding character to lowercase. The strings themselves 272 * are not modified. 273 * 274 * Out: 0 if strings are equal 275 * < 0 if first string < second string 276 * > 0 if first string > second string 277 */ 278 int 279 utf8_strcasecmp(const char *s1, const char *s2) 280 { 281 mts_wchar_t c1, c2; 282 int n1, n2; 283 const char *p1 = s1; 284 const char *p2 = s2; 285 286 for (;;) { 287 if (mts_isascii(*p1)) 288 c1 = *p1++; 289 else { 290 if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0) 291 return (-1); 292 p1 += n1; 293 } 294 295 if (mts_isascii(*p2)) 296 c2 = *p2++; 297 else { 298 if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0) 299 return (1); 300 p2 += n2; 301 } 302 303 if (c1 == 0 || c2 == 0) 304 break; 305 306 if (c1 == c2) 307 continue; 308 309 c1 = codepage_tolower(c1); 310 c2 = codepage_tolower(c2); 311 312 if (c1 != c2) 313 break; 314 } 315 316 return ((int)c1 - (int)c2); 317 } 318 319 320 /* 321 * strncasecmp 322 * 323 * Compare two null-terminated strings, s1 and s2, of at most len 324 * characters and return an int greater than, equal to, or less than 0, 325 * dependent on whether s1 is lexicographically greater than, equal to, 326 * or less than s2 after translation of each corresponding character to 327 * lowercase. The original strings are not modified. 328 * 329 * Out: 0 if strings are equal 330 * < 0 if first string < second string 331 * > 0 if first string > second string 332 */ 333 int 334 utf8_strncasecmp(const char *s1, const char *s2, int len) 335 { 336 mts_wchar_t c1, c2; 337 int n1, n2; 338 const char *p1 = s1; 339 const char *p2 = s2; 340 341 if (len <= 0) 342 return (0); 343 344 while (len--) { 345 if (mts_isascii(*p1)) 346 c1 = *p1++; 347 else { 348 if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0) 349 return (-1); 350 p1 += n1; 351 } 352 353 if (mts_isascii(*p2)) 354 c2 = *p2++; 355 else { 356 if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0) 357 return (1); 358 p2 += n2; 359 } 360 361 if (c1 == 0 || c2 == 0) 362 break; 363 364 if (c1 == c2) 365 continue; 366 367 c1 = codepage_tolower(c1); 368 c2 = codepage_tolower(c2); 369 370 if (c1 != c2) 371 break; 372 } 373 374 return ((int)c1 - (int)c2); 375 } 376 377 378 379 int 380 utf8_isstrascii(const char *s) 381 { 382 while (*s) { 383 if (mts_isascii(*s) == 0) 384 return (0); 385 s++; 386 } 387 return (1); 388 } 389