Home | History | Annotate | Download | only in smbsrv
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 /*
     29  * Case conversion functions for strings. Originally this module only
     30  * dealt with ASCII strings. It has been updated to support European
     31  * character set characters. The current implementation is based on
     32  * code page table lookup rather than simple character range checks.
     33  */
     34 
     35 #ifdef _KERNEL
     36 #include <sys/types.h>
     37 #include <sys/sunddi.h>
     38 #else
     39 #include <stdio.h>
     40 #include <string.h>
     41 #endif
     42 #include <smbsrv/ctype.h>
     43 #include <smbsrv/codepage.h>
     44 #include <smbsrv/cp_cyrillic.h>
     45 #include <smbsrv/cp_latin1.h>
     46 #include <smbsrv/cp_latin2.h>
     47 #include <smbsrv/cp_latin3.h>
     48 #include <smbsrv/cp_latin4.h>
     49 #include <smbsrv/cp_latin5.h>
     50 #include <smbsrv/cp_latin6.h>
     51 #include <smbsrv/cp_usascii.h>
     52 
/*
 * Global pointer to the current code page table. It is initialized
 * to the standard US-ASCII table (usascii_codepage).
 */
static codepage_t *current_codepage = usascii_codepage;

/*
 * Flag selecting how a character value is reduced to an index into
 * the current codepage table by the codepage_* lookup functions in
 * this file: when non-zero the low 16 bits (0xffff) are used, when
 * zero (the initial value) only the low 8 bits (0xff) are used.
 *
 * NOTE(review): the previous comment described this as a flag meaning
 * "the codepage is ASCII" that lets string operations go faster; that
 * does not match the variable name or its use in this file -- confirm
 * the intended meaning.
 */
static int is_unicode = 0;
     64 
     65 /*
     66  * codepage_isupper
     67  *
     68  * Determine whether or not a character is an uppercase character.
     69  * This function operates on the current codepage table. Returns
     70  * non-zero if the character is uppercase. Otherwise returns zero.
     71  */
     72 int
     73 codepage_isupper(int c)
     74 {
     75 	unsigned short mask = is_unicode ? 0xffff : 0xff;
     76 
     77 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
     78 }
     79 
     80 
     81 /*
     82  * codepage_islower
     83  *
     84  * Determine whether or not a character is an lowercase character.
     85  * This function operates on the current codepage table. Returns
     86  * non-zero if the character is lowercase. Otherwise returns zero.
     87  */
     88 int
     89 codepage_islower(int c)
     90 {
     91 	unsigned short mask = is_unicode ? 0xffff : 0xff;
     92 
     93 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
     94 }
     95 
     96 
     97 /*
     98  * codepage_toupper
     99  *
    100  * Convert individual characters to their uppercase equivalent value.
    101  * If the specified character is lowercase, the uppercase value will
    102  * be returned. Otherwise the original value will be returned.
    103  */
    104 int
    105 codepage_toupper(int c)
    106 {
    107 	unsigned short mask = is_unicode ? 0xffff : 0xff;
    108 
    109 	return (current_codepage[c & mask].upper);
    110 }
    111 
    112 
    113 /*
    114  * codepage_tolower
    115  *
    116  * Convert individual characters to their lowercase equivalent value.
    117  * If the specified character is uppercase, the lowercase value will
    118  * be returned. Otherwise the original value will be returned.
    119  */
    120 int
    121 codepage_tolower(int c)
    122 {
    123 	unsigned short mask = is_unicode ? 0xffff : 0xff;
    124 
    125 	return (current_codepage[c & mask].lower);
    126 }
    127 
    128 
    129 /*
    130  * strupr
    131  *
    132  * Convert a string to uppercase using the appropriate codepage. The
    133  * string is converted in place. A pointer to the string is returned.
    134  * There is an assumption here that uppercase and lowercase values
    135  * always result encode to the same length.
    136  */
    137 char *
    138 utf8_strupr(char *s)
    139 {
    140 	mts_wchar_t c;
    141 	char *p = s;
    142 
    143 	while (*p) {
    144 		if (mts_isascii(*p)) {
    145 			*p = codepage_toupper(*p);
    146 			p++;
    147 		} else {
    148 			if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
    149 				return (0);
    150 
    151 			if (c == 0)
    152 				break;
    153 
    154 			c = codepage_toupper(c);
    155 			p += mts_wctomb(p, c);
    156 		}
    157 	}
    158 
    159 	return (s);
    160 }
    161 
    162 
    163 /*
    164  * strlwr
    165  *
    166  * Convert a string to lowercase using the appropriate codepage. The
    167  * string is converted in place. A pointer to the string is returned.
    168  * There is an assumption here that uppercase and lowercase values
    169  * always result encode to the same length.
    170  */
    171 char *
    172 utf8_strlwr(char *s)
    173 {
    174 	mts_wchar_t c;
    175 	char *p = s;
    176 
    177 	while (*p) {
    178 		if (mts_isascii(*p)) {
    179 			*p = codepage_tolower(*p);
    180 			p++;
    181 		} else {
    182 			if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
    183 				return (0);
    184 
    185 			if (c == 0)
    186 				break;
    187 
    188 			c = codepage_tolower(c);
    189 			p += mts_wctomb(p, c);
    190 		}
    191 	}
    192 
    193 	return (s);
    194 }
    195 
    196 
    197 /*
    198  * isstrlwr
    199  *
    200  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
    201  * -1 is returned if "s" is not a valid multi-byte string.
    202  */
    203 int
    204 utf8_isstrlwr(const char *s)
    205 {
    206 	mts_wchar_t c;
    207 	int n;
    208 	const char *p = s;
    209 
    210 	while (*p) {
    211 		if (mts_isascii(*p) && codepage_isupper(*p))
    212 			return (0);
    213 		else {
    214 			if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
    215 				return (-1);
    216 
    217 			if (c == 0)
    218 				break;
    219 
    220 			if (codepage_isupper(c))
    221 				return (0);
    222 
    223 			p += n;
    224 		}
    225 	}
    226 
    227 	return (1);
    228 }
    229 
    230 
    231 /*
    232  * isstrupr
    233  *
    234  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
    235  * -1 is returned if "s" is not a valid multi-byte string.
    236  */
    237 int
    238 utf8_isstrupr(const char *s)
    239 {
    240 	mts_wchar_t c;
    241 	int n;
    242 	const char *p = s;
    243 
    244 	while (*p) {
    245 		if (mts_isascii(*p) && codepage_islower(*p))
    246 			return (0);
    247 		else {
    248 			if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
    249 				return (-1);
    250 
    251 			if (c == 0)
    252 				break;
    253 
    254 			if (codepage_islower(c))
    255 				return (0);
    256 
    257 			p += n;
    258 		}
    259 	}
    260 
    261 	return (1);
    262 }
    263 
    264 
    265 /*
    266  * strcasecmp
    267  *
    268  * Compare the null-terminated strings s1 and s2 and return an integer
    269  * greater than, equal to, or less than 0, according as s1 is lexico
    270  * graphically greater than, equal to, or less than s2 after translation
    271  * of each corresponding character to lowercase. The strings themselves
    272  * are not modified.
    273  *
    274  * Out:    0 if strings are equal
    275  *       < 0 if first string < second string
    276  *       > 0 if first string > second string
    277  */
    278 int
    279 utf8_strcasecmp(const char *s1, const char *s2)
    280 {
    281 	mts_wchar_t c1, c2;
    282 	int n1, n2;
    283 	const char *p1 = s1;
    284 	const char *p2 = s2;
    285 
    286 	for (;;) {
    287 		if (mts_isascii(*p1))
    288 			c1 = *p1++;
    289 		else {
    290 			if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0)
    291 				return (-1);
    292 			p1 += n1;
    293 		}
    294 
    295 		if (mts_isascii(*p2))
    296 			c2 = *p2++;
    297 		else {
    298 			if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0)
    299 				return (1);
    300 			p2 += n2;
    301 		}
    302 
    303 		if (c1 == 0 || c2 == 0)
    304 			break;
    305 
    306 		if (c1 == c2)
    307 			continue;
    308 
    309 		c1 = codepage_tolower(c1);
    310 		c2 = codepage_tolower(c2);
    311 
    312 		if (c1 != c2)
    313 			break;
    314 	}
    315 
    316 	return ((int)c1 - (int)c2);
    317 }
    318 
    319 
    320 /*
    321  * strncasecmp
    322  *
    323  * Compare two null-terminated strings, s1 and s2, of at most len
    324  * characters and return an int greater than, equal to, or less than 0,
    325  * dependent on whether s1 is lexicographically greater than, equal to,
    326  * or less than s2 after translation of each corresponding character to
    327  * lowercase. The original strings are not modified.
    328  *
    329  * Out:    0 if strings are equal
    330  *       < 0 if first string < second string
    331  *       > 0 if first string > second string
    332  */
    333 int
    334 utf8_strncasecmp(const char *s1, const char *s2, int len)
    335 {
    336 	mts_wchar_t c1, c2;
    337 	int n1, n2;
    338 	const char *p1 = s1;
    339 	const char *p2 = s2;
    340 
    341 	if (len <= 0)
    342 		return (0);
    343 
    344 	while (len--) {
    345 		if (mts_isascii(*p1))
    346 			c1 = *p1++;
    347 		else {
    348 			if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0)
    349 				return (-1);
    350 			p1 += n1;
    351 		}
    352 
    353 		if (mts_isascii(*p2))
    354 			c2 = *p2++;
    355 		else {
    356 			if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0)
    357 				return (1);
    358 			p2 += n2;
    359 		}
    360 
    361 		if (c1 == 0 || c2 == 0)
    362 			break;
    363 
    364 		if (c1 == c2)
    365 			continue;
    366 
    367 		c1 = codepage_tolower(c1);
    368 		c2 = codepage_tolower(c2);
    369 
    370 		if (c1 != c2)
    371 			break;
    372 	}
    373 
    374 	return ((int)c1 - (int)c2);
    375 }
    376 
    377 
    378 
    379 int
    380 utf8_isstrascii(const char *s)
    381 {
    382 	while (*s) {
    383 		if (mts_isascii(*s) == 0)
    384 			return (0);
    385 		s++;
    386 	}
    387 	return (1);
    388 }
    389