Home | History | Annotate | Download | only in include
      1 /*
      2  * Copyright (C) 2008 by the Massachusetts Institute of Technology,
      3  * Cambridge, MA, USA.  All Rights Reserved.
      4  *
      5  * This software is being provided to you, the LICENSEE, by the
      6  * Massachusetts Institute of Technology (M.I.T.) under the following
      7  * license.  By obtaining, using and/or copying this software, you agree
      8  * that you have read, understood, and will comply with these terms and
      9  * conditions:
     10  *
     11  * Export of this software from the United States of America may
     12  * require a specific license from the United States Government.
     13  * It is the responsibility of any person or organization contemplating
     14  * export to obtain such a license before exporting.
     15  *
     16  * WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute
     17  * this software and its documentation for any purpose and without fee or
     18  * royalty is hereby granted, provided that you agree to comply with the
     19  * following copyright notice and statements, including the disclaimer, and
     20  * that the same appear on ALL copies of the software and documentation,
     21  * including modifications that you make for internal use or for
     22  * distribution:
     23  *
     24  * THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS
     25  * OR WARRANTIES, EXPRESS OR IMPLIED.  By way of example, but not
     26  * limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF
     27  * MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
     28  * THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
     29  * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
     30  *
     31  * The name of the Massachusetts Institute of Technology or M.I.T. may NOT
     32  * be used in advertising or publicity pertaining to distribution of the
     33  * software.  Title to copyright in this software and any associated
     34  * documentation shall at all times remain with M.I.T., and USER agrees to
     35  * preserve same.
     36  *
     37  * Furthermore if you modify this software you must label
     38  * your software as modified software and not distribute it in such a
     39  * fashion that it might be confused with the original M.I.T. software.
     40  */
     41 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
     42  *
     43  * Copyright 1998-2008 The OpenLDAP Foundation.
     44  * All rights reserved.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted only as authorized by the OpenLDAP
     48  * Public License.
     49  *
     50  * A copy of this license is available in file LICENSE in the
     51  * top-level directory of the distribution or, alternatively, at
     52  * <http://www.OpenLDAP.org/license.html>.
     53  */
     54 /* This notice applies to changes, created by or for Novell, Inc.,
     55  * to preexisting works for which notices appear elsewhere in this file.
     56  *
     57  * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
     58  *
     59  * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
     60  * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
     61  * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
     62  * HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
     63  * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
     64  * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
     65  * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
     66  * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
     67  */
     68 
     69 #ifndef K5_UTF8_H
     70 #define K5_UTF8_H
     71 
#include "autoconf.h"

#include <limits.h>

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
     85 
     86 #if INT_MAX == 0x7fff
     87 typedef	unsigned int	krb5_ucs2;
     88 #elif SHRT_MAX == 0x7fff
     89 typedef	unsigned short	krb5_ucs2;
     90 #else
     91 #error undefined 16 bit type
     92 #endif
     93 
     94 #if INT_MAX == 0x7fffffffL
     95 typedef int	krb5_ucs4;
     96 #elif LONG_MAX == 0x7fffffffL
     97 typedef long	krb5_ucs4;
     98 #elif SHRT_MAX == 0x7fffffffL
     99 typedef short	krb5_ucs4;
    100 #else
    101 #error: undefined 32 bit type
    102 #endif
    103 
    104 #define KRB5_MAX_UTF8_LEN   (sizeof(krb5_ucs2) * 3/2)
    105 
    106 int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
    107 size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
    108 
    109 int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
    110 size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
    111 
    112 int
    113 krb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s,
    114 		       char **utf8s,
    115 		       size_t *utf8slen);
    116 
    117 int
    118 krb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s,
    119 			size_t ucs2slen,
    120 		        char **utf8s,
    121 		        size_t *utf8slen);
    122 
    123 int
    124 krb5int_ucs2les_to_utf8s(const unsigned char *ucs2les,
    125 			 char **utf8s,
    126 			 size_t *utf8slen);
    127 
    128 int
    129 krb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les,
    130 			  size_t ucs2leslen,
    131 			  char **utf8s,
    132 			  size_t *utf8slen);
    133 
    134 int
    135 krb5int_utf8s_to_ucs2s(const char *utf8s,
    136 		       krb5_ucs2 **ucs2s,
    137 		       size_t *ucs2chars);
    138 
    139 int
    140 krb5int_utf8cs_to_ucs2s(const char *utf8s,
    141 			size_t utf8slen,
    142 		        krb5_ucs2 **ucs2s,
    143 		        size_t *ucs2chars);
    144 
    145 int
    146 krb5int_utf8s_to_ucs2les(const char *utf8s,
    147 			 unsigned char **ucs2les,
    148 		         size_t *ucs2leslen);
    149 
    150 int
    151 krb5int_utf8cs_to_ucs2les(const char *utf8s,
    152 			  size_t utf8slen,
    153 		          unsigned char **ucs2les,
    154 			  size_t *ucs2leslen);
    155 
    156 /* returns the number of bytes in the UTF-8 string */
    157 size_t krb5int_utf8_bytes(const char *);
    158 /* returns the number of UTF-8 characters in the string */
    159 size_t krb5int_utf8_chars(const char *);
    160 /* returns the number of UTF-8 characters in the counted string */
    161 size_t krb5int_utf8c_chars(const char *, size_t);
    162 /* returns the length (in bytes) of the UTF-8 character */
    163 int krb5int_utf8_offset(const char *);
    164 /* returns the length (in bytes) indicated by the UTF-8 character */
    165 int krb5int_utf8_charlen(const char *);
    166 
    167 /* returns the length (in bytes) indicated by the UTF-8 character
    168  * also checks that shortest possible encoding was used
    169  */
    170 int krb5int_utf8_charlen2(const char *);
    171 
    172 /* copies a UTF-8 character and returning number of bytes copied */
    173 int krb5int_utf8_copy(char *, const char *);
    174 
    175 /* returns pointer of next UTF-8 character in string */
    176 char *krb5int_utf8_next( const char *);
    177 /* returns pointer of previous UTF-8 character in string */
    178 char *krb5int_utf8_prev( const char *);
    179 
    180 /* primitive ctype routines -- not aware of non-ascii characters */
    181 int krb5int_utf8_isascii( const char *);
    182 int krb5int_utf8_isalpha( const char *);
    183 int krb5int_utf8_isalnum( const char *);
    184 int krb5int_utf8_isdigit( const char *);
    185 int krb5int_utf8_isxdigit( const char *);
    186 int krb5int_utf8_isspace( const char *);
    187 
    188 /* span characters not in set, return bytes spanned */
    189 size_t krb5int_utf8_strcspn( const char* str, const char *set);
    190 /* span characters in set, return bytes spanned */
    191 size_t krb5int_utf8_strspn( const char* str, const char *set);
    192 /* return first occurance of character in string */
    193 char *krb5int_utf8_strchr( const char* str, const char *chr);
    194 /* return first character of set in string */
    195 char *krb5int_utf8_strpbrk( const char* str, const char *set);
    196 /* reentrant tokenizer */
    197 char *krb5int_utf8_strtok( char* sp, const char* sep, char **last);
    198 
    199 /* Optimizations */
    200 extern const char krb5int_utf8_lentab[128];
    201 extern const char krb5int_utf8_mintab[32];
    202 
    203 #define KRB5_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) )
    204 #define KRB5_UTF8_CHARLEN(p) ( KRB5_UTF8_ISASCII(p) \
    205 	? 1 : krb5int_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )
    206 
    207 /* This is like CHARLEN but additionally validates to make sure
    208  * the char used the shortest possible encoding.
    209  * 'l' is used to temporarily hold the result of CHARLEN.
    210  */
    211 #define KRB5_UTF8_CHARLEN2(p, l) ( ( ( l = KRB5_UTF8_CHARLEN( p )) < 3 || \
    212 	( krb5int_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) ? \
    213 	l : 0 )
    214 
    215 #define KRB5_UTF8_OFFSET(p) ( KRB5_UTF8_ISASCII(p) \
    216 	? 1 : krb5int_utf8_offset((p)) )
    217 
    218 #define KRB5_UTF8_COPY(d,s) ( KRB5_UTF8_ISASCII(s) \
    219 	? (*(d) = *(s), 1) : krb5int_utf8_copy((d),(s)) )
    220 
    221 #define KRB5_UTF8_NEXT(p) (	KRB5_UTF8_ISASCII(p) \
    222 	? (char *)(p)+1 : krb5int_utf8_next((p)) )
    223 
    224 #define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p))
    225 
    226 /* For symmetry */
    227 #define KRB5_UTF8_PREV(p) (krb5int_utf8_prev((p)))
    228 #define KRB5_UTF8_DECR(p) ((p)=KRB5_UTF8_PREV((p)))
    229 
    230 /*
    231  * these macros assume 'x' is an ASCII x
    232  * and assume the "C" locale
    233  */
    234 #define KRB5_ASCII(c)		(!((c) & 0x80))
    235 #define KRB5_SPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n')
    236 #define KRB5_DIGIT(c)		((c) >= '0' && (c) <= '9')
    237 #define KRB5_LOWER(c)		((c) >= 'a' && (c) <= 'z')
    238 #define KRB5_UPPER(c)		((c) >= 'A' && (c) <= 'Z')
    239 #define KRB5_ALPHA(c)		(KRB5_LOWER(c) || KRB5_UPPER(c))
    240 #define KRB5_ALNUM(c)		(KRB5_ALPHA(c) || KRB5_DIGIT(c))
    241 
    242 #define KRB5_LDH(c)		(KRB5_ALNUM(c) || (c) == '-')
    243 
    244 #define KRB5_HEXLOWER(c)	((c) >= 'a' && (c) <= 'f')
    245 #define KRB5_HEXUPPER(c)	((c) >= 'A' && (c) <= 'F')
    246 #define KRB5_HEX(c)		(KRB5_DIGIT(c) || \
    247 				KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c))
    248 
    249 #endif /* K5_UTF8_H */
    250