Go to the source code of this file.
Defines | |
#define | UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) |
#define | UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) |
#define | UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) |
#define | UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) |
#define | UTF16_GET_PAIR_VALUE(first, second) (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) |
#define | UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) |
Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding. | |
#define | UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) |
Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding. | |
#define | UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) |
alias for UTF_FIRST_SURROGATE | |
#define | UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) |
alias for UTF_SECOND_SURROGATE | |
#define | UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) |
#define | UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) |
#define | UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) |
#define | UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) |
#define | UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) |
#define | UTF16_MAX_CHAR_LENGTH 2 |
#define | UTF16_ARRAY_SIZE(size) (size) |
#define | UTF16_GET_CHAR_UNSAFE(s, i, c) |
#define | UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) |
#define | UTF16_NEXT_CHAR_UNSAFE(s, i, c) |
#define | UTF16_APPEND_CHAR_UNSAFE(s, i, c) |
#define | UTF16_FWD_1_UNSAFE(s, i) |
#define | UTF16_FWD_N_UNSAFE(s, i, n) |
#define | UTF16_SET_CHAR_START_UNSAFE(s, i) |
#define | UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) |
#define | UTF16_APPEND_CHAR_SAFE(s, i, length, c) |
#define | UTF16_FWD_1_SAFE(s, i, length) |
#define | UTF16_FWD_N_SAFE(s, i, length, n) |
#define | UTF16_SET_CHAR_START_SAFE(s, start, i) |
#define | UTF16_PREV_CHAR_UNSAFE(s, i, c) |
#define | UTF16_BACK_1_UNSAFE(s, i) |
#define | UTF16_BACK_N_UNSAFE(s, i, n) |
#define | UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) |
#define | UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) |
#define | UTF16_BACK_1_SAFE(s, start, i) |
#define | UTF16_BACK_N_SAFE(s, start, i, n) |
#define | UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) |
This file defines macros to deal with UTF-16 code units and code points. "Safe" macros check for length overruns and illegal sequences, and also for irregular sequences when the strict option is set. "Unsafe" macros are designed for maximum speed. utf16.h is included by utf.h after unicode/umachine.h and some common definitions.
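Usage: Follow the ICU coding guidelines for if() statements when using these macros. Because the statement macros expand to brace-enclosed blocks (see the "Value:" listings below), the bodies of if, else, while, and similar statements must be written as compound statements (curly braces {}), and every macro statement should be terminated with a semicolon.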
|
Value: { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff) { \ if((i)+1<(length)) { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } else /* not enough space */ { \ (s)[(i)++]=UTF_ERROR_VALUE; \ } \ } else /* c>0x10ffff, write error value */ { \ (s)[(i)++]=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ } |
|
Value: { \
if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
--(i); \
} \
}
|
|
Value: { \
if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
--(i); \
} \
}
|
|
Value: { \
int32_t __N=(n); \
while(__N>0 && (i)>(start)) { \
UTF16_BACK_1_SAFE(s, start, i); \
--__N; \
} \
}
|
|
Value: { \
int32_t __N=(n); \
while(__N>0) { \
UTF16_BACK_1_UNSAFE(s, i); \
--__N; \
} \
}
|
|
Value: { \
if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && (i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
++(i); \
} \
}
|
|
Value: { \
if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
++(i); \
} \
}
|
|
Value: { \
int32_t __N=(n); \
while(__N>0 && (i)<(length)) { \
UTF16_FWD_1_SAFE(s, i, length); \
--__N; \
} \
}
|
|
Value: { \
int32_t __N=(n); \
while(__N>0) { \
UTF16_FWD_1_UNSAFE(s, i); \
--__N; \
} \
}
|
|
Value: { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(UTF_IS_SURROGATE_FIRST(c)) { \ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ } else if(strict) {\ /* unmatched first surrogate */ \ (c)=UTF_ERROR_VALUE; \ } \ } else { \ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ } else if(strict) {\ /* unmatched second surrogate */ \ (c)=UTF_ERROR_VALUE; \ } \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ if(UTF_IS_SURROGATE_FIRST(c)) { \ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ } else { \ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ } \ } \ } |
|
Value: { \ (c)=(s)[(i)++]; \ if(UTF_IS_FIRST_SURROGATE(c)) { \ uint16_t __c2; \ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ ++(i); \ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ } else if(strict) {\ /* unmatched first surrogate */ \ (c)=UTF_ERROR_VALUE; \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ /* unmatched second surrogate or other non-character */ \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \
(c)=(s)[(i)++]; \
if(UTF_IS_FIRST_SURROGATE(c)) { \
(c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
} \
}
|
|
Value: { \ (c)=(s)[--(i)]; \ if(UTF_IS_SECOND_SURROGATE(c)) { \ uint16_t __c2; \ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ --(i); \ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ } else if(strict) {\ /* unmatched second surrogate */ \ (c)=UTF_ERROR_VALUE; \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ /* unmatched first surrogate or other non-character */ \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \
(c)=(s)[--(i)]; \
if(UTF_IS_SECOND_SURROGATE(c)) { \
(c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
} \
}
|
|
Value: { \
if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
++(i); \
} \
}
|
|
Value: { \
if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
++(i); \
} \
}
|
|
Value: { \
if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
--(i); \
} \
}
|
|
Value: { \
if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
--(i); \
} \
}
|
|
Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding. |
|
Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding. |