utf8.h
Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00034
00035
#ifndef __UTF_H__
00036
# include "unicode/utf.h"
00037
#endif
00038
00039
#ifndef __UTF8_H__
00040
#define __UTF8_H__
00041
00042
00043
00050
#ifdef U_UTF8_IMPL
00051
U_CAPI const uint8_t
00052
utf8_countTrailBytes[256];
00053
#else
00054
U_CFUNC U_IMPORT
const uint8_t
00055
utf8_countTrailBytes[256];
00056
#endif
00057
/**
 * Number of trail bytes that follow the given lead byte, looked up in
 * utf8_countTrailBytes[].
 *
 * The argument is parenthesized before it is narrowed to uint8_t, so an
 * expression argument (for example a conditional or a sum) is converted as
 * a whole and the table index always stays within 0..255.
 *
 * @param leadByte The first byte of a UTF-8 sequence.
 */
#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00063
/**
 * Strip the length-marker bits from a lead byte, in place.
 * Keeps only the low (6 - countTrailBytes) bits of leadByte, i.e. the bits
 * that carry code point data. leadByte must be a modifiable lvalue.
 *
 * @param leadByte        Lead byte value; overwritten with the masked value.
 * @param countTrailBytes Number of trail bytes belonging to this lead byte;
 *                        must not exceed 6, otherwise the shift count is negative.
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
00069
00074
U_CAPI UChar32 U_EXPORT2
00075
utf8_nextCharSafeBody(
const uint8_t *s, int32_t *pi, int32_t length,
UChar32 c,
UBool strict);
00076
00081
U_CAPI int32_t U_EXPORT2
00082
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length,
UChar32 c,
UBool *pIsError);
00083
00088
U_CAPI UChar32 U_EXPORT2
00089
utf8_prevCharSafeBody(
const uint8_t *s, int32_t start, int32_t *pi,
UChar32 c,
UBool strict);
00090
00095
U_CAPI int32_t U_EXPORT2
00096
utf8_back1SafeBody(
const uint8_t *s, int32_t start, int32_t i);
00097
00098
00099
/**
 * Does this byte encode a code point by itself?
 * True when bit 7 is clear, i.e. for values 0..0x7f.
 *
 * @param c 8-bit code unit (byte).
 * @return Non-zero if the byte is a single-byte sequence, 0 otherwise.
 */
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
00107
/**
 * Is this byte a lead byte?
 * True for values 0xc0..0xfd: subtracting 0xc0 and narrowing to uint8_t
 * folds the two range checks into a single unsigned comparison.
 *
 * @param c 8-bit code unit (byte).
 * @return Non-zero if the byte is a lead byte, 0 otherwise.
 */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
00115
/**
 * Is this byte a trail byte?
 * True when the top two bits are 10, i.e. for values 0x80..0xbf.
 *
 * @param c 8-bit code unit (byte).
 * @return Non-zero if the byte is a trail byte, 0 otherwise.
 */
#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
00123
/**
 * How many bytes are needed to encode this code point in UTF-8?
 *
 * Yields 1 for 0..0x7f, 2 for 0x80..0x7ff, 3 for 0x800..0xd7ff and
 * 0xe000..0xffff, 4 for 0x10000..0x10ffff, and 0 for values that cannot be
 * encoded: 0xd800..0xdfff and anything above 0x10ffff. The argument is
 * compared as uint32_t, so negative values also yield 0.
 * The argument is evaluated more than once.
 *
 * @param c 32-bit code point.
 * @return 0..4.
 */
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
        ((uint32_t)(c)<=0x7ff ? 2 : \
            ((uint32_t)(c)<=0xd7ff ? 3 : \
                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
                ) \
            ) \
        ) \
    )
00141
/**
 * Largest value that U8_LENGTH() can yield: the maximum number of bytes
 * per code point.
 */
#define U8_MAX_LENGTH 4
00148
/**
 * Read the code point that contains the byte at offset i, without changing i.
 *
 * Works on a private copy of the offset: the copy is first moved back to
 * the start of the sequence with U8_SET_CP_START_UNSAFE(), then the code
 * point is decoded with U8_NEXT_UNSAFE(). Neither step checks bounds or
 * validity.
 *
 * @param s Byte string.
 * @param i Offset of any byte of the code point; only read, never modified.
 * @param c Output variable that receives the code point.
 */
#define U8_GET_UNSAFE(s, i, c) { \
    int32_t __I=(int32_t)(i); \
    U8_SET_CP_START_UNSAFE(s, __I); \
    U8_NEXT_UNSAFE(s, __I, c); \
}
00170
/**
 * Read the code point that contains the byte at offset i, without changing i.
 *
 * Works on a private copy of the offset: the copy is first moved back to
 * the start of the sequence with U8_SET_CP_START(), then the code point is
 * decoded with U8_NEXT(). Both steps receive the caller's limits.
 *
 * @param s      Byte string.
 * @param start  Lower limit, forwarded to U8_SET_CP_START().
 * @param i      Offset of any byte of the code point; only read, never modified.
 * @param length Upper limit, forwarded to U8_NEXT().
 * @param c      Output variable that receives whatever U8_NEXT() produces.
 */
#define U8_GET(s, start, i, length, c) { \
    int32_t __I=(int32_t)(i); \
    U8_SET_CP_START(s, start, __I); \
    U8_NEXT(s, __I, length, c); \
}
00194
00195
00196
/**
 * Read the code point starting at offset i and advance i past it.
 * No bounds or validity checks are made on the bytes read.
 *
 * The first byte is stored in c. If it lies in 0xc0..0xf4 it is treated as
 * a lead byte: its marker bits are masked off and one to three trail bytes
 * (as reported by U8_COUNT_TRAIL_BYTES) are folded in, six bits each.
 * Any other first byte is left in c unchanged.
 *
 * @param s Byte string.
 * @param i Offset of the first byte; advanced past the bytes consumed.
 * @param c Output variable that receives the code point.
 */
#define U8_NEXT_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
        U8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each case deliberately falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            break; \
        default: \
            /* no trail bytes to read */ \
            break; \
        } \
    } \
}
00232
/**
 * Read the code point starting at offset i and advance i past it,
 * with checking.
 *
 * A byte below 0x80 is delivered as is. A lead byte (U8_IS_LEAD) is handed
 * to utf8_nextCharSafeBody() together with the address of i and the limit.
 * Any other byte (0x80..0xbf, 0xfe, 0xff) yields U_SENTINEL.
 *
 * The macro reads (s)[i] unconditionally, so the caller must ensure that
 * i is a readable offset before invoking it.
 *
 * @param s      Byte string; converted to const uint8_t * for the helper call.
 * @param i      Offset of the first byte; must be a modifiable lvalue.
 * @param length Upper limit, forwarded to utf8_nextCharSafeBody().
 * @param c      Output variable that receives the code point or U_SENTINEL.
 */
#define U8_NEXT(s, i, length, c) { \
    (c)=(s)[(i)++]; \
    if(((uint8_t)(c))>=0x80) { \
        if(U8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), c, -1); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
00261
/**
 * Write code point c at offset i and advance i past the bytes written.
 * Writes 1 to 4 bytes; neither the buffer capacity nor the validity of c
 * is checked.
 *
 * The branches are nested so that the shared trail bytes are emitted once:
 * every multi-byte form ends with the lowest six bits, the 3- and 4-byte
 * forms add the next six bits before that, and only the 4-byte form emits
 * bits 12..17 as an extra trail byte.
 *
 * @param s Destination byte buffer.
 * @param i Write offset; must be a modifiable lvalue.
 * @param c Code point to encode; evaluated more than once.
 */
#define U8_APPEND_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
00293
/**
 * Write code point c at offset i and advance i, with checking for
 * multi-byte output.
 *
 * Values up to 0x7f are stored directly as one byte; this path does not
 * consult length, so the caller must guarantee room for at least one byte.
 * Every other value is delegated to utf8_appendCharSafeBody(), which
 * receives the limit and the address of isError and returns the new offset.
 *
 * @param s       Destination byte buffer.
 * @param i       Write offset; must be a modifiable lvalue.
 * @param length  Limit forwarded to utf8_appendCharSafeBody().
 * @param c       Code point to encode; evaluated more than once.
 * @param isError Error flag variable whose address is passed to the helper;
 *                this macro itself never writes to it.
 */
#define U8_APPEND(s, i, length, c, isError) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \
    } \
}
00318
/**
 * Advance i past the code point that starts at offset i.
 * Adds one for the byte at i plus the number of trail bytes that
 * U8_COUNT_TRAIL_BYTES() reports for it; the trail bytes themselves are
 * neither read nor checked.
 *
 * @param s Byte string.
 * @param i Offset of the first byte; must be a modifiable lvalue.
 */
#define U8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[(i)]); \
}
00332
/**
 * Advance i past the code point that starts at offset i, with checking.
 *
 * The byte at i is always consumed. If it is a lead byte, up to
 * U8_COUNT_TRAIL_BYTES() further bytes are skipped, but only while they are
 * trail bytes and only up to length: the expected count is first clamped to
 * the bytes remaining before the limit.
 *
 * @param s      Byte string.
 * @param i      Offset of the first byte; must be a modifiable lvalue.
 * @param length Upper limit for i.
 */
#define U8_FWD_1(s, i, length) { \
    uint8_t __b=(s)[(i)++]; \
    if(U8_IS_LEAD(__b)) { \
        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
        if((i)+__count>(length)) { \
            /* sequence would run past the limit: only look at what is left */ \
            __count=(uint8_t)((length)-(i)); \
        } \
        while(__count>0 && U8_IS_TRAIL((s)[(i)])) { \
            ++(i); \
            --__count; \
        } \
    } \
}
00357
/**
 * Advance i by n code points by applying U8_FWD_1_UNSAFE() n times.
 * No bounds or validity checks are made. Does nothing when n <= 0.
 *
 * @param s Byte string.
 * @param i Offset; must be a modifiable lvalue.
 * @param n Number of code points to skip; evaluated once.
 */
#define U8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
00377
/**
 * Advance i by up to n code points by applying U8_FWD_1() repeatedly.
 * Stops early as soon as i reaches length. Does nothing when n <= 0.
 *
 * @param s      Byte string.
 * @param i      Offset; must be a modifiable lvalue.
 * @param length Upper limit for i.
 * @param n      Number of code points to skip; evaluated once.
 */
#define U8_FWD_N(s, i, length, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
        U8_FWD_1(s, i, length); \
        --__N; \
    } \
}
00398
/**
 * Move i back to the first byte of the code point that contains offset i.
 * Decrements i for as long as the byte at i is a trail byte; there is no
 * lower bound, so the text must contain a non-trail byte at or before i.
 *
 * @param s Byte string.
 * @param i Offset; must be a modifiable lvalue.
 */
#define U8_SET_CP_START_UNSAFE(s, i) { \
    while(U8_IS_TRAIL((s)[(i)])) { --(i); } \
}
00415
/**
 * Move i back to the first byte of the code point that contains offset i,
 * with checking.
 * If the byte at i is not a trail byte, i is left alone. Otherwise the
 * new offset is computed by utf8_back1SafeBody(), which is given start as
 * the lower limit.
 *
 * @param s     Byte string.
 * @param start Lower limit, forwarded to utf8_back1SafeBody().
 * @param i     Offset; must be a modifiable lvalue.
 */
#define U8_SET_CP_START(s, start, i) { \
    if(U8_IS_TRAIL((s)[(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
    } \
}
00435
00436
00437
/**
 * Move i back over the code point that ends just before offset i and read
 * that code point. No bounds or validity checks are made.
 *
 * The byte before i is read first. If it is not a trail byte it is the
 * result. Otherwise its six data bits seed c and the loop keeps walking
 * backwards: each further trail byte contributes six more bits at the next
 * higher position, and the first byte >= 0xc0 ends the loop after its
 * marker bits are masked off according to the number of trail bytes seen.
 *
 * @param s Byte string.
 * @param i Offset just behind the code point; must be a modifiable lvalue.
 * @param c Output variable that receives the code point.
 */
#define U8_PREV_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte: keep its data bits and collect the rest */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                /* lead byte reached: strip its marker bits and finish */ \
                U8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}
00478
/**
 * Move i back over the code point that ends just before offset i and read
 * that code point, with checking.
 *
 * The byte before i is read as an unsigned value, so the range tests below
 * also work when s points to plain char and char is signed (a raw signed
 * byte >= 0x80 would be negative and would be mistaken for a single byte).
 * A byte below 0x80 is delivered as is. A trail byte (0x80..0xbf) is handed
 * to utf8_prevCharSafeBody() together with start and the address of i.
 * Any byte above 0xbf cannot end a sequence and yields U_SENTINEL.
 *
 * The macro reads (s)[i-1] unconditionally, so the caller must ensure that
 * i is greater than start before invoking it.
 *
 * @param s     Byte string; converted to const uint8_t * for the helper
 *              call, the same way U8_NEXT() does it.
 * @param start Lower limit, forwarded to utf8_prevCharSafeBody().
 * @param i     Offset just behind the code point; must be a modifiable lvalue.
 * @param c     Output variable that receives the code point or U_SENTINEL.
 */
#define U8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
00509
/**
 * Move i back over the code point that ends just before offset i.
 * Pre-decrements i and keeps doing so while the byte it lands on is a trail
 * byte; there is no lower bound, so the text must contain a non-trail byte
 * before i.
 *
 * @param s Byte string.
 * @param i Offset just behind the code point; must be a modifiable lvalue.
 */
#define U8_BACK_1_UNSAFE(s, i) { \
    while(U8_IS_TRAIL((s)[--(i)])) {} \
}
00524
/**
 * Move i back over the code point that ends just before offset i,
 * with checking.
 * i is always decremented once. If the byte it then points at is a trail
 * byte, the final offset is computed by utf8_back1SafeBody(), which is
 * given start as the lower limit.
 *
 * @param s     Byte string.
 * @param start Lower limit, forwarded to utf8_back1SafeBody().
 * @param i     Offset just behind the code point; must be a modifiable lvalue.
 */
#define U8_BACK_1(s, start, i) { \
    if(U8_IS_TRAIL((s)[--(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
    } \
}
00542
/**
 * Move i back by n code points by applying U8_BACK_1_UNSAFE() n times.
 * No bounds or validity checks are made. Does nothing when n <= 0.
 *
 * @param s Byte string.
 * @param i Offset; must be a modifiable lvalue.
 * @param n Number of code points to step back; evaluated once.
 */
#define U8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
00563
/**
 * Move i back by up to n code points by applying U8_BACK_1() repeatedly.
 * Stops early as soon as i is no longer greater than start.
 * Does nothing when n <= 0.
 *
 * @param s     Byte string.
 * @param start Lower limit for i.
 * @param i     Offset; must be a modifiable lvalue.
 * @param n     Number of code points to step back; evaluated once.
 */
#define U8_BACK_N(s, start, i, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U8_BACK_1(s, start, i); \
        --__N; \
    } \
}
00585
/**
 * Adjust i so that it lies just behind the code point containing the byte
 * at i-1. Steps back one code point with U8_BACK_1_UNSAFE() and then
 * forward one with U8_FWD_1_UNSAFE(); neither step checks bounds or validity.
 *
 * @param s Byte string.
 * @param i Offset; must be a modifiable lvalue.
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
    U8_BACK_1_UNSAFE(s, i); \
    U8_FWD_1_UNSAFE(s, i); \
}
00603
/**
 * Adjust i so that it lies just behind the code point containing the byte
 * at i-1, with checking.
 * Only acts when start < i < length; in that case it steps back one code
 * point with U8_BACK_1() and forward one with U8_FWD_1(). An offset at
 * either end of the range is left unchanged.
 *
 * @param s      Byte string.
 * @param start  Lower limit for i.
 * @param i      Offset; must be a modifiable lvalue.
 * @param length Upper limit for i.
 */
#define U8_SET_CP_LIMIT(s, start, i, length) { \
    if((start)<(i) && (i)<(length)) { \
        U8_BACK_1(s, start, i); \
        U8_FWD_1(s, i, length); \
    } \
}
00625
00626
#endif
Generated on Wed Sep 15 17:18:10 2004 for ICU 2.8 by Doxygen 1.3.8