00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifdef HAVE_CONFIG_H
00026 # include <config.h>
00027 #endif
00028
00029 #define DISABLE_DEBUGLOG
00030
00031
00032 #include "text.h"
00033 #include <stdlib.h>
00034 #include <assert.h>
00035 #include <string.h>
00036 #include <errno.h>
00037 #include <ctype.h>
00038 #ifdef HAVE_LOCALE_H
00039 # include <locale.h>
00040 #endif
00041
00042 #ifndef ICONV_CONST
00043 # define ICONV_CONST
00044 #endif
00045
00046 #ifdef HAVE_ICONV_H
00047 # include <iconv.h>
00048 #endif
00049
00050
00051 #include <gwenhywfar/gwenhywfarapi.h>
00052 #include <gwenhywfar/debug.h>
00053 #include <gwenhywfar/stringlist.h>
00054
00055
00056
/**
 * One entry of a character-escaping table: maps a single character to the
 * string which replaces it.
 */
typedef struct {
  int character;        /* character to be replaced */
  const char *replace;  /* replacement text for that character */
} GWEN_TEXT_ESCAPE_ENTRY;

/**
 * Escaping table for XML. Every reserved character is mapped to its
 * predefined entity reference (the replacement must differ from the
 * character itself, otherwise nothing would be escaped). The table is
 * terminated by a {0, 0} entry.
 */
static const GWEN_TEXT_ESCAPE_ENTRY gwen_text__xml_escape_chars[]= {
  {'&', "&amp;"},
  {'<', "&lt;"},
  {'>', "&gt;"},
  {'\'', "&apos;"},
  {'\"', "&quot;"},
  {0, 0}
};
00070
00071
00072
00073 char *GWEN_Text_GetWord(const char *src,
00074 const char *delims,
00075 char *buffer,
00076 unsigned int maxsize,
00077 uint32_t flags,
00078 const char **next){
00079 unsigned int size;
00080 int lastWasBlank;
00081 int lastBlankPos;
00082 int insideQuotes;
00083 int lastWasEscape;
00084
00085 assert(maxsize);
00086
00087
00088 if (flags & GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS) {
00089 while(*src && (unsigned char)(*src)<33)
00090 src++;
00091 }
00092
00093
00094 size=0;
00095 lastWasBlank=0;
00096 lastBlankPos=-1;
00097 lastWasEscape=0;
00098 insideQuotes=0;
00099
00100 if (*src=='"') {
00101 insideQuotes=1;
00102 if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
00103 src++;
00104 }
00105
00106 while(*src && size<(maxsize-1)) {
00107 if (lastWasEscape) {
00108 buffer[size]=*src;
00109 size++;
00110 lastWasEscape=0;
00111 lastWasBlank=0;
00112 lastBlankPos=-1;
00113 }
00114 else {
00115 if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
00116 lastWasEscape=1;
00117 lastWasBlank=0;
00118 lastBlankPos=-1;
00119 }
00120 else {
00121 if (!insideQuotes && strchr(delims, *src)!=0)
00122 break;
00123 if (*src=='"') {
00124 if (insideQuotes) {
00125 insideQuotes=0;
00126 src++;
00127 break;
00128 }
00129 else {
00130 DBG_DEBUG(GWEN_LOGDOMAIN,
00131 "Found a closing \" without an opening one "
00132 "(consider using a backslash to escape)");
00133 return 0;
00134 }
00135 }
00136
00137
00138 if (insideQuotes ||
00139 !lastWasBlank ||
00140 (lastWasBlank &&
00141 !(flags & GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS))) {
00142
00143
00144
00145 buffer[size]=*src;
00146 size++;
00147 }
00148
00149 if (isspace((int)((unsigned char)*src)) && !lastWasEscape) {
00150 lastWasBlank=1;
00151 lastBlankPos=size;
00152 }
00153 else {
00154 lastWasBlank=0;
00155 lastBlankPos=-1;
00156 }
00157 }
00158 }
00159
00160 src++;
00161 }
00162
00163
00164 buffer[size]=0;
00165
00166 if (insideQuotes) {
00167 DBG_DEBUG(GWEN_LOGDOMAIN, "Missing \" after word");
00168 return 0;
00169 }
00170
00171 if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
00172 if (*src) {
00173 if (strchr(delims, *src)==0) {
00174 DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
00175 return 0;
00176 }
00177 }
00178 else {
00179 if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
00180 DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
00181 return 0;
00182 }
00183 }
00184 }
00185
00186
00187 if (flags & GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS) {
00188 if (lastBlankPos!=-1)
00189 buffer[lastBlankPos]=0;
00190 }
00191
00192 *next=src;
00193 return buffer;
00194 }
00195
00196
00197
00198 int GWEN_Text_GetWordToBuffer(const char *src,
00199 const char *delims,
00200 GWEN_BUFFER *buf,
00201 uint32_t flags,
00202 const char **next){
00203 const char *savedSrc=src;
00204 int lastWasBlank;
00205 int lastBlankPos;
00206 int insideQuotes;
00207 int lastWasEscape;
00208
00209
00210 if (flags & GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS) {
00211 while(*src && (unsigned char)(*src)<33) {
00212 if (strchr(delims, *src)) {
00213 *next=src;
00214 return 0;
00215 }
00216 src++;
00217 }
00218 }
00219
00220
00221 lastWasBlank=0;
00222 lastBlankPos=-1;
00223 lastWasEscape=0;
00224 insideQuotes=0;
00225
00226 if (*src=='"') {
00227 insideQuotes=1;
00228 if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
00229 src++;
00230 }
00231
00232 while(*src) {
00233 if (lastWasEscape) {
00234 GWEN_Buffer_AppendByte(buf, *src);
00235 lastWasEscape=0;
00236 lastWasBlank=0;
00237 lastBlankPos=-1;
00238 }
00239 else {
00240 if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
00241 lastWasEscape=1;
00242 lastWasBlank=0;
00243 lastBlankPos=-1;
00244 }
00245 else {
00246 if (!insideQuotes && strchr(delims, *src)!=0)
00247 break;
00248 if (*src=='"') {
00249 if (insideQuotes) {
00250 insideQuotes=0;
00251 src++;
00252 break;
00253 }
00254 else {
00255 DBG_ERROR(GWEN_LOGDOMAIN,
00256 "Found a closing \" without an opening one "
00257 "(consider using a backslash to escape)");
00258 return -1;
00259 }
00260 }
00261
00262
00263 if (insideQuotes ||
00264 !lastWasBlank ||
00265 (lastWasBlank &&
00266 !(flags & GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS))) {
00267
00268
00269
00270 GWEN_Buffer_AppendByte(buf, *src);
00271 }
00272
00273
00274 if (!lastWasEscape && *((unsigned char*)src)<33) {
00275 lastWasBlank=1;
00276 lastBlankPos=GWEN_Buffer_GetPos(buf);
00277 }
00278 else {
00279 lastWasBlank=0;
00280 lastBlankPos=-1;
00281 }
00282 }
00283 }
00284
00285 src++;
00286 }
00287
00288 if (insideQuotes) {
00289 DBG_ERROR(GWEN_LOGDOMAIN, "Missing \" after word (at %d: [%s])", (int)(src-savedSrc), savedSrc);
00290 return -1;
00291 }
00292
00293 if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
00294 if (*src) {
00295 if (strchr(delims, *src)==0) {
00296 DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
00297 return -1;
00298 }
00299 }
00300 else {
00301 if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
00302 DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
00303 return -1;
00304 }
00305 }
00306 }
00307
00308
00309 if (flags & GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS) {
00310 if (lastBlankPos!=-1)
00311 GWEN_Buffer_Crop(buf, 0, lastBlankPos);
00312 }
00313
00314 *next=src;
00315 return 0;
00316 }
00317
00318
00319
00320 char *GWEN_Text_Escape(const char *src,
00321 char *buffer,
00322 unsigned int maxsize) {
00323 unsigned int size;
00324
00325 size=0;
00326 while(*src) {
00327 unsigned char x;
00328
00329 x=(unsigned char)*src;
00330 if (!(
00331 (x>='A' && x<='Z') ||
00332 (x>='a' && x<='z') ||
00333 (x>='0' && x<='9'))) {
00334 unsigned char c;
00335
00336 if ((maxsize-1)<size+3) {
00337 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00338 return 0;
00339 }
00340 buffer[size++]='%';
00341 c=(((unsigned char)(*src))>>4)&0xf;
00342 if (c>9)
00343 c+=7;
00344 c+='0';
00345 buffer[size++]=c;
00346 c=((unsigned char)(*src))&0xf;
00347 if (c>9)
00348 c+=7;
00349 c+='0';
00350 buffer[size++]=c;
00351 }
00352 else {
00353 if (size<(maxsize-1))
00354 buffer[size++]=*src;
00355 else {
00356 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00357 return 0;
00358 }
00359 }
00360
00361 src++;
00362 }
00363
00364 buffer[size]=0;
00365 return buffer;
00366 }
00367
00368
00369
00370 char *GWEN_Text_EscapeTolerant(const char *src,
00371 char *buffer,
00372 unsigned int maxsize) {
00373 unsigned int size;
00374
00375 size=0;
00376 while(*src) {
00377 unsigned char x;
00378
00379 x=(unsigned char)*src;
00380 if (!(
00381 (x>='A' && x<='Z') ||
00382 (x>='a' && x<='z') ||
00383 (x>='0' && x<='9') ||
00384 x==' ' ||
00385 x=='.' ||
00386 x==',' ||
00387 x=='.' ||
00388 x=='*' ||
00389 x=='?'
00390 )) {
00391 unsigned char c;
00392
00393 if ((maxsize-1)<size+3) {
00394 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00395 return 0;
00396 }
00397 buffer[size++]='%';
00398 c=(((unsigned char)(*src))>>4)&0xf;
00399 if (c>9)
00400 c+=7;
00401 c+='0';
00402 buffer[size++]=c;
00403 c=((unsigned char)(*src))&0xf;
00404 if (c>9)
00405 c+=7;
00406 c+='0';
00407 buffer[size++]=c;
00408 }
00409 else {
00410 if (size<(maxsize-1))
00411 buffer[size++]=*src;
00412 else {
00413 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00414 return 0;
00415 }
00416 }
00417
00418 src++;
00419 }
00420
00421 buffer[size]=0;
00422 return buffer;
00423 }
00424
00425
00426
00427 char *GWEN_Text_UnescapeN(const char *src,
00428 unsigned int srclen,
00429 char *buffer,
00430 unsigned int maxsize){
00431 unsigned int size;
00432
00433 size=0;
00434
00435 while(*src && srclen>0) {
00436 unsigned char x;
00437
00438 x=(unsigned char)*src;
00439 if (
00440 (x>='A' && x<='Z') ||
00441 (x>='a' && x<='z') ||
00442 (x>='0' && x<='9')) {
00443 if (size<(maxsize-1))
00444 buffer[size++]=*src;
00445 else {
00446 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00447 return 0;
00448 }
00449 }
00450 else {
00451 if (*src=='%') {
00452 unsigned char d1, d2;
00453 unsigned char c;
00454
00455 if (srclen<3) {
00456 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
00457 return 0;
00458 }
00459
00460 src++;
00461 if (!(*src) || !isxdigit((int)*src)) {
00462 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
00463 return 0;
00464 }
00465
00466 d1=(unsigned char)(toupper(*src));
00467
00468
00469 src++;
00470 if (!(*src) || !isxdigit((int)*src)) {
00471 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
00472 return 0;
00473 }
00474 d2=(unsigned char)(toupper(*src));
00475
00476 d1-='0';
00477 if (d1>9)
00478 d1-=7;
00479 c=(d1<<4)&0xf0;
00480 d2-='0';
00481 if (d2>9)
00482 d2-=7;
00483 c+=(d2&0xf);
00484
00485 if (size<(maxsize-1))
00486 buffer[size++]=(char)c;
00487 else {
00488 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00489 return 0;
00490 }
00491 srclen-=2;
00492 }
00493 else {
00494 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
00495 "characters in escaped string (\"%s\")",
00496 src);
00497 return 0;
00498 }
00499 }
00500 srclen--;
00501 src++;
00502 }
00503
00504 buffer[size]=0;
00505 return buffer;
00506 }
00507
00508
00509
/**
 * Convenience wrapper: unescapes the complete 0-terminated string @a src by
 * passing its length to GWEN_Text_UnescapeN().
 *
 * @return whatever GWEN_Text_UnescapeN() returns
 */
char *GWEN_Text_Unescape(const char *src,
                         char *buffer,
                         unsigned int maxsize)
{
  const unsigned int len=strlen(src);

  return GWEN_Text_UnescapeN(src, len, buffer, maxsize);
}
00518
00519
00520
00521 char *GWEN_Text_UnescapeTolerantN(const char *src,
00522 unsigned int srclen,
00523 char *buffer,
00524 unsigned int maxsize){
00525 unsigned int size;
00526
00527 size=0;
00528
00529 while(*src && srclen>0) {
00530 unsigned char x;
00531
00532 x=(unsigned char)*src;
00533 if (
00534 (x>='A' && x<='Z') ||
00535 (x>='a' && x<='z') ||
00536 (x>='0' && x<='9') ||
00537 x==' ' ||
00538 x=='.' ||
00539 x==',' ||
00540 x=='.' ||
00541 x=='*' ||
00542 x=='?'
00543 ) {
00544 if (size<(maxsize-1))
00545 buffer[size++]=*src;
00546 else {
00547 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00548 return 0;
00549 }
00550 }
00551 else {
00552 if (*src=='%') {
00553 unsigned char d1, d2;
00554 unsigned char c;
00555
00556 if (srclen<3) {
00557 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
00558 return 0;
00559 }
00560
00561 src++;
00562 if (!(*src) || !isxdigit((int)*src)) {
00563 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
00564 return 0;
00565 }
00566
00567 d1=(unsigned char)(toupper(*src));
00568
00569
00570 src++;
00571 if (!(*src) || !isxdigit((int)*src)) {
00572 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
00573 return 0;
00574 }
00575 d2=(unsigned char)(toupper(*src));
00576
00577 d1-='0';
00578 if (d1>9)
00579 d1-=7;
00580 c=(d1<<4)&0xf0;
00581 d2-='0';
00582 if (d2>9)
00583 d2-=7;
00584 c+=(d2&0xf);
00585
00586 if (size<(maxsize-1))
00587 buffer[size++]=(char)c;
00588 else {
00589 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00590 return 0;
00591 }
00592 srclen-=2;
00593 }
00594 else {
00595 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
00596 "characters in escaped string (\"%s\")",
00597 src);
00598 return 0;
00599 }
00600 }
00601 srclen--;
00602 src++;
00603 }
00604
00605 buffer[size]=0;
00606 return buffer;
00607 }
00608
00609
00610
/**
 * Convenience wrapper: unescapes the complete 0-terminated string @a src by
 * passing its length to GWEN_Text_UnescapeTolerantN().
 *
 * @return whatever GWEN_Text_UnescapeTolerantN() returns
 */
char *GWEN_Text_UnescapeTolerant(const char *src,
                                 char *buffer,
                                 unsigned int maxsize)
{
  const unsigned int len=strlen(src);

  return GWEN_Text_UnescapeTolerantN(src, len, buffer, maxsize);
}
00619
00620
00621
00622 char *GWEN_Text_ToHex(const char *src, unsigned l,
00623 char *buffer, unsigned int maxsize) {
00624 unsigned int pos;
00625 unsigned int size;
00626
00627 if ((l*2)+1 > maxsize) {
00628 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00629 return 0;
00630 }
00631
00632 pos=0;
00633 size=0;
00634 while(pos<l) {
00635 unsigned char c;
00636
00637 c=(((unsigned char)(src[pos]))>>4)&0xf;
00638 if (c>9)
00639 c+=7;
00640 c+='0';
00641 buffer[size++]=c;
00642 c=((unsigned char)(src[pos]))&0xf;
00643 if (c>9)
00644 c+=7;
00645 c+='0';
00646 buffer[size++]=c;
00647 pos++;
00648 }
00649 buffer[size]=0;
00650 return buffer;
00651 }
00652
00653
00654
00655 char *GWEN_Text_ToHexGrouped(const char *src,
00656 unsigned l,
00657 char *buffer,
00658 unsigned maxsize,
00659 unsigned int groupsize,
00660 char delimiter,
00661 int skipLeadingZeroes) {
00662 unsigned int pos;
00663 unsigned int size;
00664 unsigned int j;
00665
00666 j=0;
00667
00668 pos=0;
00669 size=0;
00670 j=0;
00671 while(pos<l) {
00672 unsigned char c;
00673 int skipThis;
00674
00675 skipThis=0;
00676 c=(((unsigned char)(src[pos]))>>4)&0xf;
00677 if (skipLeadingZeroes) {
00678 if (c==0)
00679 skipThis=1;
00680 else
00681 skipLeadingZeroes=0;
00682 }
00683 if (c>9)
00684 c+=7;
00685 c+='0';
00686 if (!skipThis) {
00687 if (size+1>=maxsize) {
00688 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00689 return 0;
00690 }
00691 buffer[size++]=c;
00692 j++;
00693 if (j==groupsize) {
00694 if (size+1>=maxsize) {
00695 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00696 return 0;
00697 }
00698 buffer[size++]=delimiter;
00699 j=0;
00700 }
00701 }
00702
00703 skipThis=0;
00704 c=((unsigned char)(src[pos]))&0xf;
00705 if (skipLeadingZeroes) {
00706 if (c==0 && pos+1<l)
00707 skipThis=1;
00708 else
00709 skipLeadingZeroes=0;
00710 }
00711 if (c>9)
00712 c+=7;
00713 c+='0';
00714 if (size+1>=maxsize) {
00715 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00716 return 0;
00717 }
00718 if (!skipThis) {
00719 buffer[size++]=c;
00720 j++;
00721 if (j==groupsize) {
00722 if (pos+1<l) {
00723 if (size+1>=maxsize) {
00724 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
00725 return 0;
00726 }
00727 buffer[size++]=delimiter;
00728 }
00729 j=0;
00730 }
00731 }
00732 pos++;
00733 }
00734 buffer[size]=0;
00735 return buffer;
00736 }
00737
00738
00739
00740 int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
00741 GWEN_BUFFER *buf,
00742 unsigned int groupsize,
00743 char delimiter,
00744 int skipLeadingZeroes){
00745 unsigned int pos;
00746 unsigned int size;
00747 unsigned int j;
00748
00749 j=0;
00750
00751 pos=0;
00752 size=0;
00753 j=0;
00754 while(pos<l) {
00755 unsigned char c;
00756 int skipThis;
00757
00758 skipThis=0;
00759 c=(((unsigned char)(src[pos]))>>4)&0xf;
00760 if (skipLeadingZeroes) {
00761 if (c==0)
00762 skipThis=1;
00763 else
00764 skipLeadingZeroes=0;
00765 }
00766 if (c>9)
00767 c+=7;
00768 c+='0';
00769 if (!skipThis) {
00770 if (GWEN_Buffer_AppendByte(buf, c)) {
00771 DBG_INFO(GWEN_LOGDOMAIN, "here");
00772 return -1;
00773 }
00774 j++;
00775 if (groupsize && j==groupsize) {
00776 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
00777 DBG_INFO(GWEN_LOGDOMAIN, "here");
00778 return -1;
00779 }
00780 j=0;
00781 }
00782 }
00783
00784 skipThis=0;
00785 c=((unsigned char)(src[pos]))&0xf;
00786 if (skipLeadingZeroes) {
00787 if (c==0 && pos+1<l)
00788 skipThis=1;
00789 else
00790 skipLeadingZeroes=0;
00791 }
00792 if (c>9)
00793 c+=7;
00794 c+='0';
00795 if (!skipThis) {
00796 if (GWEN_Buffer_AppendByte(buf, c)) {
00797 DBG_INFO(GWEN_LOGDOMAIN, "here");
00798 return -1;
00799 }
00800 j++;
00801 if (groupsize && j==groupsize) {
00802 if (pos+1<l) {
00803 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
00804 DBG_INFO(GWEN_LOGDOMAIN, "here");
00805 return -1;
00806 }
00807 }
00808 j=0;
00809 }
00810 }
00811 pos++;
00812 }
00813 return 0;
00814 }
00815
00816
00817
00818 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize){
00819 unsigned int pos;
00820 unsigned int size;
00821
00822 pos=0;
00823 size=0;
00824 while(*src) {
00825 unsigned char d1, d2;
00826 unsigned char c;
00827
00828
00829 if (!isxdigit((int)*src)) {
00830 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
00831 return -1;
00832 }
00833 d1=(unsigned char)(toupper(*src));
00834
00835
00836 src++;
00837 if (!(*src) || !isxdigit((int)*src)) {
00838 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
00839 return -1;
00840 }
00841 d2=(unsigned char)(toupper(*src));
00842 src++;
00843
00844
00845 d1-='0';
00846 if (d1>9)
00847 d1-=7;
00848 c=(d1<<4)&0xf0;
00849 d2-='0';
00850 if (d2>9)
00851 d2-=7;
00852 c+=(d2&0xf);
00853
00854 if (size<(maxsize))
00855 buffer[size++]=(char)c;
00856 else {
00857 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (maxsize=%d)", maxsize);
00858 return -1;
00859 }
00860 }
00861
00862 return size;
00863 }
00864
00865
00866
00867 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf) {
00868 while(*src) {
00869 unsigned char d1, d2;
00870 unsigned char c;
00871
00872
00873 if (isspace((int)*src)) {
00874 src++;
00875 }
00876 else {
00877 if (!isxdigit((int)*src)) {
00878 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
00879 return -1;
00880 }
00881 d1=(unsigned char)(toupper(*src));
00882
00883
00884 src++;
00885 if (!(*src) || !isxdigit((int)*src)) {
00886 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
00887 return -1;
00888 }
00889 d2=(unsigned char)(toupper(*src));
00890 src++;
00891
00892
00893 d1-='0';
00894 if (d1>9)
00895 d1-=7;
00896 c=(d1<<4)&0xf0;
00897 d2-='0';
00898 if (d2>9)
00899 d2-=7;
00900 c+=(d2&0xf);
00901
00902 GWEN_Buffer_AppendByte(buf, (char)c);
00903 }
00904 }
00905
00906 return 0;
00907 }
00908
00909
00910
00911 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf) {
00912 unsigned int l;
00913 int fakeByte;
00914
00915 l=strlen(src);
00916 fakeByte=(l%2);
00917 while(*src) {
00918 unsigned char d1, d2;
00919 unsigned char c;
00920
00921 if (fakeByte) {
00922 d1=0;
00923 fakeByte=0;
00924 }
00925 else {
00926
00927 if (!isdigit((int)*src)) {
00928 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in bcd string");
00929 return -1;
00930 }
00931 d1=(unsigned char)(*src);
00932 src++;
00933 }
00934
00935 if (!(*src) || !isxdigit((int)*src)) {
00936 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete BCD byte (only 1 digit)");
00937 return -1;
00938 }
00939 d2=(unsigned char)(*src);
00940 src++;
00941
00942
00943 d1-='0';
00944 c=(d1<<4)&0xf0;
00945 d2-='0';
00946 c+=(d2&0xf);
00947
00948 GWEN_Buffer_AppendByte(buf, (char)c);
00949 }
00950
00951 return 0;
00952 }
00953
00954
00955
00956 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
00957 GWEN_BUFFER *buf,
00958 unsigned int groupsize,
00959 char delimiter,
00960 int skipLeadingZeroes){
00961 unsigned int pos;
00962 unsigned int size;
00963 unsigned int j;
00964
00965 j=0;
00966
00967 pos=0;
00968 size=0;
00969 j=0;
00970 while(pos<l) {
00971 unsigned char c;
00972 int skipThis;
00973
00974 skipThis=0;
00975 c=(((unsigned char)(src[pos]))>>4)&0xf;
00976 if (skipLeadingZeroes) {
00977 if (c==0)
00978 skipThis=1;
00979 else
00980 skipLeadingZeroes=0;
00981 }
00982 c+='0';
00983 if (!skipThis) {
00984 if (GWEN_Buffer_AppendByte(buf, c)) {
00985 DBG_INFO(GWEN_LOGDOMAIN, "here");
00986 return -1;
00987 }
00988 j++;
00989 if (groupsize && j==groupsize) {
00990 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
00991 DBG_INFO(GWEN_LOGDOMAIN, "here");
00992 return -1;
00993 }
00994 j=0;
00995 }
00996 }
00997
00998 skipThis=0;
00999 c=((unsigned char)(src[pos]))&0xf;
01000 if (skipLeadingZeroes) {
01001 if (c==0 && pos+1<l)
01002 skipThis=1;
01003 else
01004 skipLeadingZeroes=0;
01005 }
01006 c+='0';
01007 if (!skipThis) {
01008 if (GWEN_Buffer_AppendByte(buf, c)) {
01009 DBG_INFO(GWEN_LOGDOMAIN, "here");
01010 return -1;
01011 }
01012 j++;
01013 if (groupsize && j==groupsize) {
01014 if (pos+1<l) {
01015 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
01016 DBG_INFO(GWEN_LOGDOMAIN, "here");
01017 return -1;
01018 }
01019 }
01020 j=0;
01021 }
01022 }
01023 pos++;
01024 }
01025 return 0;
01026 }
01027
01028
01029
/**
 * Compares two strings, treating a NULL pointer and an empty string as the
 * same thing.
 *
 * @param s1  first string (may be NULL)
 * @param s2  second string (may be NULL)
 * @param ign if not 0 the comparison ignores case
 * @return 0 if both are empty or equal, 1 if only @a s1 is empty, -1 if
 *         only @a s2 is empty, otherwise the result of strcasecmp() or
 *         strcmp()
 */
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
{
  const int empty1=(s1==NULL || *s1==0);
  const int empty2=(s2==NULL || *s2==0);

  if (empty1 || empty2) {
    if (empty1 && empty2)
      return 0;
    return empty1?1:-1;
  }

  return ign?strcasecmp(s1, s2):strcmp(s1, s2);
}
01048
01049
01050
/**
 * Case-insensitive search for @a needle within @a haystack.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(), so bytes with the high bit set are handled safely.
 *
 * @param haystack 0-terminated string to search in
 * @param needle   0-terminated string to search for
 * @return pointer to the first match within @a haystack, NULL if there is
 *         none; an empty @a needle never matches
 */
const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
{
  const int first=tolower((unsigned char)*needle);

  for (; *haystack; haystack++) {
    const char *h;
    const char *n;

    /* look for the first character of the needle */
    if (tolower((unsigned char)*haystack)!=first)
      continue;

    /* first character matches, compare the rest */
    h=haystack+1;
    n=needle+1;
    while (*h && *n &&
           tolower((unsigned char)*h)==tolower((unsigned char)*n)) {
      h++;
      n++;
    }
    if (*n==0)
      return haystack;
  }

  /* not found */
  return NULL;
}
01081
01082
01083
01084
/**
 * Compares @a w against one segment of the pattern @a p, i.e. up to the
 * next '*' in the pattern or the end of either string.
 *
 * A '?' in the pattern accepts any character. Every position where both
 * characters are equal increments the match counter. Characters are
 * converted to unsigned char before being handed to toupper().
 *
 * @param w         word to check
 * @param wpos      in: start position in @a w, out: position reached
 * @param p         pattern
 * @param ppos      in: start position in @a p, out: position reached
 * @param sensecase if 0 the comparison ignores case
 * @param matches   in/out: number of equal characters counted so far
 * @return 1 if the segment matches (a '*' was reached in the pattern or
 *         both strings ended together), 0 otherwise
 */
int GWEN_Text__cmpSegment(const char *w, unsigned int *wpos,
                          const char *p, unsigned int *ppos,
                          int sensecase,
                          unsigned int *matches)
{
  const unsigned int wlength=strlen(w);
  const unsigned int plength=strlen(p);
  unsigned int wi=*wpos;
  unsigned int pi=*ppos;
  unsigned int hits=*matches;
  int result=0;

  while (wi<wlength && pi<plength) {
    int a=(unsigned char)w[wi];
    int b=(unsigned char)p[pi];

    if (b=='*') {
      /* segment ends here, the caller handles the wildcard */
      result=1;
      goto done;
    }
    if (!sensecase) {
      a=toupper(a);
      b=toupper(b);
    }
    if (a==b)
      ++hits;
    else if (b!='?')
      goto done;
    ++wi;
    ++pi;
  }

  if (wi==wlength && pi==plength)
    /* both strings ended together */
    result=1;
  else if (wi>=wlength && pi<plength && p[pi]=='*')
    /* word ended, pattern continues with a wildcard */
    result=1;

done:
  *wpos=wi;
  *ppos=pi;
  *matches=hits;
  return result;
}
01147
01148
01149
/**
 * Searches @a w for a position at which the pattern segment starting at
 * *ppos matches. GWEN_Text__cmpSegment() is tried at *wpos, *wpos+1, ...
 * until it succeeds or the end of @a w is reached; before every attempt
 * *ppos and *matches are reset to the values they had on entry.
 *
 * @param w         word to check
 * @param wpos      in: first position to try, out: as left by the last
 *                  comparison
 * @param p         pattern
 * @param ppos      in: start of the segment, out: as left by the last
 *                  comparison
 * @param sensecase if 0 the comparison ignores case
 * @param matches   in/out: number of equal characters counted so far
 * @return 1 if a matching position was found, 0 otherwise
 */
int GWEN_Text__findSegment(const char *w, unsigned int *wpos,
                           const char *p, unsigned int *ppos,
                           int sensecase,
                           unsigned int *matches)
{
  const unsigned int wordLen=strlen(w);
  const unsigned int patStart=*ppos;
  const unsigned int hitsStart=*matches;
  unsigned int tryPos;

  for (tryPos=*wpos; tryPos<wordLen; tryPos++) {
    /* restart the segment comparison from this word position */
    *ppos=patStart;
    *wpos=tryPos;
    *matches=hitsStart;
    if (GWEN_Text__cmpSegment(w, wpos, p, ppos, sensecase, matches))
      return 1;
  }
  return 0;
}
01171
01172
/**
 * Checks whether the word @a w matches the pattern @a p. The pattern may
 * contain '?' (any single character) and '*' (segment separator handled by
 * searching for the following segment).
 *
 * @param w         word to check
 * @param p         pattern
 * @param sensecase if 0 the comparison ignores case
 * @return number of characters which matched literally, -1 if the word does
 *         not match the pattern
 */
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
{
  const unsigned int patLen=strlen(p);
  unsigned int patPos=0;
  unsigned int wordPos=0;
  unsigned int hits=0;

  /* the first segment must match right at the start of the word */
  if (!GWEN_Text__cmpSegment(w, &wordPos, p, &patPos, sensecase, &hits))
    return -1;

  for (;;) {
    /* pattern exhausted: done */
    if (patPos>=patLen)
      return hits;
    /* step over the '*' which ended the previous segment */
    patPos++;
    /* pattern ended with '*': done */
    if (patPos>=patLen)
      return hits;
    /* the next segment may start anywhere in the rest of the word */
    if (!GWEN_Text__findSegment(w, &wordPos, p, &patPos, sensecase, &hits))
      return -1;
  }
}
01204
01205
01206
01207 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
01208 int fillchar){
01209 char lbuffer[128];
01210 unsigned int i;
01211
01212 sprintf(lbuffer,"%d", num);
01213 i=strlen(lbuffer);
01214 if (i>=bufsize) {
01215 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (%d>=%d)", i, bufsize);
01216 return -1;
01217 }
01218 if (fillchar>0) {
01219
01220 strcpy(buffer, lbuffer);
01221 while(i<bufsize-1)
01222 buffer[i++]=fillchar;
01223 buffer[i]=0;
01224 return bufsize;
01225 }
01226 else if (fillchar<0) {
01227 int j, k;
01228
01229 fillchar=-fillchar;
01230 j=bufsize-1-i;
01231 for (k=0; k<j; k++)
01232 buffer[k]=fillchar;
01233 buffer[k]=0;
01234 strcat(buffer, lbuffer);
01235 return bufsize;
01236 }
01237 else {
01238
01239 strcpy(buffer, lbuffer);
01240 return i;
01241 }
01242 }
01243
01244
01245
/**
 * Writes a hex dump of @a l bytes at @a s to stderr: a header line with the
 * size, then one line per 16 bytes consisting of the offset, the bytes in
 * hexadecimal and the bytes as characters.
 *
 * Short final lines are padded with three blanks per missing byte (the
 * width of one "%02x " cell) so that the character column stays aligned.
 * Bytes below 32 and above 127 are shown as '.', independent of whether
 * plain char is signed on the platform.
 *
 * @param s      data to dump
 * @param l      number of bytes at @a s
 * @param insert number of blanks written in front of every line
 */
void GWEN_Text_DumpString(const char *s, unsigned int l,
                          unsigned int insert)
{
  unsigned int pos=0;
  unsigned int i;
  unsigned int k;

  for (k=0; k<insert; k++)
    fprintf(stderr, " ");
  fprintf(stderr, "String size is %u:\n", l);

  while (pos<l) {
    /* number of bytes on this line */
    const unsigned int chunk=(l-pos<16)?(l-pos):16;

    for (k=0; k<insert; k++)
      fprintf(stderr, " ");
    fprintf(stderr, "%04x: ", pos);

    /* show hex dump */
    for (i=0; i<chunk; i++)
      fprintf(stderr, "%02x ", (unsigned char)s[pos+i]);
    for (i=chunk; i<16; i++)
      fprintf(stderr, "   ");

    /* show text */
    for (i=0; i<chunk; i++) {
      const unsigned char c=(unsigned char)s[pos+i];

      if (c<32 || c>127)
        fprintf(stderr, ".");
      else
        fprintf(stderr, "%c", c);
    }
    fprintf(stderr, "\n");
    pos+=chunk;
  }
}
01283
01284
01285
01286 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
01287 GWEN_BUFFER *mbuf,
01288 unsigned int insert) {
01289 unsigned int i;
01290 unsigned int j;
01291 unsigned int pos;
01292 unsigned k;
01293 char numbuf[32];
01294
01295 pos=0;
01296 for (k=0; k<insert; k++)
01297 GWEN_Buffer_AppendByte(mbuf, ' ');
01298 GWEN_Buffer_AppendString(mbuf,"String size is ");
01299 snprintf(numbuf, sizeof(numbuf), "%d", l);
01300 GWEN_Buffer_AppendString(mbuf, numbuf);
01301 GWEN_Buffer_AppendByte(mbuf, '\n');
01302 while(pos<l) {
01303 for (k=0; k<insert; k++)
01304 GWEN_Buffer_AppendByte(mbuf, ' ');
01305 snprintf(numbuf, sizeof(numbuf),"%04x: ",pos);
01306 GWEN_Buffer_AppendString(mbuf, numbuf);
01307 j=pos+16;
01308 if (j>=l)
01309 j=l;
01310
01311
01312 for (i=pos; i<j; i++) {
01313 snprintf(numbuf, sizeof(numbuf),"%02x ", (unsigned char)s[i]);
01314 GWEN_Buffer_AppendString(mbuf, numbuf);
01315 }
01316 if (j-pos<16)
01317 for (i=0; i<16-(j-pos); i++)
01318 GWEN_Buffer_AppendString(mbuf, " ");
01319
01320 for (i=pos; i<j; i++) {
01321 if (s[i]<32)
01322 GWEN_Buffer_AppendByte(mbuf, '.');
01323 else
01324 GWEN_Buffer_AppendByte(mbuf, s[i]);
01325 }
01326 GWEN_Buffer_AppendByte(mbuf, '\n');
01327 pos+=16;
01328 }
01329 }
01330
01331
01332
01333
01334
01335
01336
01337 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf) {
01338 while(*src) {
01339 unsigned char x;
01340
01341 x=(unsigned char)*src;
01342 if (!(
01343 (x>='A' && x<='Z') ||
01344 (x>='a' && x<='z') ||
01345 (x>='0' && x<='9'))) {
01346 unsigned char c;
01347
01348 GWEN_Buffer_AppendByte(buf, '%');
01349 c=(((unsigned char)(*src))>>4)&0xf;
01350 if (c>9)
01351 c+=7;
01352 c+='0';
01353 GWEN_Buffer_AppendByte(buf, c);
01354 c=((unsigned char)(*src))&0xf;
01355 if (c>9)
01356 c+=7;
01357 c+='0';
01358 GWEN_Buffer_AppendByte(buf, c);
01359 }
01360 else
01361 GWEN_Buffer_AppendByte(buf, *src);
01362
01363 src++;
01364 }
01365
01366 return 0;
01367 }
01368
01369
01370
01371 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf) {
01372 while(*src) {
01373 unsigned char x;
01374
01375 x=(unsigned char)*src;
01376 if (
01377 (x>='A' && x<='Z') ||
01378 (x>='a' && x<='z') ||
01379 (x>='0' && x<='9')) {
01380 GWEN_Buffer_AppendByte(buf, *src);
01381 }
01382 else {
01383 if (*src=='%') {
01384 unsigned char d1, d2;
01385 unsigned char c;
01386
01387
01388 src++;
01389 if (!(*src) || !isxdigit((int)*src)) {
01390 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
01391 return 0;
01392 }
01393
01394 d1=(unsigned char)(toupper(*src));
01395
01396
01397 src++;
01398 if (!(*src) || !isxdigit((int)*src)) {
01399 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
01400 return 0;
01401 }
01402 d2=(unsigned char)(toupper(*src));
01403
01404 d1-='0';
01405 if (d1>9)
01406 d1-=7;
01407 c=(d1<<4)&0xf0;
01408 d2-='0';
01409 if (d2>9)
01410 d2-=7;
01411 c+=(d2&0xf);
01412
01413 GWEN_Buffer_AppendByte(buf, (char)c);
01414 }
01415 else {
01416 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
01417 "characters in escaped string (\"%s\")",
01418 src);
01419 return -1;
01420 }
01421 }
01422 src++;
01423 }
01424
01425 return 0;
01426 }
01427
01428
01429
01430 int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf) {
01431 while(*src) {
01432 unsigned char x;
01433
01434 x=(unsigned char)*src;
01435 if (!(
01436 (x>='A' && x<='Z') ||
01437 (x>='a' && x<='z') ||
01438 (x>='0' && x<='9') ||
01439 x==' ' ||
01440 x=='.' ||
01441 x==',' ||
01442 x=='.' ||
01443 x=='_' ||
01444 x=='-' ||
01445 x=='*' ||
01446 x=='?'
01447 )) {
01448 unsigned char c;
01449
01450 GWEN_Buffer_AppendByte(buf, '%');
01451 c=(((unsigned char)(*src))>>4)&0xf;
01452 if (c>9)
01453 c+=7;
01454 c+='0';
01455 GWEN_Buffer_AppendByte(buf, c);
01456 c=((unsigned char)(*src))&0xf;
01457 if (c>9)
01458 c+=7;
01459 c+='0';
01460 GWEN_Buffer_AppendByte(buf, c);
01461 }
01462 else
01463 GWEN_Buffer_AppendByte(buf, *src);
01464
01465 src++;
01466 }
01467
01468 return 0;
01469 }
01470
01471
01472
01473 int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf) {
01474 while(*src) {
01475 const char *srcBak;
01476 int charHandled;
01477
01478 srcBak=src;
01479 charHandled=0;
01480 if (*src=='%') {
01481 if (strlen(src)>2) {
01482 unsigned char d1, d2;
01483 unsigned char c;
01484
01485 if (isxdigit((int)src[1]) && isxdigit((int)src[2])) {
01486
01487 src++;
01488
01489 d1=(unsigned char)(toupper(*src));
01490
01491
01492 src++;
01493 d2=(unsigned char)(toupper(*src));
01494
01495 d1-='0';
01496 if (d1>9)
01497 d1-=7;
01498 c=(d1<<4)&0xf0;
01499 d2-='0';
01500 if (d2>9)
01501 d2-=7;
01502 c+=(d2&0xf);
01503
01504 GWEN_Buffer_AppendByte(buf, (char)c);
01505 charHandled=1;
01506 }
01507 }
01508 }
01509 if (!charHandled)
01510 GWEN_Buffer_AppendByte(buf, *src);
01511 src++;
01512 }
01513
01514 return 0;
01515 }
01516
01517
01518
01519 int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf) {
01520 while(GWEN_Buffer_GetBytesLeft(src)) {
01521 int z;
01522 unsigned char x;
01523
01524 z=GWEN_Buffer_ReadByte(src);
01525 if (z==-1) {
01526 DBG_INFO(GWEN_LOGDOMAIN, "here");
01527 return -1;
01528 }
01529 x=(unsigned char)z;
01530 if (!(
01531 (x>='A' && x<='Z') ||
01532 (x>='a' && x<='z') ||
01533 (x>='0' && x<='9') ||
01534 x==' ' ||
01535 x=='.' ||
01536 x==',' ||
01537 x=='.' ||
01538 x=='*' ||
01539 x=='?'
01540 )) {
01541 unsigned char c;
01542
01543 GWEN_Buffer_AppendByte(buf, '%');
01544 c=(((unsigned char)x)>>4)&0xf;
01545 if (c>9)
01546 c+=7;
01547 c+='0';
01548 GWEN_Buffer_AppendByte(buf, c);
01549 c=((unsigned char)x)&0xf;
01550 if (c>9)
01551 c+=7;
01552 c+='0';
01553 GWEN_Buffer_AppendByte(buf, c);
01554 }
01555 else
01556 GWEN_Buffer_AppendByte(buf, x);
01557 }
01558
01559 return 0;
01560 }
01561
01562
01563
01564 void GWEN_Text_LogString(const char *s, unsigned int l,
01565 const char *logDomain,
01566 GWEN_LOGGER_LEVEL lv){
01567 GWEN_BUFFER *mbuf;
01568
01569 mbuf=GWEN_Buffer_new(0, ((l*16)<1024)?1024:l*16, 0, 1);
01570 GWEN_Text_DumpString2Buffer(s, l, mbuf, 0);
01571 GWEN_Logger_Log(logDomain, lv, GWEN_Buffer_GetStart(mbuf));
01572 GWEN_Buffer_free(mbuf);
01573 }
01574
01575
01576
01577 void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf){
01578 const char *p;
01579 char *dst;
01580 unsigned int size;
01581 unsigned int i;
01582 int lastWasBlank;
01583 char *lastBlankPos;
01584
01585 dst=GWEN_Buffer_GetStart(buf);
01586 p=dst;
01587 size=GWEN_Buffer_GetUsedBytes(buf);
01588 lastWasBlank=0;
01589 lastBlankPos=0;
01590
01591 for (i=0; i<size; i++) {
01592
01593 if (isspace((int)*p)) {
01594 if (!lastWasBlank) {
01595
01596 lastWasBlank=1;
01597 lastBlankPos=dst;
01598 *(dst++)=*p;
01599 }
01600 }
01601 else {
01602 lastWasBlank=0;
01603 lastBlankPos=0;
01604 *(dst++)=*p;
01605 }
01606 p++;
01607 }
01608
01609
01610 if (lastBlankPos!=0)
01611 dst=lastBlankPos;
01612
01613 size=dst-GWEN_Buffer_GetStart(buf);
01614 GWEN_Buffer_Crop(buf, 0, size);
01615 }
01616
01617
01618
01619 int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf){
01620 char numbuf[128];
01621 int rv;
01622 #ifdef HAVE_SETLOCALE
01623 const char *orig_locale = setlocale(LC_NUMERIC, NULL);
01624 char *currentLocale = strdup(orig_locale ? orig_locale : "C");
01625 setlocale(LC_NUMERIC,"C");
01626 #endif
01627
01628 rv=snprintf(numbuf, sizeof(numbuf), "%f", num);
01629
01630 #ifdef HAVE_SETLOCALE
01631 setlocale(LC_NUMERIC, currentLocale);
01632 free(currentLocale);
01633 #endif
01634
01635 if (rv<1 || rv>=sizeof(numbuf))
01636 return -1;
01637 GWEN_Buffer_AppendString(buf, numbuf);
01638 return 0;
01639 }
01640
01641
01642
/**
 * Parses a floating point number from the beginning of a string.
 *
 * The string is scanned with "%lf". When setlocale() is available the
 * LC_NUMERIC locale is switched to "C" while scanning and restored
 * afterwards, so the parsing does not depend on the numeric locale that was
 * active on entry. Text following a valid number is ignored.
 *
 * @param s   string to parse
 * @param num receives the parsed value on success
 * @return 0 if a number was read, -1 otherwise
 */
int GWEN_Text_StringToDouble(const char *s, double *num){
  int fieldsRead;
#ifdef HAVE_SETLOCALE
  /* the string returned by setlocale() may be overwritten by the next
   * call, so keep a private copy of the current setting */
  const char *activeLocale = setlocale(LC_NUMERIC, NULL);
  char *savedLocale = strdup(activeLocale ? activeLocale : "C");
  setlocale(LC_NUMERIC,"C");
#endif

  fieldsRead=sscanf(s, "%lf", num);

#ifdef HAVE_SETLOCALE
  setlocale(LC_NUMERIC, savedLocale);
  free(savedLocale);
#endif

  return (fieldsRead==1)?0:-1;
}
01662
01663
01664
/* Scores how well two single characters match: 2 for a full match, 1 for a
 * partial match, 0 for no match. With ign set, case is ignored for the full
 * match; without it, a match that differs only in case counts as partial.
 * Two alphanumeric characters always count at least as a partial match. */
static int GWEN_Text__ScoreCharPair(unsigned char a, unsigned char b, int ign){
  if (ign) {
    if (toupper(a)==toupper(b))
      return 2;
  }
  else {
    if (a==b)
      return 2;
    if (toupper(a)==toupper(b))
      return 1;
  }
  if (isalnum(a) && isalnum(b))
    return 1;
  return 0;
}



/**
 * Computes a one-directional similarity score of two strings.
 *
 * For every character of @a s1, the not yet consumed rest of @a s2 is
 * searched for the first character that matches fully (2 points) or
 * partially (1 point); on a hit, @a s2 is consumed up to and including
 * that character. The sum of points is related to the combined length of
 * both strings.
 *
 * The percentage is computed with integer arithmetic, so the result is
 * always a whole number (truncated), as it has always been.
 *
 * @param s1  first string (must not be NULL)
 * @param s2  second string (must not be NULL)
 * @param ign if nonzero, differences in case are ignored
 * @return similarity in percent (0..100); 100 if both strings are empty
 */
double GWEN_Text__CheckSimilarity(const char *s1, const char *s2, int ign){
  int nboth;
  int nmatch;

  nboth=(int)(strlen(s1)+strlen(s2));
  if (nboth==0) {
    /* two empty strings are identical; also avoids dividing by zero */
    return 100.0;
  }

  nmatch=0;
  while(*s1 && *s2) {
    const char *t;

    /* find the first character in the rest of s2 that scores */
    for (t=s2; *t; t++) {
      int score;

      /* unsigned char: ctype functions are undefined for negative values */
      score=GWEN_Text__ScoreCharPair((unsigned char)*s1,
                                     (unsigned char)*t,
                                     ign);
      if (score) {
        nmatch+=score;
        s2=t+1;
        break;
      }
    }
    s1++;
  }

  return (double)((nmatch*100)/nboth);
}
01736
01737
01738
/**
 * Computes the similarity of two strings in percent.
 *
 * GWEN_Text__CheckSimilarity() is evaluated in both directions (s1 against
 * s2 and s2 against s1) and the larger of the two scores is returned.
 *
 * @param s1  first string
 * @param s2  second string
 * @param ign passed through to GWEN_Text__CheckSimilarity()
 * @return the higher of the two directional scores
 */
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign){
  const double forward=GWEN_Text__CheckSimilarity(s1, s2, ign);
  const double backward=GWEN_Text__CheckSimilarity(s2, s1, ign);

  return (backward>forward)?backward:forward;
}
01748
01749
01750
01751 int GWEN_Text_CountUtf8Chars(const char *s, int len) {
01752 int count;
01753 int handled;
01754
01755 if (len==0)
01756 len=strlen(s);
01757 count=0;
01758 handled=0;
01759 while(handled<len) {
01760 unsigned char c;
01761 int i;
01762
01763 c=(unsigned char)*s;
01764 if ((c & 0xfe)==0xfc)
01765 i=5;
01766 else if ((c & 0xfc)==0xf8)
01767 i=4;
01768 else if ((c & 0xf8)==0xf0)
01769 i=3;
01770 else if ((c & 0xf0)==0xe0)
01771 i=2;
01772 else if ((c & 0xe0)==0xc0)
01773 i=1;
01774 else if (c & 0x80) {
01775 DBG_ERROR(GWEN_LOGDOMAIN, "Invalid UTF8 character at pos %d", handled);
01776 return -1;
01777 }
01778 else
01779 i=0;
01780 if (handled+i+1>len) {
01781 DBG_ERROR(GWEN_LOGDOMAIN,
01782 "Incomplete UTF8 sequence at pos %d", handled);
01783 return -1;
01784 }
01785 s++;
01786 if (i) {
01787 int j;
01788
01789 for (j=0; j<i; j++) {
01790 if ((((unsigned char)*s) & 0xc0)!=0xc0) {
01791 DBG_ERROR(GWEN_LOGDOMAIN,
01792 "Invalid UTF8 sequence at pos %d (rel %d of %d)",
01793 handled, j, i);
01794 }
01795 s++;
01796 }
01797 }
01798 handled+=i+1;
01799 count++;
01800 }
01801
01802 return count;
01803 }
01804
01805
01806
01807 int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
01808 char *pdst;
01809 uint32_t roomLeft;
01810 uint32_t bytesAdded;
01811
01812 #define GWEN_TEXT__APPENDCHAR(chr) \
01813 if (roomLeft<2) { \
01814 if (bytesAdded) { \
01815 GWEN_Buffer_IncrementPos(buf, bytesAdded); \
01816 GWEN_Buffer_AdjustUsedBytes(buf); \
01817 } \
01818 GWEN_Buffer_AllocRoom(buf, 2); \
01819 pdst=GWEN_Buffer_GetPosPointer(buf); \
01820 roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf); \
01821 bytesAdded=0; \
01822 } \
01823 *(pdst++)=(unsigned char)chr; \
01824 *pdst=0; \
01825 bytesAdded++; \
01826 roomLeft--
01827
01828 pdst=GWEN_Buffer_GetPosPointer(buf);
01829 roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf);
01830 bytesAdded=0;
01831
01832 while(*src) {
01833 unsigned char x;
01834 int match;
01835
01836 match=0;
01837 x=(unsigned char)*src;
01838 if (x=='&') {
01839 if (src[1]=='#') {
01840 unsigned char num=0;
01841
01842 src++;
01843 src++;
01844 while(*src && isdigit((int)*src)) {
01845 num*=10;
01846 num+=(*src)-'0';
01847 src++;
01848 }
01849 src++;
01850 GWEN_TEXT__APPENDCHAR(num);
01851 }
01852 else if (strncmp(src+1, "szlig;", 6)==0) {
01853 GWEN_TEXT__APPENDCHAR(0xc3);
01854 GWEN_TEXT__APPENDCHAR(0x9f);
01855 src+=7;
01856 match=1;
01857 }
01858 else if (strncmp(src+1, "Auml;", 5)==0) {
01859 GWEN_TEXT__APPENDCHAR(0xc3);
01860 GWEN_TEXT__APPENDCHAR(0x84);
01861 src+=6;
01862 match=1;
01863 }
01864 else if (strncmp(src+1, "Ouml;", 5)==0) {
01865 GWEN_TEXT__APPENDCHAR(0xc3);
01866 GWEN_TEXT__APPENDCHAR(0x96);
01867 src+=6;
01868 match=1;
01869 }
01870 else if (strncmp(src+1, "Uuml;", 5)==0) {
01871 GWEN_TEXT__APPENDCHAR(0xc3);
01872 GWEN_TEXT__APPENDCHAR(0x9c);
01873 src+=6;
01874 match=1;
01875 }
01876 else if (strncmp(src+1, "auml;", 5)==0) {
01877 GWEN_TEXT__APPENDCHAR(0xc3);
01878 GWEN_TEXT__APPENDCHAR(0xa4);
01879 src+=6;
01880 match=1;
01881 }
01882 else if (strncmp(src+1, "ouml;", 5)==0) {
01883 GWEN_TEXT__APPENDCHAR(0xc3);
01884 GWEN_TEXT__APPENDCHAR(0xb6);
01885 src+=6;
01886 match=1;
01887 }
01888 else if (strncmp(src+1, "uuml;", 5)==0) {
01889 GWEN_TEXT__APPENDCHAR(0xc3);
01890 GWEN_TEXT__APPENDCHAR(0xbc);
01891 src+=6;
01892 match=1;
01893 }
01894 else {
01895 const GWEN_TEXT_ESCAPE_ENTRY *e;
01896 e=gwen_text__xml_escape_chars;
01897 while(e->replace) {
01898 int l;
01899
01900 l=strlen(e->replace);
01901 if (strncasecmp(src, e->replace, l)==0) {
01902 GWEN_TEXT__APPENDCHAR(e->character);
01903
01904 src+=l;
01905 match=1;
01906 break;
01907 }
01908 e++;
01909 }
01910 }
01911 }
01912 if (!match) {
01913 GWEN_TEXT__APPENDCHAR(*(src++));
01914 }
01915 }
01916
01917 if (bytesAdded) {
01918 GWEN_Buffer_IncrementPos(buf, bytesAdded);
01919 GWEN_Buffer_AdjustUsedBytes(buf);
01920 }
01921
01922 return 0;
01923 #undef GWEN_TEXT__APPENDCHAR
01924 }
01925
01926
01927
01928 int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf) {
01929 while(*src) {
01930 unsigned char x;
01931 const GWEN_TEXT_ESCAPE_ENTRY *e;
01932 int match;
01933
01934 match=0;
01935 x=(unsigned char)*src;
01936 e=gwen_text__xml_escape_chars;
01937 while(e->replace) {
01938 if (x==e->character) {
01939 GWEN_Buffer_AppendString(buf, e->replace);
01940 match=1;
01941 break;
01942 }
01943 e++;
01944 }
01945
01946 if (!match) {
01947 if (0 && x>127) {
01948 char numbuf[32];
01949
01950 snprintf(numbuf, sizeof(numbuf), "&#%d;", x);
01951 GWEN_Buffer_AppendString(buf, numbuf);
01952 }
01953 else
01954 GWEN_Buffer_AppendByte(buf, *src);
01955 }
01956 src++;
01957 }
01958
01959 return 0;
01960 }
01961
01962
01963
01964 int GWEN_Text_ConvertCharset(const char *fromCharset,
01965 const char *toCharset,
01966 const char *text, int len,
01967 GWEN_BUFFER *tbuf) {
01968 if (len) {
01969 if (fromCharset && *fromCharset && toCharset && *toCharset &&
01970 strcasecmp(fromCharset, toCharset)!=0) {
01971 #ifndef HAVE_ICONV
01972 DBG_INFO(GWEN_LOGDOMAIN,
01973 "iconv not available, can not convert from \"%s\" to \"%s\"",
01974 fromCharset, toCharset);
01975 #else
01976 iconv_t ic;
01977
01978 ic=iconv_open(toCharset, fromCharset);
01979 if (ic==((iconv_t)-1)) {
01980 DBG_ERROR(GWEN_LOGDOMAIN, "Charset \"%s\" or \"%s\" not available",
01981 fromCharset, toCharset);
01982 }
01983 else {
01984 char *outbuf;
01985 char *pOutbuf;
01986
01987
01988
01989
01990 ICONV_CONST char *pInbuf;
01991 size_t inLeft;
01992 size_t outLeft;
01993 size_t done;
01994 size_t space;
01995
01996
01997 pInbuf=(char*)text;
01998
01999 outLeft=len*2;
02000 space=outLeft;
02001 outbuf=(char*)malloc(outLeft);
02002 assert(outbuf);
02003
02004 inLeft=len;
02005 pInbuf=(char*)text;
02006 pOutbuf=outbuf;
02007 done=iconv(ic, &pInbuf, &inLeft, &pOutbuf, &outLeft);
02008 if (done==(size_t)-1) {
02009 DBG_ERROR(GWEN_LOGDOMAIN, "Error in conversion: %s (%d)",
02010 strerror(errno), errno);
02011 free(outbuf);
02012 iconv_close(ic);
02013 return GWEN_ERROR_GENERIC;
02014 }
02015
02016 GWEN_Buffer_AppendBytes(tbuf, outbuf, space-outLeft);
02017 free(outbuf);
02018 DBG_DEBUG(GWEN_LOGDOMAIN, "Conversion done.");
02019 iconv_close(ic);
02020 return 0;
02021 }
02022 #endif
02023 }
02024
02025 GWEN_Buffer_AppendBytes(tbuf, text, len);
02026 }
02027 return 0;
02028 }
02029