00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifdef HAVE_CONFIG_H
00029 # include <config.h>
00030 #endif
00031
00032
00033 #include "xmlctx_p.h"
00034 #include "gwenhywfar/debug.h"
00035 #include "gwenhywfar/misc.h"
00036 #include "gwenhywfar/text.h"
00037 #include "gwenhywfar/path.h"
00038 #include "i18n_l.h"
00039
00040 #include <stdlib.h>
00041 #include <assert.h>
00042 #include <string.h>
00043 #include <ctype.h>
00044
00045
00046
/* NOTE(review): presumably expands to the inheritance helper functions for
 * GWEN_XML_CONTEXT (counterpart of GWEN_INHERIT_INIT/GWEN_INHERIT_FINI used
 * below) -- confirm against the macro definition. */
00047 GWEN_INHERIT_FUNCTIONS(GWEN_XML_CONTEXT)
00048
00049
00050
00051
00052 GWEN_XML_CONTEXT *GWEN_XmlCtx_new(uint32_t flags) {
00053 GWEN_XML_CONTEXT *ctx;
00054
00055 GWEN_NEW_OBJECT(GWEN_XML_CONTEXT, ctx);
00056 ctx->_refCount=1;
00057 GWEN_INHERIT_INIT(GWEN_XML_CONTEXT, ctx);
00058
00059 ctx->flags=flags;
00060
00061 return ctx;
00062 }
00063
00064
00065
00066 void GWEN_XmlCtx_free(GWEN_XML_CONTEXT *ctx) {
00067 if (ctx) {
00068 assert(ctx->_refCount);
00069 if (ctx->_refCount==1) {
00070 GWEN_INHERIT_FINI(GWEN_XML_CONTEXT, ctx);
00071 ctx->_refCount=0;
00072 GWEN_FREE_OBJECT(ctx);
00073 }
00074 else
00075 ctx->_refCount--;
00076 }
00077 }
00078
00079
00080
00081 void GWEN_XmlCtx_Attach(GWEN_XML_CONTEXT *ctx) {
00082 assert(ctx);
00083 assert(ctx->_refCount);
00084 ctx->_refCount++;
00085 }
00086
00087
00088
00089 uint32_t GWEN_XmlCtx_GetFlags(const GWEN_XML_CONTEXT *ctx) {
00090 assert(ctx);
00091 return ctx->flags;
00092 }
00093
00094
00095
00096 void GWEN_XmlCtx_SetFlags(GWEN_XML_CONTEXT *ctx, uint32_t f) {
00097 assert(ctx);
00098 ctx->flags=f;
00099 }
00100
00101
00102
00103 int GWEN_XmlCtx_GetDepth(const GWEN_XML_CONTEXT *ctx) {
00104 assert(ctx);
00105 return ctx->depth;
00106 }
00107
00108
00109
00110 void GWEN_XmlCtx_SetDepth(GWEN_XML_CONTEXT *ctx, int i) {
00111 assert(ctx);
00112 ctx->depth=i;
00113 }
00114
00115
00116
00117 void GWEN_XmlCtx_IncDepth(GWEN_XML_CONTEXT *ctx) {
00118 assert(ctx);
00119 ctx->depth++;
00120 }
00121
00122
00123
00124 int GWEN_XmlCtx_DecDepth(GWEN_XML_CONTEXT *ctx) {
00125 assert(ctx);
00126 if (ctx->depth<1)
00127 return -1;
00128 ctx->depth--;
00129 return 0;
00130 }
00131
00132
00133
00134 uint32_t GWEN_XmlCtx_GetFinishedElement(const GWEN_XML_CONTEXT *ctx) {
00135 assert(ctx);
00136 return ctx->finishedElements;
00137 }
00138
00139
00140
00141 void GWEN_XmlCtx_IncFinishedElement(GWEN_XML_CONTEXT *ctx) {
00142 assert(ctx);
00143 ctx->finishedElements++;
00144 }
00145
00146
00147
00148 void GWEN_XmlCtx_ResetFinishedElement(GWEN_XML_CONTEXT *ctx) {
00149 assert(ctx);
00150 ctx->finishedElements=0;
00151 }
00152
00153
00154
00155 void GWEN_XmlCtx_SetCurrentNode(GWEN_XML_CONTEXT *ctx, GWEN_XMLNODE *n) {
00156 assert(ctx);
00157 ctx->currentNode=n;
00158 }
00159
00160
00161
00162 GWEN_XMLNODE *GWEN_XmlCtx_GetCurrentNode(const GWEN_XML_CONTEXT *ctx) {
00163 assert(ctx);
00164 return ctx->currentNode;
00165 }
00166
00167
00168
00169 void GWEN_XmlCtx_SetCurrentHeader(GWEN_XML_CONTEXT *ctx, GWEN_XMLNODE *n) {
00170 assert(ctx);
00171 ctx->currentHeader=n;
00172 }
00173
00174
00175
00176 GWEN_XMLNODE *GWEN_XmlCtx_GetCurrentHeader(const GWEN_XML_CONTEXT *ctx) {
00177 assert(ctx);
00178 return ctx->currentHeader;
00179 }
00180
00181
00182
00183 GWEN_XMLCTX_STARTTAG_FN GWEN_XmlCtx_SetStartTagFn(GWEN_XML_CONTEXT *ctx,
00184 GWEN_XMLCTX_STARTTAG_FN f){
00185 GWEN_XMLCTX_STARTTAG_FN of;
00186
00187 assert(ctx);
00188 of=ctx->startTagFn;
00189 ctx->startTagFn=f;
00190 return of;
00191 }
00192
00193
00194
00195 GWEN_XMLCTX_ENDTAG_FN GWEN_XmlCtx_SetEndTagFn(GWEN_XML_CONTEXT *ctx,
00196 GWEN_XMLCTX_ENDTAG_FN f) {
00197 GWEN_XMLCTX_ENDTAG_FN of;
00198
00199 assert(ctx);
00200 of=ctx->endTagFn;
00201 ctx->endTagFn=f;
00202 return of;
00203 }
00204
00205
00206
00207 GWEN_XMLCTX_ADDDATA_FN GWEN_XmlCtx_SetAddDataFn(GWEN_XML_CONTEXT *ctx,
00208 GWEN_XMLCTX_ADDDATA_FN f) {
00209 GWEN_XMLCTX_ADDDATA_FN of;
00210
00211 assert(ctx);
00212 of=ctx->addDataFn;
00213 ctx->addDataFn=f;
00214 return of;
00215 }
00216
00217
00218
00219 GWEN_XMLCTX_ADDATTR_FN GWEN_XmlCtx_SetAddAttrFn(GWEN_XML_CONTEXT *ctx,
00220 GWEN_XMLCTX_ADDATTR_FN f) {
00221 GWEN_XMLCTX_ADDATTR_FN of;
00222
00223 assert(ctx);
00224 of=ctx->addAttrFn;
00225 ctx->addAttrFn=f;
00226 return of;
00227 }
00228
00229
00230
00231 GWEN_XMLCTX_ADDCOMMENT_FN
00232 GWEN_XmlCtx_SetAddCommentFn(GWEN_XML_CONTEXT *ctx,
00233 GWEN_XMLCTX_ADDCOMMENT_FN f) {
00234 GWEN_XMLCTX_ADDCOMMENT_FN of;
00235
00236 assert(ctx);
00237 of=ctx->addCommentFn;
00238 ctx->addCommentFn=f;
00239 return of;
00240 }
00241
00242
00243
00244
00245 int GWEN_XmlCtx_StartTag(GWEN_XML_CONTEXT *ctx, const char *tagName) {
00246 assert(ctx);
00247
00248 if (ctx->startTagFn)
00249 return ctx->startTagFn(ctx, tagName);
00250 else {
00251 DBG_INFO(GWEN_LOGDOMAIN, "Starting tag: [%s]", tagName);
00252 return 0;
00253 }
00254 }
00255
00256
00257
00258 int GWEN_XmlCtx_EndTag(GWEN_XML_CONTEXT *ctx, int closing) {
00259 assert(ctx);
00260
00261 if (ctx->endTagFn)
00262 return ctx->endTagFn(ctx, closing);
00263 else {
00264 DBG_INFO(GWEN_LOGDOMAIN, "Ending tag (%s)", closing?"closing":"not closing");
00265 return 0;
00266 }
00267 }
00268
00269
00270
00271 int GWEN_XmlCtx_AddData(GWEN_XML_CONTEXT *ctx, const char *data) {
00272 assert(ctx);
00273
00274 if (ctx->addDataFn)
00275 return ctx->addDataFn(ctx, data);
00276 else {
00277 DBG_INFO(GWEN_LOGDOMAIN, "Adding data: [%s]", data);
00278 return 0;
00279 }
00280 }
00281
00282
00283
00284 int GWEN_XmlCtx_AddComment(GWEN_XML_CONTEXT *ctx, const char *data) {
00285 assert(ctx);
00286
00287 if (ctx->addCommentFn)
00288 return ctx->addCommentFn(ctx, data);
00289 else {
00290 DBG_INFO(GWEN_LOGDOMAIN, "Adding comment: [%s]", data);
00291 return 0;
00292 }
00293 }
00294
00295
00296
00297 int GWEN_XmlCtx_AddAttr(GWEN_XML_CONTEXT *ctx,
00298 const char *attrName,
00299 const char *attrData) {
00300 assert(ctx);
00301
00302 if (ctx->addAttrFn)
00303 return ctx->addAttrFn(ctx, attrName, attrData);
00304 else {
00305 DBG_INFO(GWEN_LOGDOMAIN, "Adding attribute: [%s]=[%s]",
00306 attrName, attrData);
00307 return 0;
00308 }
00309 }
00310
00311
00312
00313
00314
00315
00316
00317
00318 GWEN_XML_CONTEXT *GWEN_XmlCtxStore_new(GWEN_XMLNODE *n, uint32_t flags) {
00319 GWEN_XML_CONTEXT *ctx;
00320
00321 ctx=GWEN_XmlCtx_new(flags);
00322 assert(ctx);
00323
00324 GWEN_XmlCtx_SetCurrentNode(ctx, n);
00325
00326 GWEN_XmlCtx_SetStartTagFn(ctx, GWEN_XmlCtxStore_StartTag);
00327 GWEN_XmlCtx_SetEndTagFn(ctx, GWEN_XmlCtxStore_EndTag);
00328 GWEN_XmlCtx_SetAddDataFn(ctx, GWEN_XmlCtxStore_AddData);
00329 GWEN_XmlCtx_SetAddCommentFn(ctx, GWEN_XmlCtxStore_AddComment);
00330 GWEN_XmlCtx_SetAddAttrFn(ctx, GWEN_XmlCtxStore_AddAttr);
00331
00332 return ctx;
00333 }
00334
00335
00336
00337 int GWEN_XmlCtxStore_StartTag(GWEN_XML_CONTEXT *ctx, const char *tagName) {
00338 GWEN_XMLNODE *currNode;
00339 GWEN_XMLNODE *newNode;
00340
00341 currNode=GWEN_XmlCtx_GetCurrentNode(ctx);
00342 if (currNode==NULL)
00343 return GWEN_ERROR_INVALID;
00344
00345 if (*tagName=='?' && (GWEN_XmlCtx_GetFlags(ctx) & GWEN_XML_FLAGS_HANDLE_HEADERS)) {
00346 newNode=GWEN_XMLNode_new(GWEN_XMLNodeTypeTag, tagName);
00347 assert(newNode);
00348 DBG_VERBOUS(GWEN_LOGDOMAIN, "Adding header [%s] to [%s]",
00349 GWEN_XMLNode_GetData(newNode),
00350 GWEN_XMLNode_GetData(currNode));
00351 GWEN_XMLNode_AddHeader(currNode, newNode);
00352 GWEN_XmlCtx_SetCurrentHeader(ctx, newNode);
00353 }
00354 else if (strcasecmp(tagName, "!DOCTYPE")==0) {
00355 newNode=GWEN_XMLNode_new(GWEN_XMLNodeTypeTag, tagName);
00356 assert(newNode);
00357 DBG_VERBOUS(GWEN_LOGDOMAIN, "Adding header [%s] to [%s]",
00358 GWEN_XMLNode_GetData(newNode),
00359 GWEN_XMLNode_GetData(currNode));
00360 GWEN_XMLNode_AddHeader(currNode, newNode);
00361 GWEN_XmlCtx_SetCurrentHeader(ctx, newNode);
00362 }
00363 else if (*tagName=='/') {
00364 const char *s;
00365
00366 tagName++;
00367 DBG_VERBOUS(GWEN_LOGDOMAIN, "Finishing tag [%s]", tagName);
00368 s=GWEN_XMLNode_GetData(currNode);
00369 if (s==NULL) {
00370 DBG_INFO(GWEN_LOGDOMAIN, "Current node tag has no name");
00371 return GWEN_ERROR_BAD_DATA;
00372 }
00373
00374 if (strcasecmp(s, tagName)!=0) {
00375 if (!(GWEN_XmlCtx_GetFlags(ctx) & GWEN_XML_FLAGS_TOLERANT_ENDTAGS)) {
00376 DBG_INFO(GWEN_LOGDOMAIN,
00377 "Endtag does not match curent tag (%s != %s)", s, tagName);
00378 return GWEN_ERROR_BAD_DATA;
00379 }
00380 else {
00381 newNode=currNode;
00382
00383 while( (newNode=GWEN_XMLNode_GetParent(newNode)) ) {
00384 GWEN_XmlCtx_DecDepth(ctx);
00385 s=GWEN_XMLNode_GetData(newNode);
00386 if (strcasecmp(s, tagName)==0)
00387 break;
00388 }
00389 if (newNode)
00390 newNode=GWEN_XMLNode_GetParent(newNode);
00391 if (newNode) {
00392 GWEN_XmlCtx_SetCurrentNode(ctx, newNode);
00393 GWEN_XmlCtx_DecDepth(ctx);
00394 }
00395 else {
00396 DBG_INFO(GWEN_LOGDOMAIN, "No matching parent node for [%s]",
00397 tagName);
00398 return GWEN_ERROR_BAD_DATA;
00399 }
00400 }
00401 }
00402 else {
00403 newNode=GWEN_XMLNode_GetParent(currNode);
00404 if (newNode==NULL) {
00405 DBG_INFO(GWEN_LOGDOMAIN, "No parent node at [%s]", tagName);
00406 return GWEN_ERROR_BAD_DATA;
00407 }
00408 GWEN_XmlCtx_SetCurrentNode(ctx, newNode);
00409 GWEN_XmlCtx_DecDepth(ctx);
00410 }
00411
00412 GWEN_XmlCtx_IncFinishedElement(ctx);
00413 }
00414 else {
00415 newNode=GWEN_XMLNode_new(GWEN_XMLNodeTypeTag, tagName);
00416 assert(newNode);
00417 GWEN_XMLNode_AddChild(currNode, newNode);
00418 GWEN_XmlCtx_SetCurrentNode(ctx, newNode);
00419 GWEN_XmlCtx_IncDepth(ctx);
00420 DBG_VERBOUS(GWEN_LOGDOMAIN, "Starting tag [%s]", tagName);
00421 }
00422
00423 return 0;
00424 }
00425
00426
00427
00428 int GWEN_XmlCtxStore_EndTag(GWEN_XML_CONTEXT *ctx, int closing) {
00429 GWEN_XMLNODE *currNode;
00430
00431 currNode=GWEN_XmlCtx_GetCurrentHeader(ctx);
00432 if (currNode) {
00433 DBG_VERBOUS(GWEN_LOGDOMAIN, "Ending header [%s]", GWEN_XMLNode_GetData(currNode));
00434 GWEN_XmlCtx_SetCurrentHeader(ctx, NULL);
00435 }
00436 else {
00437 currNode=GWEN_XmlCtx_GetCurrentNode(ctx);
00438 if (currNode==NULL)
00439 return GWEN_ERROR_INVALID;
00440 DBG_VERBOUS(GWEN_LOGDOMAIN, "Ending tag [%s] (%s)",
00441 GWEN_XMLNode_GetData(currNode),
00442 closing?"closing":"not closing");
00443
00444 if (closing) {
00445 GWEN_XMLNODE *newNode;
00446
00447 newNode=GWEN_XMLNode_GetParent(currNode);
00448 if (newNode==NULL) {
00449 DBG_INFO(GWEN_LOGDOMAIN, "No parent node at [%s]", GWEN_XMLNode_GetData(currNode));
00450 return GWEN_ERROR_BAD_DATA;
00451 }
00452 GWEN_XmlCtx_SetCurrentNode(ctx, newNode);
00453
00454 GWEN_XmlCtx_DecDepth(ctx);
00455 GWEN_XmlCtx_IncFinishedElement(ctx);
00456 }
00457 }
00458
00459 return 0;
00460 }
00461
00462
00463
00464 int GWEN_XmlCtxStore_AddData(GWEN_XML_CONTEXT *ctx, const char *data) {
00465 GWEN_XMLNODE *currNode;
00466 GWEN_BUFFER *buf;
00467 uint32_t flags;
00468
00469 flags=GWEN_XmlCtx_GetFlags(ctx);
00470 currNode=GWEN_XmlCtx_GetCurrentNode(ctx);
00471 if (currNode==NULL)
00472 return GWEN_ERROR_INVALID;
00473
00474 buf=GWEN_Buffer_new(0, 64, 0, 1);
00475 if (GWEN_Text_UnescapeXmlToBuffer(data, buf)) {
00476 GWEN_Buffer_free(buf);
00477 DBG_INFO(GWEN_LOGDOMAIN, "here");
00478 return GWEN_ERROR_BAD_DATA;
00479 }
00480
00481 if (!(flags & GWEN_XML_FLAGS_NO_CONDENSE) ||
00482 (flags & GWEN_XML_FLAGS_KEEP_CNTRL) ||
00483 (flags & GWEN_XML_FLAGS_KEEP_BLANKS)) {
00484 const uint8_t *p;
00485 uint8_t *dst;
00486 uint8_t *src;
00487 unsigned int size;
00488 unsigned int i;
00489 int lastWasBlank;
00490 uint8_t *lastBlankPos;
00491 uint32_t bStart=0;
00492
00493 dst=(uint8_t*)GWEN_Buffer_GetStart(buf);
00494 src=dst;
00495 if (!(flags & GWEN_XML_FLAGS_KEEP_BLANKS)) {
00496 if (flags & GWEN_XML_FLAGS_KEEP_CNTRL) {
00497 while(*src && (*src==32 || *src==9))
00498 src++;
00499 }
00500 else {
00501 while(*src && *src<33)
00502 src++;
00503 }
00504 }
00505
00506 p=src;
00507 bStart=src-((uint8_t*)GWEN_Buffer_GetStart(buf));
00508 size=GWEN_Buffer_GetUsedBytes(buf)-bStart;
00509 lastWasBlank=0;
00510 lastBlankPos=0;
00511
00512 for (i=0; i<size; i++) {
00513 uint8_t c;
00514
00515 c=*p;
00516 if (!(flags & GWEN_XML_FLAGS_KEEP_CNTRL) && c<32)
00517 c=32;
00518
00519
00520 if (!(flags & GWEN_XML_FLAGS_NO_CONDENSE) && c==32) {
00521 if (!lastWasBlank) {
00522
00523 lastWasBlank=1;
00524 lastBlankPos=dst;
00525 *(dst++)=c;
00526 }
00527 }
00528 else {
00529 lastWasBlank=0;
00530 lastBlankPos=0;
00531 *(dst++)=c;
00532 }
00533 p++;
00534 }
00535
00536
00537 if (lastBlankPos!=0)
00538 dst=lastBlankPos;
00539
00540 size=dst-(uint8_t*)GWEN_Buffer_GetStart(buf);
00541 GWEN_Buffer_Crop(buf, 0, size);
00542 }
00543
00544 if (GWEN_Buffer_GetUsedBytes(buf)) {
00545 GWEN_XMLNODE *newNode;
00546
00547 newNode=GWEN_XMLNode_new(GWEN_XMLNodeTypeData, GWEN_Buffer_GetStart(buf));
00548 assert(newNode);
00549 GWEN_XMLNode_AddChild(currNode, newNode);
00550 DBG_VERBOUS(GWEN_LOGDOMAIN, "Setting this data: [%s]", GWEN_Buffer_GetStart(buf));
00551 }
00552 GWEN_Buffer_free(buf);
00553
00554 return 0;
00555 }
00556
00557
00558
00559 int GWEN_XmlCtxStore_AddComment(GWEN_UNUSED GWEN_XML_CONTEXT *ctx, GWEN_UNUSED const char *data) {
00560 return 0;
00561 }
00562
00563
00564
00565 int GWEN_XmlCtxStore_AddAttr(GWEN_XML_CONTEXT *ctx,
00566 const char *attrName,
00567 const char *attrData) {
00568 GWEN_XMLNODE *currNode;
00569
00570 currNode=GWEN_XmlCtx_GetCurrentHeader(ctx);
00571 if (currNode) {
00572 DBG_VERBOUS(GWEN_LOGDOMAIN, "Setting attribute of header [%s]: [%s]=[%s]",
00573 GWEN_XMLNode_GetData(currNode), attrName, attrData);
00574 GWEN_XMLNode_SetProperty(currNode, attrName, attrData);
00575 }
00576 else {
00577 int isNormalProperty=1;
00578
00579 currNode=GWEN_XmlCtx_GetCurrentNode(ctx);
00580 if (currNode==NULL)
00581 return GWEN_ERROR_INVALID;
00582 if (attrData==NULL)
00583 attrData="";
00584
00585 if (ctx->flags & GWEN_XML_FLAGS_HANDLE_NAMESPACES) {
00586 if (strcasecmp(attrName, "xmlns")==0) {
00587 GWEN_XMLNODE_NAMESPACE *ns;
00588
00589 DBG_VERBOUS(GWEN_LOGDOMAIN, "Adding namespace [%s] to node [%s]",
00590 attrData, GWEN_XMLNode_GetData(currNode));
00591 ns=GWEN_XMLNode_NameSpace_new("", attrData);
00592 GWEN_XMLNode_AddNameSpace(currNode, ns);
00593 GWEN_XMLNode_NameSpace_free(ns);
00594 isNormalProperty=0;
00595 }
00596 else if (strncasecmp(attrName, "xmlns:", 6)==0) {
00597 const char *name;
00598
00599 name=strchr(attrName, ':');
00600 if (name) {
00601 name++;
00602 if (*name) {
00603 GWEN_XMLNODE_NAMESPACE *ns;
00604
00605 DBG_VERBOUS(GWEN_LOGDOMAIN, "Adding namespace [%s]=[%s]",
00606 name, attrData);
00607 ns=GWEN_XMLNode_NameSpace_new(name, attrData);
00608 GWEN_XMLNode_AddNameSpace(currNode, ns);
00609 GWEN_XMLNode_NameSpace_free(ns);
00610 isNormalProperty=0;
00611 }
00612 }
00613 }
00614 }
00615
00616 if (isNormalProperty) {
00617 GWEN_BUFFER *buf;
00618
00619 DBG_VERBOUS(GWEN_LOGDOMAIN, "Setting attribute of tag [%s]: [%s]=[%s]",
00620 GWEN_XMLNode_GetData(currNode), attrName, attrData);
00621 buf=GWEN_Buffer_new(0, 64, 0, 1);
00622 if (GWEN_Text_UnescapeXmlToBuffer(attrData, buf)) {
00623 GWEN_Buffer_free(buf);
00624 DBG_INFO(GWEN_LOGDOMAIN, "here");
00625 return GWEN_ERROR_BAD_DATA;
00626 }
00627 GWEN_XMLNode_SetProperty(currNode, attrName, GWEN_Buffer_GetStart(buf));
00628 GWEN_Buffer_free(buf);
00629 }
00630 }
00631
00632 return 0;
00633 }
00634
00635
00636
00637
00638
00639