Package CedarBackup2 :: Module xmlutil
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.xmlutil

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2006,2010 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # Portions Copyright (c) 2000 Fourthought Inc, USA. 
 15  # All Rights Reserved. 
 16  # 
 17  # This program is free software; you can redistribute it and/or 
 18  # modify it under the terms of the GNU General Public License, 
 19  # Version 2, as published by the Free Software Foundation. 
 20  # 
 21  # This program is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 24  # 
 25  # Copies of the GNU General Public License are available from 
 26  # the Free Software Foundation website, http://www.gnu.org/. 
 27  # 
 28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 29  # 
 30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 31  # Language : Python (>= 2.5) 
 32  # Project  : Cedar Backup, release 2 
 33  # Revision : $Id: xmlutil.py 1006 2010-07-07 21:03:57Z pronovic $ 
 34  # Purpose  : Provides general XML-related functionality. 
 35  # 
 36  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 37   
 38  ######################################################################## 
 39  # Module documentation 
 40  ######################################################################## 
 41   
 42  """ 
 43  Provides general XML-related functionality. 
 44   
 45  What I'm trying to do here is abstract much of the functionality that directly 
 46  accesses the DOM tree.  This is not so much to "protect" the other code from 
 47  the DOM, but to standardize the way it's used.  It will also help extension 
 48  authors write code that easily looks more like the rest of Cedar Backup. 
 49   
 50  @sort: createInputDom, createOutputDom, serializeDom, isElement, readChildren,  
 51         readFirstChild, readStringList, readString, readInteger, readBoolean, 
 52         addContainerNode, addStringNode, addIntegerNode, addBooleanNode, 
 53         TRUE_BOOLEAN_VALUES, FALSE_BOOLEAN_VALUES, VALID_BOOLEAN_VALUES 
 54   
 55  @var TRUE_BOOLEAN_VALUES: List of boolean values in XML representing C{True}. 
 56  @var FALSE_BOOLEAN_VALUES: List of boolean values in XML representing C{False}. 
 57  @var VALID_BOOLEAN_VALUES: List of valid boolean values in XML. 
 58   
 59  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 60  """ 
 61  # pylint: disable-msg=C0111,C0103,W0511,W0104 
 62   
 63  ######################################################################## 
 64  # Imported modules 
 65  ######################################################################## 
 66   
 67  # System modules 
 68  import sys 
 69  import re 
 70  import logging 
 71  import codecs 
 72  from types import UnicodeType 
 73  from StringIO import StringIO 
 74   
 75  # XML-related modules 
 76  from xml.parsers.expat import ExpatError 
 77  from xml.dom.minidom import Node 
 78  from xml.dom.minidom import getDOMImplementation 
 79  from xml.dom.minidom import parseString 
 80   
 81   
 82  ######################################################################## 
 83  # Module-wide constants and variables 
 84  ######################################################################## 
 85   
# Logger used by this module, registered under the "CedarBackup2.log.xml" name.
logger = logging.getLogger("CedarBackup2.log.xml")

# String values accepted in XML for boolean fields.  VALID_BOOLEAN_VALUES is
# simply the true values followed by the false values.
TRUE_BOOLEAN_VALUES   = [ "Y", "y", ]
FALSE_BOOLEAN_VALUES  = [ "N", "n", ]
VALID_BOOLEAN_VALUES  = TRUE_BOOLEAN_VALUES + FALSE_BOOLEAN_VALUES
 91   
 92   
 93  ######################################################################## 
 94  # Functions for creating and parsing DOM trees 
 95  ######################################################################## 
 96   
def createInputDom(xmlData, name="cb_config"):
   """
   Parses an XML string into a DOM tree and locates its root node.

   @param xmlData: XML document to parse, as a string.
   @param name: Assumed base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the parsed document, where
            parentNode is the result of L{readFirstChild} for C{name}.
   @raise ValueError: If the document can't be parsed.
   """
   try:
      document = parseString(xmlData)
      return (document, readFirstChild(document, name))
   except (IOError, ExpatError):
      # The active exception is fetched through sys.exc_info() rather than
      # bound in the except clause, which keeps the syntax version-neutral.
      raise ValueError("Unable to parse XML document: %s" % sys.exc_info()[1])
110
def createOutputDom(name="cb_config"):
   """
   Builds a new, empty DOM tree to be filled in and written out as XML.

   @param name: Base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the new document, where
            parentNode is the document element of xmlDom.
   """
   document = getDOMImplementation().createDocument(None, name, None)
   return (document, document.documentElement)


########################################################################
# Functions for reading values out of XML documents
########################################################################

def isElement(node):
   """
   Tells whether the passed-in XML node is an element node.

   @param node: DOM node to check.

   @return: C{True} if the node's type is C{ELEMENT_NODE}, C{False} otherwise.
   """
   return Node.ELEMENT_NODE == node.nodeType
131
def readChildren(parent, name):
   """
   Returns a list of nodes with a given name immediately beneath the parent.

   "Immediately beneath" means direct children only.  The DOM method
   C{getElementsByTagName} returns matching descendants at any depth, so
   its result is filtered down to the nodes whose C{parentNode} is the
   passed-in parent.

   @param parent: Parent node to search beneath (may be C{None}).
   @param name: Name of nodes to search for.

   @return: List of child nodes with correct parent, or an empty list if
            no matching nodes are found or the parent is C{None}.
   """
   if parent is None:
      return []
   return [ node for node in parent.getElementsByTagName(name) if node.parentNode is parent ]
159
def readFirstChild(parent, name):
   """
   Returns the first child with a given name immediately beneath the parent.

   "Immediately beneath" means direct children only, as determined by
   L{readChildren}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: First properly-named child of parent, or C{None} if no matching nodes are found.
   """
   matches = readChildren(parent, name)
   if not matches:
      return None
   return matches[0]
176
def readStringList(parent, name):
   """
   Returns a list of the string contents associated with nodes with a given
   name immediately beneath the parent.

   The matching nodes are found with L{readChildren}, so only direct
   children of the parent are considered.  The "string contents" of a node
   is taken to be the value of its first C{TEXT_NODE} child.  Nodes which
   have no C{TEXT_NODE} children contribute nothing to the returned list.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: List of strings as described above, or C{None} if the list would be empty.
   """
   values = []
   for node in readChildren(parent, name):
      # Only the first text child counts; stop looking once it is found.
      for child in node.childNodes:
         if child.nodeType == Node.TEXT_NODE:
            values.append(child.nodeValue)
            break
   return values or None
208
def readString(parent, name):
   """
   Returns string contents of the first child with a given name immediately
   beneath the parent.

   This is the first entry of the list produced by L{readStringList}, so the
   same rules apply: only direct children are considered, and the string
   contents of a node is the value of its first C{TEXT_NODE} child.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: String contents of node or C{None} if no matching nodes are found.
   """
   values = readStringList(parent, name)
   return None if values is None else values[0]
228
def readInteger(parent, name):
   """
   Returns integer contents of the first child with a given name immediately
   beneath the parent.

   The value is read with L{readString} and converted with C{int()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   value = readString(parent, name)
   if value is None:
      return None
   return int(value)
248
def readFloat(parent, name):
   """
   Returns float contents of the first child with a given name immediately
   beneath the parent.

   The value is read with L{readString} and converted with C{float()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Float contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a
                      float value.
   """
   value = readString(parent, name)
   if value is None:
      return None
   return float(value)
269
def readBoolean(parent, name):
   """
   Returns boolean contents of the first child with a given name immediately
   beneath the parent.

   The value is read with L{readString} and must be one of the values in
   L{VALID_BOOLEAN_VALUES}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Boolean contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a boolean.
   """
   value = readString(parent, name)
   if value is None:
      return None
   if value in TRUE_BOOLEAN_VALUES:
      return True
   if value in FALSE_BOOLEAN_VALUES:
      return False
   raise ValueError("Boolean values must be one of %s." % VALID_BOOLEAN_VALUES)


########################################################################
# Functions for writing values into XML documents
########################################################################

def addContainerNode(xmlDom, parentNode, nodeName):
   """
   Creates a new, empty element and appends it as the last child of a parent.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.

   @return: Reference to the newly-created node.
   """
   child = xmlDom.createElement(nodeName)
   parentNode.appendChild(child)
   return child
315
def addStringNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new element containing a string as the next child of a parent.

   The element itself is created via L{addContainerNode}.  If C{nodeValue}
   is C{None}, the element is still created but is left empty (i.e. it gets
   no text node child).

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   node = addContainerNode(xmlDom, parentNode, nodeName)
   if nodeValue is None:
      return node
   node.appendChild(xmlDom.createTextNode(nodeValue))
   return node
335
def addIntegerNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new element containing an integer as the next child of a parent.

   If C{nodeValue} is C{None}, the element is still created but is left
   empty (i.e. it gets no text node child).  Otherwise the integer is
   formatted with "%d" and the result is added via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   text = None
   if nodeValue is not None:
      text = "%d" % nodeValue
   return addStringNode(xmlDom, parentNode, nodeName, text)
357
def addBooleanNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new element containing a boolean as the next child of a parent.

   If C{nodeValue} is C{None}, the element is still created but is left
   empty (i.e. it gets no text node child).  Any value that Python treats as
   true is written as the string "Y"; any other non-C{None} value is written
   as "N".  The result is added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is None:
      text = None
   elif nodeValue:
      text = "Y"
   else:
      text = "N"
   return addStringNode(xmlDom, parentNode, nodeName, text)


########################################################################
# Functions for serializing DOM trees
########################################################################

def serializeDom(xmlDom, indent=3):
   """
   Serializes a DOM tree into a string using L{Serializer}.

   The document is written into an in-memory buffer with the "UTF-8"
   encoding, and the buffer contents are returned.

   @param xmlDom: XML DOM tree to serialize
   @param indent: Number of spaces to indent, as an integer
   @return: String form of DOM tree, pretty-printed.
   """
   buf = StringIO()
   Serializer(buf, "UTF-8", indent=indent).serialize(xmlDom)
   serialized = buf.getvalue()
   buf.close()
   return serialized
402
class Serializer(object):

   """
   XML serializer class.

   This is a customized serializer that I hacked together based on what I found
   in the PyXML distribution.  Basically, around release 2.7.0, the only reason
   I still had around a dependency on PyXML was for the PrettyPrint
   functionality, and that seemed pointless.  So, I stripped the PrettyPrint
   code out of PyXML and hacked bits of it off until it did just what I needed
   and no more.

   This code started out being called PrintVisitor, but I decided it makes more
   sense just calling it a serializer.  I've made nearly all of the methods
   private, and I've added a new high-level serialize() method rather than
   having clients call C{visit()}.

   Anyway, as a consequence of my hacking with it, this can't quite be called a
   complete XML serializer any more.  I ripped out support for HTML and XHTML,
   and there is also no longer any support for namespaces (which I took out
   because this dragged along a lot of extra code, and Cedar Backup doesn't use
   namespaces).  However, everything else should pretty much work as expected.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """

   # Maps each DOM node type to the name of the method that serializes it.
   # Method names (not function objects) are stored so that the lookup goes
   # through the instance and honors any override in a subclass.
   _VISITORS = {
      Node.ELEMENT_NODE:                "_visitElement",
      Node.ATTRIBUTE_NODE:              "_visitAttr",
      Node.TEXT_NODE:                   "_visitText",
      Node.CDATA_SECTION_NODE:          "_visitCDATASection",
      Node.ENTITY_REFERENCE_NODE:       "_visitEntityReference",
      Node.ENTITY_NODE:                 "_visitEntity",
      Node.PROCESSING_INSTRUCTION_NODE: "_visitProcessingInstruction",
      Node.COMMENT_NODE:                "_visitComment",
      Node.DOCUMENT_NODE:               "_visitDocument",
      Node.DOCUMENT_TYPE_NODE:          "_visitDocumentType",
      Node.DOCUMENT_FRAGMENT_NODE:      "_visitDocumentFragment",
      Node.NOTATION_NODE:               "_visitNotation",
   }

   def __init__(self, stream=sys.stdout, encoding="UTF-8", indent=3):
      """
      Initialize a serializer.
      @param stream: Stream to write output to.
      @param encoding: Output encoding.
      @param indent: Number of spaces to indent, as an integer
      """
      self.stream = stream
      self.encoding = encoding
      self._indent = indent * " "   # whitespace written once per nesting level
      self._depth = 0               # current element nesting level
      self._inText = False          # true right after text output; suppresses indentation

   def serialize(self, xmlDom):
      """
      Serialize the passed-in XML document.
      @param xmlDom: XML DOM tree to serialize
      @raise ValueError: If there's an unknown node type in the document.
      """
      self._visit(xmlDom)
      self.stream.write("\n")

   def _write(self, text):
      """Encodes text into the output encoding and writes it to the stream."""
      self.stream.write(_encodeText(text, self.encoding))

   def _tryIndent(self):
      """Starts a new, indented line unless indentation is off or text was just written."""
      if not self._inText and self._indent:
         self._write('\n' + self._indent*self._depth)

   def _visit(self, node):
      """
      Dispatches a node to the method that handles its node type.
      @raise ValueError: If there's an unknown node type in the document.
      """
      handler = self._VISITORS.get(node.nodeType)
      if handler is None:
         # It has a node type, but we don't know how to handle it
         raise ValueError("Unknown node type: %s" % repr(node))
      return getattr(self, handler)(node)

   def _visitNodeList(self, node, exclude=None):
      """Visits every node in a list, skipping the C{exclude} node if given."""
      for curr in node:
         if curr is not exclude:
            self._visit(curr)

   def _visitNamedNodeMap(self, node):
      """Visits every value held in a named node map."""
      for item in node.values():
         self._visit(item)

   def _visitAttr(self, node):
      """Writes an attribute as C{ name=<quote>value<quote>}."""
      self._write(' ' + node.name)
      text = _translateCDATA(node.value, self.encoding)
      text, delimiter = _translateCDATAAttr(text)
      # The value was already encoded by _translateCDATA, so it goes straight
      # to the stream rather than through _write (which would encode again).
      self.stream.write("=%s%s%s" % (delimiter, text, delimiter))

   def _visitProlog(self):
      """Writes the XML declaration."""
      self._write("<?xml version='1.0' encoding='%s'?>" % (self.encoding or 'utf-8'))
      self._inText = False

   def _visitDocument(self, node):
      """Writes the prolog, the doctype (if any), then the remaining children."""
      self._visitProlog()
      if node.doctype:
         self._visitDocumentType(node.doctype)
      self._visitNodeList(node.childNodes, exclude=node.doctype)

   def _visitDocumentFragment(self, node):
      """Writes all children of a document fragment."""
      self._visitNodeList(node.childNodes)

   def _visitElement(self, node):
      """Writes an element, its attributes, and (recursively) its children."""
      self._tryIndent()
      self._write('<%s' % node.tagName)
      for attr in node.attributes.values():
         self._visitAttr(attr)
      if len(node.childNodes):
         self._write('>')
         self._depth = self._depth + 1
         self._visitNodeList(node.childNodes)
         self._depth = self._depth - 1
         # Put the closing tag on its own line, except directly after text.
         if not self._inText:
            self._tryIndent()
         self._write('</%s>' % node.tagName)
      else:
         self._write('/>')
      self._inText = False

   def _visitText(self, node):
      """
      Writes the contents of a text node.

      When indentation is enabled, text nodes consisting only of whitespace
      are dropped, so that they neither clutter the output nor switch off
      indentation for the enclosing element.  All other text is written
      verbatim (after escaping), including any leading or trailing
      whitespace.
      """
      text = node.data
      if self._indent and not text.strip():
         # This used to be a bare "text.strip()" whose result was discarded,
         # so whitespace-only nodes were never actually filtered out.
         text = ""
      if text:
         text = _translateCDATA(text, self.encoding)
         # Already encoded by _translateCDATA; write directly to the stream.
         self.stream.write(text)
         self._inText = True

   def _visitDocumentType(self, doctype):
      """Writes a DOCTYPE declaration, if it has a public or system identifier."""
      if not doctype.systemId and not doctype.publicId:
         return
      self._tryIndent()
      self._write('<!DOCTYPE %s' % doctype.name)
      # Use single quotes around an identifier that itself contains a double quote.
      if doctype.systemId and '"' in doctype.systemId:
         system = "'%s'" % doctype.systemId
      else:
         system = '"%s"' % doctype.systemId
      if doctype.publicId and '"' in doctype.publicId:
         # We should probably throw an error
         # Valid characters:  <space> | <newline> | <linefeed> |
         #                    [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
         public = "'%s'" % doctype.publicId
      else:
         public = '"%s"' % doctype.publicId
      if doctype.publicId and doctype.systemId:
         self._write(' PUBLIC %s %s' % (public, system))
      elif doctype.systemId:
         self._write(' SYSTEM %s' % system)
      if doctype.entities or doctype.notations:
         self._write(' [')
         self._depth = self._depth + 1
         self._visitNamedNodeMap(doctype.entities)
         self._visitNamedNodeMap(doctype.notations)
         self._depth = self._depth - 1
         self._tryIndent()
         self._write(']>')
      else:
         self._write('>')
      self._inText = False

   def _visitEntity(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      # NOTE(review): the identifiers are written without surrounding quotes;
      # confirm whether that is acceptable for the documents handled here.
      self._tryIndent()
      self._write('<!ENTITY %s' % (node.nodeName))
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      if node.notationName:
         self._write(' NDATA %s' % node.notationName)
      self._write('>')

   def _visitNotation(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      # NOTE(review): identifiers are written unquoted here as well.
      self._tryIndent()
      self._write('<!NOTATION %s' % node.nodeName)
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      self._write('>')

   def _visitCDATASection(self, node):
      """Writes a CDATA section with its data unmodified."""
      self._tryIndent()
      self._write('<![CDATA[%s]]>' % (node.data))
      self._inText = False

   def _visitComment(self, node):
      """Writes a comment."""
      self._tryIndent()
      self._write('<!--%s-->' % (node.data))
      self._inText = False

   def _visitEntityReference(self, node):
      """Writes an entity reference; treated like text for indentation purposes."""
      self._write('&%s;' % node.nodeName)
      self._inText = True

   def _visitProcessingInstruction(self, node):
      """Writes a processing instruction."""
      self._tryIndent()
      self._write('<?%s %s?>' % (node.target, node.data))
      self._inText = False
640
def _encodeText(text, encoding):
   """
   Encodes text using the named encoding.

   Text that is not already a Unicode object is first decoded as UTF-8.
   Subclasses of the Unicode type are recognized as Unicode and are not
   decoded again.

   @param text: Text to encode (Unicode object, or UTF-8 encoded string).
   @param encoding: Name of the encoding to use, as understood by C{codecs}.

   @return: The encoded text.
   @raise LookupError: If the encoding is not known to C{codecs}.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was attributed to Martin v.
   Löwis and was Copyright (c) 2000 Fourthought Inc, USA; All Rights Reserved.
   """
   # Looked up first so that an unknown encoding is reported before any
   # attempt is made to decode the text.
   encoder = codecs.getencoder(encoding)
   if not isinstance(text, UnicodeType):
      text = unicode(text, "utf-8")
   return encoder(text)[0] # the encoder returns (result, size)
652
def _translateCDATAAttr(characters):
   """
   Picks a quote character for an attribute value and escapes the value to
   match.

   A value containing a single quote is delimited with double quotes (and
   any double quotes in it become C{&quot;}); anything else is delimited
   with single quotes.  Newlines are always written as C{&#10;}.

   @param characters: Attribute value to prepare.

   @return: Tuple (escaped value, delimiter).  An empty value yields an
            empty string and the single-quote delimiter.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """
   if not characters:
      return '', "'"
   if "'" in characters:
      quote = '"'
      escaped = characters.replace('"', '&quot;')
   else:
      quote = "'"
      escaped = characters.replace("'", '&apos;')
   #FIXME: There's more to normalization
   #Convert attribute new-lines to character entity
   escaped = escaped.replace('\n', '&#10;')
   return escaped, quote


# Note: Unicode objects only for now
# Markup that must be escaped inside character data, and its replacements.
_CDATA_CHAR_PATTERN = re.compile('[&<]|]]>')
_CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', ']]>': ']]&gt;', }

# Characters that get replaced by a numeric character reference: the low
# control characters other than tab, newline and carriage return, plus the
# code points U+FFFE and U+FFFF.  The high characters are matched as code
# points (not as UTF-8 byte sequences) because the text handled here is a
# Unicode object.
_XML_ILLEGAL_CHAR_PATTERN = re.compile(u'[\x01-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]')

def _translateCDATA(characters, encoding='UTF-8', prev_chars='', markupSafe=0):
   """
   Escapes character data for inclusion in an XML document and encodes it.

   Unless C{markupSafe} is set, C{&}, C{<} and C{]]>} are replaced by
   C{&amp;}, C{&lt;} and C{]]&gt;}.  Each character matched by the illegal
   character pattern is replaced by a decimal character reference.  The
   result is then encoded via C{_encodeText}.

   @param characters: Text to translate (Unicode object).
   @param encoding: Encoding to apply to the result.
   @param prev_chars: Text written just before this text; if it ends in
                      C{]]} and this text starts with C{>}, that C{>} is
                      escaped so the two do not combine into C{]]>}.
   @param markupSafe: If true, markup characters are left alone.

   @return: Escaped and encoded text, or an empty string for empty input.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """
   if not characters:
      return ''
   if markupSafe:
      new_string = characters
   else:
      new_string = _CDATA_CHAR_PATTERN.sub(lambda m: _CHAR_TO_ENTITY[m.group()], characters)
      if prev_chars[-2:] == ']]' and characters[0] == '>':
         new_string = '&gt;' + new_string[1:]
   #Note: use decimal char entity rep because some browsers are broken
   # Every match is exactly one character, so ord() is always safe here.
   new_string = _XML_ILLEGAL_CHAR_PATTERN.sub(lambda m: '&#%i;' % ord(m.group()), new_string)
   return _encodeText(new_string, encoding)
708