Package CedarBackup2 :: Package extend :: Module split
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.extend.split

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2007,2010 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.5) 
 29  # Project  : Official Cedar Backup Extensions 
 30  # Revision : $Id: split.py 1006 2010-07-07 21:03:57Z pronovic $ 
 31  # Purpose  : Provides an extension to split up large files in staging directories. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Provides an extension to split up large files in staging directories. 
 41   
 42  When this extension is executed, it will look through the configured Cedar 
 43  Backup staging directory for files exceeding a specified size limit, and split 
 44  them down into smaller files using the 'split' utility.  Any directory which 
 45  has already been split (as indicated by the C{cback.split} file) will be 
 46  ignored. 
 47   
 48  This extension requires a new configuration section <split> and is intended 
 49  to be run immediately after the standard stage action or immediately before the 
 50  standard store action.  Aside from its own configuration, it requires the 
 51  options and staging configuration sections in the standard Cedar Backup 
 52  configuration file. 
 53   
 54  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 55  """ 
 56   
 57  ######################################################################## 
 58  # Imported modules 
 59  ######################################################################## 
 60   
 61  # System modules 
 62  import os 
 63  import re 
 64  import logging 
 65   
 66  # Cedar Backup modules 
 67  from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership 
 68  from CedarBackup2.xmlutil import createInputDom, addContainerNode 
 69  from CedarBackup2.xmlutil import readFirstChild 
 70  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 71  from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
 72   
 73   
 74  ######################################################################## 
 75  # Module-wide constants and variables 
 76  ######################################################################## 
 77   
 78  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 79   
 80  SPLIT_COMMAND = [ "split", ] 
 81  SPLIT_INDICATOR = "cback.split" 
########################################################################
# SplitConfig class definition
########################################################################

class SplitConfig(object):

   """
   Value object holding the settings used when splitting staging directories.

   Either setting may be left as C{None}.  When a setting is filled in, the
   following restrictions apply:

      - The size limit must be a C{ByteQuantity}
      - The split size must be a C{ByteQuantity}

   @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
   """

   def __init__(self, sizeLimit=None, splitSize=None):
      """
      Constructor for the C{SplitConfig} class.

      Both arguments are routed through the validating properties, so an
      invalid argument is rejected here rather than later.

      @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
      @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

      @raise ValueError: If one of the values is invalid.
      """
      self._sizeLimit = self._splitSize = None
      self.sizeLimit = sizeLimit
      self.splitSize = splitSize

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      fields = (self.sizeLimit, self.splitSize)
      return "SplitConfig(%s, %s)" % fields

   def __str__(self):
      """
      Informal string representation for class instance (same as C{__repr__}).
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Three-way comparison against another configuration object.

      The size limit is compared first; the split size is only consulted when
      the size limits are equal.  Any instance sorts after C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      # Walk the fields in priority order; the first one that differs decides.
      for name in ("sizeLimit", "splitSize"):
         mine = getattr(self, name)
         theirs = getattr(other, name)
         if mine != theirs:
            return -1 if mine < theirs else 1
      return 0

   def _setSizeLimit(self, value):
      """
      Property target used to set the size limit.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._sizeLimit = value

   def _getSizeLimit(self):
      """
      Property target used to get the size limit.
      """
      return self._sizeLimit

   def _setSplitSize(self, value):
      """
      Property target used to set the split size.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._splitSize = value

   def _getSplitSize(self):
      """
      Property target used to get the split size.
      """
      return self._splitSize

   sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
   splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
190
########################################################################
# LocalConfig class definition
########################################################################

class LocalConfig(object):

   """
   Class representing this extension's configuration document.

   This is not a general-purpose configuration object like the main Cedar
   Backup configuration object.  Instead, it just knows how to parse and emit
   split-specific configuration values.  Third parties who need to read and
   write configuration related to this extension should access it through the
   constructor, C{validate} and C{addConfig} methods.

   @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
   """

   def __init__(self, xmlData=None, xmlPath=None, validate=True):
      """
      Initializes a configuration object.

      If you initialize the object without passing either C{xmlData} or
      C{xmlPath} then configuration will be empty and will be invalid until it
      is filled in properly.

      No reference to the original XML data or original path is saved off by
      this class.  Once the data has been parsed (successfully or not) this
      original information is discarded.

      Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
      method will be called (with its default arguments) against configuration
      after successfully parsing any passed-in XML.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      XML document if lower-level validations fail.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid configuration from disk.

      @param xmlData: XML data representing configuration.
      @type xmlData: String data.

      @param xmlPath: Path to an XML file on disk.
      @type xmlPath: Absolute path to a file on disk.

      @param validate: Validate the document after parsing it.
      @type validate: Boolean true/false.

      @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
      @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
      @raise ValueError: If the parsed configuration document is not valid.
      """
      self._split = None
      self.split = None
      if xmlData is not None and xmlPath is not None:
         raise ValueError("Use either xmlData or xmlPath, but not both.")
      if xmlPath is not None:
         # Close the file explicitly rather than relying on garbage collection.
         # try/finally is used instead of "with" to stay valid on Python 2.5.
         handle = open(xmlPath)
         try:
            xmlData = handle.read()
         finally:
            handle.close()
      if xmlData is not None:
         self._parseXmlData(xmlData)
         if validate:
            self.validate()

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      # Explicit 1-tuple so the value is always formatted as a single argument.
      return "LocalConfig(%s)" % (self.split,)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Three-way comparison against another configuration object, based solely
      on the C{split} section.  Any instance sorts after C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self.split != other.split:
         if self.split < other.split:
            return -1
         else:
            return 1
      return 0

   def _setSplit(self, value):
      """
      Property target used to set the split configuration value.
      If not C{None}, the value must be a C{SplitConfig} object.
      @raise ValueError: If the value is not a C{SplitConfig}
      """
      if value is None:
         self._split = None
      else:
         if not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
         self._split = value

   def _getSplit(self):
      """
      Property target used to get the split configuration value.
      """
      return self._split

   split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

   def validate(self):
      """
      Validates configuration represented by the object.

      Split configuration must be filled in.  Within that, both the size limit
      and split size must be filled in.

      @raise ValueError: If one of the validations fails.
      """
      if self.split is None:
         raise ValueError("Split section is required.")
      if self.split.sizeLimit is None:
         raise ValueError("Size limit must be set.")
      if self.split.splitSize is None:
         raise ValueError("Split size must be set.")

   def addConfig(self, xmlDom, parentNode):
      """
      Adds a <split> configuration section as the next child of a parent.

      Third parties should use this function to write configuration related to
      this extension.  Nothing is written when no split configuration is set.

      We add the following fields to the document::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent that the section should be appended to.
      """
      if self.split is not None:
         sectionNode = addContainerNode(xmlDom, parentNode, "split")
         addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
         addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

   def _parseXmlData(self, xmlData):
      """
      Internal method to parse an XML string into the object.

      This method parses the XML document into a DOM tree (C{xmlDom}) and then
      calls a static method to parse the split configuration section.

      @param xmlData: XML data to be parsed
      @type xmlData: String data

      @raise ValueError: If the XML cannot be successfully parsed.
      """
      (xmlDom, parentNode) = createInputDom(xmlData)
      self._split = LocalConfig._parseSplit(parentNode)

   @staticmethod
   def _parseSplit(parent):
      """
      Parses a split configuration section.

      We read the following individual fields::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param parent: Parent node to search beneath.

      @return: C{SplitConfig} object or C{None} if the section does not exist.
      @raise ValueError: If some filled-in value is invalid.
      """
      split = None
      section = readFirstChild(parent, "split")
      if section is not None:
         split = SplitConfig()
         split.sizeLimit = readByteQuantity(section, "size_limit")
         split.splitSize = readByteQuantity(section, "split_size")
      return split
383
########################################################################
# Public functions
########################################################################

###########################
# executeAction() function
###########################

def executeAction(configPath, options, config):
   """
   Executes the split backup action.

   The extension's own configuration is loaded from C{configPath}.  Each
   daily staging directory returned by C{findDailyDirs} for the
   C{cback.split} indicator is split, and then the indicator file is written
   into it.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there are I/O problems reading or writing files
   """
   logger.debug("Executing split extended action.")
   if not (config.options is not None and config.stage is not None):
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   local = LocalConfig(xmlPath=configPath)
   for stagingDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
      _splitDailyDir(stagingDir,
                     local.split.sizeLimit,
                     local.split.splitSize,
                     config.options.backupUser,
                     config.options.backupGroup)
      # Written only after the directory has been split without an exception.
      writeIndicatorFile(stagingDir,
                         SPLIT_INDICATOR,
                         config.options.backupUser,
                         config.options.backupGroup)
   logger.info("Executed the split extended action successfully.")
419
##############################
# _splitDailyDir() function
##############################

def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits large files in a daily staging directory.

   The candidate files come from C{getBackupFiles}, which is relied upon to
   leave out indicator files (i.e. C{"cback.store"}, C{"cback.stage"}, etc.).
   Every remaining file whose size on disk is strictly greater than the size
   limit is split, and its original is removed afterwards.

   @param dailyDir: Daily directory to split
   @param sizeLimit: Size limit, as an object exposing a C{bytes} attribute
   @param splitSize: Split size, passed through unchanged to C{_splitFile}
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise IOError: If there is a problem splitting one of the files.
   """
   logger.debug("Begin splitting contents of [%s]." % dailyDir)
   for candidate in getBackupFiles(dailyDir):  # ignores indicator files
      if float(os.stat(candidate).st_size) > sizeLimit.bytes:
         _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s]." % dailyDir)
449
########################
# _splitFile() function
########################

def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
   """
   Splits the source file into chunks of the indicated size.

   The chunks are written next to the source file and are named
   C{<filename>_NNNNN}, where C{NNNNN} is a zero-padded, five-digit sequence
   number starting at C{00000}.  The split files will be owned by the
   indicated backup user and group.  If C{removeSource} is C{True}, then the
   source file will be removed after it is successfully split.

   The number of chunks is derived from the last line printed by
   C{split --verbose}, which names the final chunk that was created.

   @param sourcePath: Absolute path of the source file to split
   @param splitSize: Size of each chunk, as an object exposing a C{bytes} attribute
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by
   @param removeSource: Indicates whether to remove the source file

   @raise ValueError: If the source path does not exist.
   @raise IOError: If there is a problem accessing, splitting or removing the source file.
   """
   cwd = os.getcwd()
   try:
      if not os.path.exists(sourcePath):
         raise ValueError("Source path [%s] does not exist." % sourcePath)
      dirname = os.path.dirname(sourcePath)
      filename = os.path.basename(sourcePath)
      prefix = "%s_" % filename
      chunkBytes = int(splitSize.bytes)
      os.chdir(dirname) # need to operate from directory that we want files written to
      command = resolveCommand(SPLIT_COMMAND)
      args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
      (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
      if result != 0:
         raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
      if not output:
         # Nothing to parse; report it the same way as unrecognized output.
         raise IOError("Unable to parse output from split command.")
      # Older coreutils quote the name as `name', newer ones as 'name', so
      # accept either opening quote.  The prefix comes from a filename and must
      # be escaped so that regex metacharacters in it are matched literally.
      pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
      match = pattern.search(output[-1])
      if match is None:
         raise IOError("Unable to parse output from split command.")
      try:
         lastIndex = int(match.group(3).strip())
      except ValueError:
         raise IOError("Unable to parse output from split command.")
      # The last line names the final chunk, so chunks 0..lastIndex inclusive
      # exist.  Every one of them must be verified and have its ownership set.
      for index in range(0, lastIndex + 1):
         path = "%s%05d" % (prefix, index)
         if not os.path.exists(path):
            raise IOError("After call to split, expected file [%s] does not exist." % path)
         changeOwnership(path, backupUser, backupGroup)
      if removeSource:
         if os.path.exists(sourcePath):
            try:
               os.remove(sourcePath)
               logger.debug("Completed removing old file [%s]." % sourcePath)
            except (IOError, OSError):
               raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
   finally:
      # Always restore the caller's working directory, even on failure.
      os.chdir(cwd)
504