1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 """
40 Provides an extension to split up large files in staging directories.
41
42 When this extension is executed, it will look through the configured Cedar
43 Backup staging directory for files exceeding a specified size limit, and split
44 them down into smaller files using the 'split' utility. Any directory which
45 has already been split (as indicated by the C{cback.split} file) will be
46 ignored.
47
48 This extension requires a new configuration section <split> and is intended
49 to be run immediately after the standard stage action or immediately before the
50 standard store action. Aside from its own configuration, it requires the
51 options and staging configuration sections in the standard Cedar Backup
52 configuration file.
53
54 @author: Kenneth J. Pronovici <pronovic@ieee.org>
55 """
56
57
58
59
60
61
62 import os
63 import re
64 import logging
65
66
67 from CedarBackup2.filesystem import FilesystemList
68 from CedarBackup2.util import resolveCommand, executeCommand
69 from CedarBackup2.util import changeOwnership, buildNormalizedPath
70 from CedarBackup2.util import UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES
71 from CedarBackup2.xmlutil import createInputDom, addContainerNode, addStringNode
72 from CedarBackup2.xmlutil import readFirstChild, readString
73 from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles
74 from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode
75
76
77
78
79
80
# Logger used by everything in this extension.
logger = logging.getLogger("CedarBackup2.log.extend.split")

# Command, as an argument list, that is resolved before the split utility is run.
SPLIT_COMMAND = ["split"]

# Name of the indicator file that marks a directory as already split.
SPLIT_INDICATOR = "cback.split"
85
86
87
88
89
90
class SplitConfig(object):

    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a ByteQuantity
       - The split size must be a ByteQuantity

    Instances compare by value: first on the size limit, then on the split
    size.  A field that is C{None} sorts before any other value.  The
    comparison is available through C{__cmp__} and through the rich comparison
    operators, so it does not depend on the interpreter honouring C{__cmp__}.

    @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the C{SplitConfig} class.

        @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
        @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

        @raise ValueError: If one of the values is invalid.
        """
        self._sizeLimit = None
        self._splitSize = None
        # Assign through the properties so that both values get validated.
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another C{SplitConfig}.

        The size limits are compared first; the split sizes are only looked at
        when the size limits are equal.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        result = SplitConfig._compareValues(self._sizeLimit, other._sizeLimit)
        if result == 0:
            result = SplitConfig._compareValues(self._splitSize, other._splitSize)
        return result

    def _compareValues(left, right):
        """
        Three-way comparison of two field values, either of which may be C{None}.

        C{None} is ordered before every other value explicitly, rather than
        relying on the interpreter to order C{None} against other objects.

        @return: -1/0/1 depending on whether left is C{<}, C{=} or C{>} right.
        """
        if left != right:
            if left is None:
                return -1
            if right is None:
                return 1
            if left < right:
                return -1
            else:
                return 1
        return 0
    _compareValues = staticmethod(_compareValues)

    def _comparable(other):
        """
        Indicates whether C{other} is something C{__cmp__} knows how to handle.
        """
        return other is None or isinstance(other, SplitConfig)
    _comparable = staticmethod(_comparable)

    # The rich comparison operators below all delegate to __cmp__.  They return
    # NotImplemented for foreign types so the interpreter can apply its normal
    # fallback.  Note that, where defining __eq__ disables the inherited hash,
    # instances of this (mutable) class are not hashable.

    def __eq__(self, other):
        """Equality operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        """Inequality operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        """Less-than operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __le__(self, other):
        """Less-than-or-equal operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        """Greater-than operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        """Greater-than-or-equal operator, in terms of C{__cmp__}."""
        if not SplitConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
193
194
195
196
197
198
class LocalConfig(object):

    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, C{validate} and C{addConfig} methods.

    Instances compare by value, on their split configuration.  The comparison
    is available through C{__cmp__} and through the rich comparison operators,
    so it does not depend on the interpreter honouring C{__cmp__}.

    @note: Lists within this class are "unordered" for equality comparisons.

    @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either C{xmlData} or
        C{xmlPath} then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        C{validate} is C{False}, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        @note: It is strongly suggested that the C{validate} option always be set
        to C{True} (the default) unless there is a specific need to read in
        invalid configuration from disk.

        @param xmlData: XML data representing configuration.
        @type xmlData: String data.

        @param xmlPath: Path to an XML file on disk.
        @type xmlPath: Absolute path to a file on disk.

        @param validate: Validate the document after parsing it.
        @type validate: Boolean true/false.

        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
        @raise ValueError: If the parsed configuration document is not valid.
        """
        self._split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlPath is not None:
            # Close the file explicitly rather than leaving it to the garbage collector.
            xmlFile = open(xmlPath)
            try:
                xmlData = xmlFile.read()
            finally:
                xmlFile.close()
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "LocalConfig(%s)" % (self.split,)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another C{LocalConfig}.

        Only the split configuration takes part in the comparison.  A split
        configuration of C{None} is ordered before any other value explicitly,
        rather than relying on the interpreter to order C{None}.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self._split != other._split:
            if self._split is None:
                return -1
            if other._split is None:
                return 1
            if self._split < other._split:
                return -1
            else:
                return 1
        return 0

    def _comparable(other):
        """
        Indicates whether C{other} is something C{__cmp__} knows how to handle.
        """
        return other is None or isinstance(other, LocalConfig)
    _comparable = staticmethod(_comparable)

    # The rich comparison operators below all delegate to __cmp__.  They return
    # NotImplemented for foreign types so the interpreter can apply its normal
    # fallback.  Note that, where defining __eq__ disables the inherited hash,
    # instances of this (mutable) class are not hashable.

    def __eq__(self, other):
        """Equality operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        """Inequality operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        """Less-than operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __le__(self, other):
        """Less-than-or-equal operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        """Greater-than operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        """Greater-than-or-equal operator, in terms of C{__cmp__}."""
        if not LocalConfig._comparable(other):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not C{None}, the value must be a C{SplitConfig} object.
        @raise ValueError: If the value is not a C{SplitConfig}
        """
        if value is not None and not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
        self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        @raise ValueError: If one of the validations fails.
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is added when there is no split configuration.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param xmlDom: DOM tree as from C{impl.createDocument()}.
        @param parentNode: Parent that the section should be appended to.
        """
        if self.split is not None:
            sectionNode = addContainerNode(xmlDom, parentNode, "split")
            addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
            addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree and then calls a
        static method to parse the split configuration section.

        @param xmlData: XML data to be parsed
        @type xmlData: String data

        @raise ValueError: If the XML cannot be successfully parsed.
        """
        # Only the parent node is needed; the DOM object itself is not used here.
        (_, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param parent: Parent node to search beneath.

        @return: C{SplitConfig} object or C{None} if the section does not exist.
        @raise ValueError: If some filled-in value is invalid.
        """
        split = None
        section = readFirstChild(parent, "split")
        if section is not None:
            split = SplitConfig()
            split.sizeLimit = readByteQuantity(section, "size_limit")
            split.splitSize = readByteQuantity(section, "split_size")
        return split
    _parseSplit = staticmethod(_parseSplit)
386
387
388
389
390
391
392
393
394
395
422
423
424
425
426
427
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits the oversized backup files found in a daily staging directory.

    The candidate files are whatever C{getBackupFiles} returns for the
    directory.  Every candidate whose size on disk is strictly greater than
    C{sizeLimit.bytes} is handed to L{_splitFile} with C{removeSource=True};
    all other candidates are left alone.

    @note: Indicator files (C{"cback.store"}, C{"cback.stage"}, etc.) are
    meant to be ignored.  That filtering is presumably done by
    C{getBackupFiles}; it is not done here.

    @param dailyDir: Daily staging directory to process
    @param sizeLimit: Size limit, as an object whose C{bytes} attribute is the limit in bytes
    @param splitSize: Split size, passed through to L{_splitFile}
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by

    @raise ValueError: Presumably from C{getBackupFiles}, if the daily staging directory does not exist.
    """
    logger.debug("Begin splitting contents of [%s]." % dailyDir)
    for candidate in getBackupFiles(dailyDir):
        fileSize = float(os.path.getsize(candidate))
        if not fileSize > sizeLimit.bytes:
            continue
        _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s]." % dailyDir)
452
453
454
455
456
457
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
    """
    Splits the source file into chunks of the indicated size.

    The C{split} utility is run from within the source file's directory, so
    the chunks are created next to the source file.  They are named
    C{<filename>_NNNNN}, where C{NNNNN} is a five-digit numeric suffix that
    starts at C{00000}.  The working directory in effect when this function
    was called is always restored before it returns.

    Every chunk, including the last one, is checked for existence and then
    passed to C{changeOwnership} with the indicated backup user and group.  If
    C{removeSource} is C{True}, then the source file will be removed after it
    is successfully split.

    @param sourcePath: Absolute path of the source file to split
    @param splitSize: Chunk size, as an object whose C{bytes} attribute is the size in bytes
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by
    @param removeSource: Indicates whether to remove the source file

    @raise ValueError: If the source path does not exist.
    @raise IOError: If there is a problem splitting the file, interpreting the
    output of the split command, or removing the source file.
    """
    cwd = os.getcwd()
    try:
        if not os.path.exists(sourcePath):
            raise ValueError("Source path [%s] does not exist." % sourcePath)
        # Work with an absolute path from here on.  The working directory is
        # about to change, and a relative path would otherwise stop pointing at
        # the source file (which would silently skip the removal below).
        absolutePath = os.path.abspath(sourcePath)
        dirname = os.path.dirname(absolutePath)
        filename = os.path.basename(absolutePath)
        prefix = "%s_" % filename
        chunkBytes = int(splitSize.bytes)
        os.chdir(dirname)
        command = resolveCommand(SPLIT_COMMAND)
        args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
        (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
        if result != 0:
            raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
        if not output:
            raise IOError("Unable to parse output from split command.")
        # The last line of verbose output names the last chunk that was
        # created.  The file name is escaped because it may contain regular
        # expression metacharacters.  Whatever sits between "creating file" and
        # the name is skipped, since versions of split are believed to differ
        # in how they quote the name (`name' versus 'name').
        pattern = re.compile(r"creating file .*?%s(\d+)" % re.escape(prefix))
        match = pattern.search(output[-1])
        if match is None:
            raise IOError("Unable to parse output from split command.")
        lastIndex = int(match.group(1))
        # Suffixes start at zero, so the chunks are numbered 0..lastIndex inclusive.
        for index in range(0, lastIndex + 1):
            path = "%s%05d" % (prefix, index)
            if not os.path.exists(path):
                raise IOError("After call to split, expected file [%s] does not exist." % path)
            changeOwnership(path, backupUser, backupGroup)
        if removeSource and os.path.exists(absolutePath):
            try:
                os.remove(absolutePath)
            except Exception:
                raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
            logger.debug("Completed removing old file [%s]." % sourcePath)
    finally:
        os.chdir(cwd)
507