Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | [no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.3) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 914 2008-04-28 02:40:04Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import sys 
  52  import os 
  53  import re 
  54  import sha 
  55  import math 
  56  import logging 
  57  import tarfile 
  58   
  59  # Cedar Backup modules 
  60  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  61  from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList 
  62  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  63   
  64   
  65  ######################################################################## 
  66  # Module-wide variables 
  67  ######################################################################## 
  68   
  69  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  70   
  71   
  72  ######################################################################## 
  73  # FilesystemList class definition 
  74  ######################################################################## 
  75   
76 -class FilesystemList(list):
77 78 ###################### 79 # Class documentation 80 ###################### 81 82 """ 83 Represents a list of filesystem items. 84 85 This is a generic class that represents a list of filesystem items. Callers 86 can add individual files or directories to the list, or can recursively add 87 the contents of a directory. The class also allows for up-front exclusions 88 in several forms (all files, all directories, all items matching a pattern, 89 all items whose basename matches a pattern, or all directories containing a 90 specific "ignore file"). Symbolic links are typically backed up 91 non-recursively, i.e. the link to a directory is backed up, but not the 92 contents of that link (we don't want to deal with recursive loops, etc.). 93 94 The custom methods such as L{addFile} will only add items if they exist on 95 the filesystem and do not match any exclusions that are already in place. 96 However, since a FilesystemList is a subclass of Python's standard list 97 class, callers can also add items to the list in the usual way, using 98 methods like C{append()} or C{insert()}. No validations apply to items 99 added to the list in this way; however, many list-manipulation methods deal 100 "gracefully" with items that don't exist in the filesystem, often by 101 ignoring them. 102 103 Once a list has been created, callers can remove individual items from the 104 list using standard methods like C{pop()} or C{remove()} or they can use 105 custom methods to remove specific types of entries or entries which match a 106 particular pattern. 107 108 @note: Regular expression patterns that apply to paths are assumed to be 109 bounded at front and back by the beginning and end of the string, i.e. they 110 are treated as if they begin with C{^} and end with C{$}. This is true 111 whether we are matching a complete path or a basename. 112 113 @note: Some platforms, like Windows, do not support soft links. 
On those 114 platforms, the ignore-soft-links flag can be set, but it won't do any good 115 because the operating system never reports a file as a soft link. 116 117 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 118 removeLinks, removeMatch, removeInvalid, normalize, verify, 119 excludeFiles, excludeDirs, excludeLinks, excludePaths, 120 excludePatterns, excludeBasenamePatterns, ignoreFile 121 """ 122 123 124 ############## 125 # Constructor 126 ############## 127
def __init__(self):
   """
   Initializes an empty list with no configured exclusions.

   The private backing attributes are created first.  Each exclusion is
   then assigned through its public attribute name, so the defaults pass
   through the same property setters that callers use.
   """
   list.__init__(self)

   # Backing fields (same creation order as always).
   # NOTE(review): _excludePaths is not read by any accessor visible in
   # this section; the excludePaths setter stores into
   # _absoluteExcludePaths instead.
   self._excludeFiles = self._excludeDirs = self._excludeLinks = False
   self._excludePaths = self._excludePatterns = None
   self._excludeBasenamePatterns = self._ignoreFile = None

   # Defaults, routed through the property setters.
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
145 146 147 ############# 148 # Properties 149 ############# 150
151 - def _setExcludeFiles(self, value):
152 """ 153 Property target used to set the exclude files flag. 154 No validations, but we normalize the value to C{True} or C{False}. 155 """ 156 if value: 157 self._excludeFiles = True 158 else: 159 self._excludeFiles = False
160
161 - def _getExcludeFiles(self):
162 """ 163 Property target used to get the exclude files flag. 164 """ 165 return self._excludeFiles
166
167 - def _setExcludeDirs(self, value):
168 """ 169 Property target used to set the exclude directories flag. 170 No validations, but we normalize the value to C{True} or C{False}. 171 """ 172 if value: 173 self._excludeDirs = True 174 else: 175 self._excludeDirs = False
176
177 - def _getExcludeDirs(self):
178 """ 179 Property target used to get the exclude directories flag. 180 """ 181 return self._excludeDirs
182 192 198
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.
   A C{None} value is converted to an empty list.
   Elements do not have to exist on disk at the time of assignment.

   The replacement list is fully built before it is stored, so an
   assignment that is rejected leaves the previously configured
   exclusions in place rather than a partially-filled list.

   @param value: Sequence of absolute paths, or C{None}.
   @raise ValueError: If any list element is not an absolute path.
   """
   paths = AbsolutePathList()
   if value is not None:
      paths.extend(value)   # may raise; nothing has been stored yet
   self._absoluteExcludePaths = paths
209
210 - def _getExcludePaths(self):
211 """ 212 Property target used to get the absolute exclude paths list. 213 """ 214 return self._absoluteExcludePaths
215
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.
   A C{None} value is converted to an empty list.

   The replacement list is fully built before it is stored, so an
   assignment that fails while the list is being filled leaves the
   previously configured patterns in place.

   @param value: Sequence of regular expression patterns, or C{None}.
   @raise ValueError: Presumably raised by C{RegexList} when an element
                      is not a valid regular expression (confirm against
                      C{CedarBackup2.util.RegexList}).
   """
   patterns = RegexList()
   if value is not None:
      patterns.extend(value)   # may raise; nothing has been stored yet
   self._excludePatterns = patterns
224
225 - def _getExcludePatterns(self):
226 """ 227 Property target used to get the exclude patterns list. 228 """ 229 return self._excludePatterns
230
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.
   A C{None} value is converted to an empty list.

   The replacement list is fully built before it is stored, so an
   assignment that fails while the list is being filled leaves the
   previously configured patterns in place.

   @param value: Sequence of regular expression patterns, or C{None}.
   @raise ValueError: Presumably raised by C{RegexList} when an element
                      is not a valid regular expression (confirm against
                      C{CedarBackup2.util.RegexList}).
   """
   patterns = RegexList()
   if value is not None:
      patterns.extend(value)   # may raise; nothing has been stored yet
   self._excludeBasenamePatterns = patterns
239
241 """ 242 Property target used to get the exclude basename patterns list. 243 """ 244 return self._excludeBasenamePatterns
245
246 - def _setIgnoreFile(self, value):
247 """ 248 Property target used to set the ignore file. 249 The value must be a non-empty string if it is not C{None}. 250 @raise ValueError: If the value is an empty string. 251 """ 252 if value is not None: 253 if len(value) < 1: 254 raise ValueError("The ignore file must be a non-empty string.") 255 self._ignoreFile = value
256
257 - def _getIgnoreFile(self):
258 """ 259 Property target used to get the ignore file. 260 """ 261 return self._ignoreFile

# Public attributes, each backed by the getter/setter pair defined above.
# No deleter is supplied for any of them.
excludeFiles = property(fget=_getExcludeFiles, fset=_setExcludeFiles,
                        doc="Boolean indicating whether files should be excluded.")
excludeDirs = property(fget=_getExcludeDirs, fset=_setExcludeDirs,
                       doc="Boolean indicating whether directories should be excluded.")
excludeLinks = property(fget=_getExcludeLinks, fset=_setExcludeLinks,
                        doc="Boolean indicating whether soft links should be excluded.")
excludePaths = property(fget=_getExcludePaths, fset=_setExcludePaths,
                        doc="List of absolute paths to be excluded.")
excludePatterns = property(fget=_getExcludePatterns, fset=_setExcludePatterns,
                           doc="List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(fget=_getExcludeBasenamePatterns, fset=_setExcludeBasenamePatterns,
                                   doc="List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(fget=_getIgnoreFile, fset=_setIgnoreFile,
                      doc="Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a file to the list.

   The path must exist and must be a file or a link to an existing file.
   It is appended to the list unless one of the configured exclusions
   applies.  Exclusions are checked in this order: L{excludeLinks},
   L{excludeFiles}, L{excludePaths}, L{excludePatterns} and finally
   L{excludeBasenamePatterns}.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (C{1}, or C{0} if excluded).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not (os.path.exists(path) and os.path.isfile(path)):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")

   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Patterns are anchored at both ends; they are assumed to be valid
   # because they are held in a RegexList.
   for expression in self.excludePatterns:
      if re.match(r"^%s$" % expression, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, expression))
         return 0
   name = os.path.basename(path)
   for expression in self.excludeBasenamePatterns:
      if re.match(r"^%s$" % expression, name):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, expression))
         return 0

   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
317
def addDir(self, path):
   """
   Adds a directory to the list.

   The path must exist and must be a directory or a link to an existing
   directory.  It is appended to the list unless one of the configured
   exclusions applies.  Exclusions are checked in this order:
   L{excludeLinks}, L{excludeDirs}, L{excludePaths}, L{excludePatterns}
   and finally L{excludeBasenamePatterns}.  The L{ignoreFile} does not
   apply to this method, only to L{addDirContents}.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (C{1}, or C{0} if excluded).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Patterns are anchored at both ends; they are assumed to be valid
   # because they are held in a RegexList.
   for expression in self.excludePatterns:
      if re.match(r"^%s$" % expression, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, expression))
         return 0
   name = os.path.basename(path)
   for expression in self.excludeBasenamePatterns:
      if re.match(r"^%s$" % expression, name):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, expression))
         return 0

   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
360
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   The contents of the directory (as well as the directory path itself)
   will be recursively added to the list, subject to any exclusions that
   are in place.  If you only want the directory and its immediate
   contents to be added, then pass in C{recursive=False}.

   @note: If a directory's absolute path matches an exclude pattern or
   path, or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  However, the linkDepth parameter controls whether any soft
   links I{within} the directory will be recursed.  The link depth is the
   maximum depth of the tree at which soft links should be followed.  So,
   a depth of 0 does not follow any soft links, a depth of 1 follows only
   links within the passed-in directory, a depth of 2 follows the links at
   the next level down, etc.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether any given directory
   path itself is added to the list once it has been discovered.  It does
   I{not} modify any behavior related to directory recursion.

   @note: If you call this method I{on a link to a directory} that link
   will never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @param dereference: Indicates whether soft links, if followed, should be dereferenced
   @type dereference: Boolean value

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   # Encode and normalize once here; the internal method receives the
   # cleaned-up path, with addSelf passed as its includePath argument.
   normalized = normalizeDir(encodePath(path))
   return self._addDirContentsInternal(normalized, addSelf, recursive, linkDepth, dereference)
417
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
   """
   Internal implementation of C{addDirContents}.

   This internal implementation exists due to some refactoring.  Some
   subclasses need to add the contents of a directory, but not the
   directory itself.  That differs from the standard C{FilesystemList}
   behavior and makes a special case out of the first call in the
   recursive chain.  Rather than expose the modified interface,
   C{addDirContents} is wholly implemented in terms of this method.

   The linkDepth parameter controls whether soft links are followed when
   contents are added recursively.  Every recursive call reduces the value
   by one.  If the value is zero or less, then soft links will just be
   added as directories, but will not be followed.  This means that links
   are followed to a I{constant depth} starting from the top-most
   directory.

   There is one difference between soft links and directories: soft links
   that are added recursively are not placed into the list explicitly.
   This is because if we do add the links recursively, the resulting tar
   file gets a little confused (it has a link and a directory with the
   same name).

   @note: If you call this method I{on a link to a directory} that link
   will never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed
   @param dereference: Indicates whether soft links, if followed, should be dereferenced

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   # Any exclusion that matches the directory prunes the entire subtree.
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0
   for expression in self.excludePatterns:   # valid by construction (RegexList)
      if re.match(r"^%s$" % expression, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, expression))
         return 0
   name = os.path.basename(path)
   for expression in self.excludeBasenamePatterns:   # valid by construction (RegexList)
      if re.match(r"^%s$" % expression, name):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, expression))
         return 0
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return 0

   count = 0
   if includePath:
      count += self.addDir(path)   # addDir applies its own exclusions, so this may add nothing

   followLinks = linkDepth > 0
   for child in os.listdir(path):
      childPath = os.path.join(path, child)
      if os.path.isfile(childPath):
         if followLinks and dereference:
            target = dereferenceLink(childPath)
            if target != childPath:
               count += self.addFile(target)
         count += self.addFile(childPath)
      elif os.path.isdir(childPath):
         isLink = os.path.islink(childPath)
         if not recursive or (isLink and not followLinks):
            # Not descending: record the directory (or link) itself only.
            count += self.addDir(childPath)
         elif not isLink:
            # Real directory: descend and include the directory itself.
            count += self._addDirContentsInternal(childPath, True, recursive, linkDepth - 1, dereference)
         elif dereference:
            # Followed link, dereferenced: add the target's tree when it
            # differs from the link, then the link itself.
            target = dereferenceLink(childPath)
            if target != childPath:
               count += self._addDirContentsInternal(target, True, recursive, linkDepth - 1, dereference)
            count += self.addDir(childPath)
         else:
            # Followed link, not dereferenced: add its contents but not
            # the link itself (see the tar note in the docstring).
            count += self._addDirContentsInternal(childPath, False, recursive, linkDepth - 1, dereference)
   return count
506 507 508 ################# 509 # Remove methods 510 ################# 511
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   If C{pattern} is not passed in or is C{None}, then all file entries
   will be removed from the list.  Otherwise, only those file entries
   matching the pattern will be removed.  Any entry which does not exist
   on disk will be ignored (use L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all files, then you will be better off setting
   L{excludeFiles} to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None
   if pattern is not None:
      try:
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   count = 0
   for item in list(self):   # iterate over a snapshot, since we mutate self
      if not os.path.exists(item) or not os.path.isfile(item):
         continue
      if matcher is not None and not matcher.match(item):
         continue
      self.remove(item)
      logger.debug("Removed path [%s] from list." % item)
      count += 1
   logger.debug("Removed a total of %d entries." % count)
   return count
551
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   If C{pattern} is not passed in or is C{None}, then all directory
   entries will be removed from the list.  Otherwise, only those directory
   entries matching the pattern will be removed.  Any entry which does not
   exist on disk will be ignored (use L{removeInvalid} to purge those
   entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all directories, then you will be better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that
   this will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None
   if pattern is not None:
      try:
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   count = 0
   for item in list(self):   # iterate over a snapshot, since we mutate self
      if not os.path.exists(item) or not os.path.isdir(item):
         continue
      if matcher is None:
         self.remove(item)
         logger.debug("Removed path [%s] from list." % item)
         count += 1
      elif matcher.match(item):
         self.remove(item)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (item, pattern))
         count += 1
   logger.debug("Removed a total of %d entries." % count)
   return count
593 633
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   This method removes from the list all entries which match the passed in
   C{pattern}.  Since there is no need to check the type of each entry, it
   is faster to call this method than to call the L{removeFiles},
   L{removeDirs} or L{removeLinks} methods individually.  If you know
   which patterns you will want to remove ahead of time, you may be better
   off setting L{excludePatterns} or L{excludeBasenamePatterns} before
   adding items to the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any
   pattern you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      matcher = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")

   # Pick the victims first, then remove them one by one.
   doomed = [item for item in self if matcher.match(item)]
   for item in doomed:
      self.remove(item)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (item, pattern))
   count = len(doomed)
   logger.debug("Removed a total of %d entries." % count)
   return count
667
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   Every entry for which C{os.path.exists} is false is dropped.  No
   attention is paid to whether the entries are files or directories.

   @return: Number of entries removed.
   """
   missing = [item for item in self if not os.path.exists(item)]
   for item in missing:
      self.remove(item)
      logger.debug("Removed path [%s] from list." % item)
   count = len(missing)
   logger.debug("Removed a total of %d entries." % count)
   return count
686 687 688 ################## 689 # Utility methods 690 ################## 691
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then collapsed so that every distinct
   value appears exactly once.  The number of entries dropped is written
   to the debug log.
   """
   orig = len(self)
   self.sort()
   unique = []
   for entry in self:
      # After sorting, equal entries are adjacent, so comparing against
      # the last entry kept is enough to spot a duplicate.
      if not unique or unique[-1] != entry:
         unique.append(entry)
   self[:] = unique   # replace contents in place; self stays the same object
   new = len(self)
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig - new, orig, new))
701
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that does not exist.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   for item in self:
      if os.path.exists(item):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % item)
      return False
   logger.debug("All entries in list are valid.")
   return True
713 714 715 ######################################################################## 716 # SpanItem class definition 717 ######################################################################## 718
class SpanItem(object):
   """
   Item returned by L{BackupFileList.generateSpan}.

   Plain value holder: the constructor stores its arguments unchanged as
   attributes of the same names.
   """

   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity associated with this item (presumably in
                       bytes, like C{size}; confirm against
                       L{BackupFileList.generateSpan})
      @param utilization: Utilization, as a percentage (0-100)
      """
      (self.fileList, self.size) = (fileList, size)
      (self.capacity, self.utilization) = (capacity, utilization)
734 735 736 ######################################################################## 737 # BackupFileList class definition 738 ######################################################################## 739
740 -class BackupFileList(FilesystemList):
741 742 ###################### 743 # Class documentation 744 ###################### 745 746 """ 747 List of files to be backed up. 748 749 A BackupFileList is a L{FilesystemList} containing a list of files to be 750 backed up. It only contains files, not directories (soft links are treated 751 like files). On top of the generic functionality provided by 752 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 753 for each file in the list, and it also provides a method to calculate the 754 total size of the files in the list and a way to export the list into tar 755 form. 756 757 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 758 generateFitted, generateTarfile, removeUnchanged 759 """ 760 761 ############## 762 # Constructor 763 ############## 764
def __init__(self):
   """
   Initializes a list with no configured exclusions.

   All of the work is delegated to the L{FilesystemList} constructor; this
   class keeps no extra state of its own here.
   """
   FilesystemList.__init__(self)
768 769 770 ################################ 771 # Overridden superclass methods 772 ################################ 773
def addDir(self, path):
   """
   Adds a directory to the list.

   Note that this class does not allow directories to be added by
   themselves (a backup list contains only files).  However, since links
   to directories are technically files, we allow them to be added.

   A path that is a directory and is not a soft link is skipped, and zero
   is returned.  Every other path is handed to the superclass method, so
   all of the superclass's existing validations and restrictions apply.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   isRealDirectory = os.path.isdir(path) and not os.path.islink(path)
   if isRealDirectory:
      return 0
   return FilesystemList.addDir(self, path)
801 802 803 ################## 804 # Utility methods 805 ################## 806
def totalSize(self):
   """
   Returns the total size among all files in the list.
   Only files are counted.
   Soft links that point at files are ignored.
   Entries which do not exist on disk are ignored.
   @return: Total size, in bytes (as a float)
   """
   sizes = [float(os.stat(item).st_size)
            for item in self
            if os.path.isfile(item) and not os.path.islink(item)]
   return sum(sizes, 0.0)
820
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.
   The mapping does include soft links, which are listed with size zero.
   Entries which are neither soft links nor files (including entries which
   do not exist on disk) are left out.
   @return: Dictionary mapping file to file size (as a float)
   """
   sizes = {}
   for item in self:
      if os.path.islink(item):
         size = 0.0
      elif os.path.isfile(item):
         size = float(os.stat(item).st_size)
      else:
         continue
      sizes[item] = size
   return sizes
835
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   Currently, the digest is an SHA hash, which should be pretty secure.
   In the future, this might be a different kind of hash, but we guarantee
   that the type of the hash will not change unless the library major
   version number is bumped.

   Entries which do not exist on disk are ignored.

   Soft links are ignored.  We would end up generating a digest for the
   file that the soft link points at, which doesn't make any sense.

   If C{stripPrefix} is passed in, then that prefix will be stripped from
   each key when the map is generated.  This can be useful in generating
   two "relative" digest maps to be compared to one another.  Only a
   leading occurrence is stripped: an entry that does not start with
   C{stripPrefix} keeps its full path as the key, even if the same text
   appears somewhere later in the path.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = {}
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         key = entry
         # Strip only a true prefix; never cut text out of the middle.
         if stripPrefix is not None and entry.startswith(stripPrefix):
            key = entry[len(stripPrefix):]
         table[key] = BackupFileList._generateDigest(entry)
   return table
870
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various
   Python hashing algorithms.

   In my tests using a 110 MB file on CD, the original implementation
   requires 111 seconds.  This implementation requires only 40-45 seconds,
   which is a pretty substantial speed-up.

   Practice shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The 4kB number makes me a little
   suspicious, and I think it might be related to the size of a filesystem
   read at the hardware level.  However, I've decided to just hardcode 4096
   until I have evidence that shows it's worthwhile making the read size
   configurable.

   The file is always closed before this function returns or raises, even
   when a read fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read.
   """
   s = sha.new()
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      while True:
         readString = f.read(4096)  # see notes above
         if not readString:
            break  # empty read means end-of-file
         s.update(readString)
   finally:
      f.close()  # don't leak the handle if read() or update() blows up
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
_generateDigest = staticmethod(_generateDigest)
def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Picks out the items from this list that fit within a given capacity.

   Callers sometimes want to include everything in a list but cannot,
   because the items do not all fit in the space available.  This method
   leaves the list itself untouched and hands back only the fitted items,
   so nothing is lost if the fitted result turns out to be unsatisfactory.

   The fitting is done by one of the knapsack functions.  Unless told
   otherwise, the worst fit algorithm is used, but first fit, best fit and
   alternate fit can be chosen instead.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Fitted items (first element of the knapsack function's result)
   @raise ValueError: If the algorithm is invalid.
   """
   candidates = self._getKnapsackTable()
   fit = BackupFileList._getKnapsackFunction(algorithm)
   outcome = fit(candidates, capacity)
   return outcome[0]
944
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Splits the list of items into sub-lists that each fit in a given capacity.

   Sometimes, callers need to split a backup file list into a set of
   smaller lists.  For instance, you could use this to "span" the files
   across a set of discs.

   The fitting is done by one of the knapsack functions.  Unless told
   otherwise, the worst fit algorithm is used, but first fit, best fit and
   alternate fit can be chosen instead.  The chosen function is applied
   repeatedly to whatever has not been placed yet, and each pass produces
   one L{SpanItem}.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   spans = []
   fit = BackupFileList._getKnapsackFunction(algorithm)
   remaining = self._getKnapsackTable(capacity)
   passNumber = 0
   while len(remaining) > 0:
      passNumber += 1
      outcome = fit(remaining, capacity)
      chosen = outcome[0]
      if len(chosen) == 0:
         # Should never happen, since _getKnapsackTable() rejects oversized files, but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % passNumber)
      removeKeys(remaining, chosen)   # whatever was placed is out of the running
      used = outcome[1]
      utilization = (float(used)/float(capacity))*100.0
      spans.append(SpanItem(chosen, used, capacity, utilization))
   return spans
987
988 - def _getKnapsackTable(self, capacity=None):
989 """ 990 Converts the list into the form needed by the knapsack algorithms. 991 @return: Dictionary mapping file name to tuple of (file path, file size). 992 """ 993 table = { } 994 for entry in self: 995 if os.path.islink(entry): 996 table[entry] = (entry, 0.0) 997 elif os.path.isfile(entry): 998 size = float(os.stat(entry).st_size) 999 if capacity is not None: 1000 if size > capacity: 1001 raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity))) 1002 table[entry] = (entry, size) 1003 return table
1004
1005 - def _getKnapsackFunction(algorithm):
1006 """ 1007 Returns a reference to the function associated with an algorithm name. 1008 Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit" 1009 @param algorithm: Name of the algorithm 1010 @return: Reference to knapsack function 1011 @raise ValueError: If the algorithm name is unknown. 1012 """ 1013 if algorithm == "first_fit": 1014 return firstFit 1015 elif algorithm == "best_fit": 1016 return bestFit 1017 elif algorithm == "worst_fit": 1018 return worstFit 1019 elif algorithm == "alternate_fit": 1020 return alternateFit 1021 else: 1022 raise ValueError("Algorithm [%s] is invalid." % algorithm);
1023 _getKnapsackFunction = staticmethod(_getKnapsackFunction) 1024
1025 - def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
1026 """ 1027 Creates a tar file containing the files in the list. 1028 1029 By default, this method will create uncompressed tar files. If you pass 1030 in mode C{'targz'}, then it will create gzipped tar files, and if you 1031 pass in mode C{'tarbz2'}, then it will create bzipped tar files. 1032 1033 The tar file will be created as a GNU tar archive, which enables extended 1034 file name lengths, etc. Since GNU tar is so prevalent, I've decided that 1035 the extra functionality out-weighs the disadvantage of not being 1036 "standard". 1037 1038 If you pass in C{flat=True}, then a "flat" archive will be created, and 1039 all of the files will be added to the root of the archive. So, the file 1040 C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}. 1041 1042 By default, the whole method call fails if there are problems adding any 1043 of the files to the archive, resulting in an exception. Under these 1044 circumstances, callers are advised that they might want to call 1045 L{removeInvalid()} and then attempt to extract the tar file a second 1046 time, since the most common cause of failures is a missing file (a file 1047 that existed when the list was built, but is gone again by the time the 1048 tar file is built). 1049 1050 If you want to, you can pass in C{ignore=True}, and the method will 1051 ignore errors encountered when adding individual files to the archive 1052 (but not errors opening and closing the archive itself). 1053 1054 We'll always attempt to remove the tarfile from disk if an exception will 1055 be thrown. 1056 1057 @note: No validation is done as to whether the entries in the list are 1058 files, since only files or soft links should be in an object like this. 1059 However, to be safe, everything is explicitly added to the tar archive 1060 non-recursively so it's safe to include soft links to directories. 
1061 1062 @note: The Python C{tarfile} module, which is used internally here, is 1063 supposed to deal properly with long filenames and links. In my testing, 1064 I have found that it appears to be able to add long really long filenames 1065 to archives, but doesn't do a good job reading them back out, even out of 1066 an archive it created. Fortunately, all Cedar Backup does is add files 1067 to archives. 1068 1069 @param path: Path of tar file to create on disk 1070 @type path: String representing a path on disk 1071 1072 @param mode: Tar creation mode 1073 @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'} 1074 1075 @param ignore: Indicates whether to ignore certain errors. 1076 @type ignore: Boolean 1077 1078 @param flat: Creates "flat" archive by putting all items in root 1079 @type flat: Boolean 1080 1081 @raise ValueError: If mode is not valid 1082 @raise ValueError: If list is empty 1083 @raise ValueError: If the path could not be encoded properly. 1084 @raise TarError: If there is a problem creating the tar file 1085 """ 1086 path = encodePath(path) 1087 if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.") 1088 if(mode == 'tar'): tarmode = "w:" 1089 elif(mode == 'targz'): tarmode = "w:gz" 1090 elif(mode == 'tarbz2'): tarmode = "w:bz2" 1091 else: raise ValueError("Mode [%s] is not valid." % mode) 1092 try: 1093 tar = tarfile.open(path, tarmode) 1094 tar.posix = False # make a GNU-compatible archive without file length limits 1095 for entry in self: 1096 try: 1097 if flat: 1098 tar.add(entry, arcname=os.path.basename(entry), recursive=False) 1099 else: 1100 tar.add(entry, recursive=False) 1101 except tarfile.TarError, e: 1102 if not ignore: 1103 raise e 1104 logger.info("Unable to add file [%s]; going on anyway." % entry) 1105 except OSError, e: 1106 if not ignore: 1107 raise tarfile.TarError(e) 1108 logger.info("Unable to add file [%s]; going on anyway." 
% entry) 1109 tar.close() 1110 except tarfile.ReadError, e: 1111 try: tar.close() 1112 except: pass 1113 if os.path.exists(path): 1114 try: os.remove(path) 1115 except: pass 1116 raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path) 1117 except tarfile.TarError, e: 1118 try: tar.close() 1119 except: pass 1120 if os.path.exists(path): 1121 try: os.remove(path) 1122 except: pass 1123 raise e
1124
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   This gives callers a convenient way to filter unneeded entries from a
   list.  The idea is that a caller captures a digest map from
   C{generateDigestMap} at some point in time (perhaps the beginning of the
   week) and saves it off using C{pickle} or some other method.  Later, the
   caller uses this method to filter out any files that have not changed
   relative to the saved-off map.

   If C{captureDigest} is C{True}, then digest information is captured for
   the entire list before the removal step occurs, using the same rules as
   in L{generateDigestMap}, and the check is a lookup into that complete
   digest map.

   If C{captureDigest} is C{False}, a digest value is only generated for
   files we actually need to check, and any entry in the list which isn't a
   file that currently exists on disk is ignored.

   The return value varies depending on C{captureDigest}, as well.  To
   preserve backwards compatibility, if C{captureDigest} is C{False}, then
   just the number of entries removed is returned.  Otherwise, a tuple of
   C{(entries removed, digest map)} is returned, where the digest map is in
   the form returned by L{generateDigestMap}.

   @note: For performance reasons, this method actually ends up rebuilding
   the list from scratch.  First, we build a temporary dictionary keyed on
   the items from the original list.  Then, we remove items as needed from
   the dictionary (which is faster than the equivalent operation on a
   list).  Finally, we replace the contents of the current list with the
   keys left in the dictionary.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed, or tuple of (number removed, digest map)
   """
   removed = 0
   current = {}
   if not captureDigest:
      for entry in self:
         current[entry] = None
      for (name, oldDigest) in digestMap.items():
         if name in current:
            if os.path.isfile(name) and not os.path.islink(name):
               if BackupFileList._generateDigest(name) == oldDigest:
                  removed += 1
                  del current[name]
                  logger.debug("Discarded unchanged file [%s]." % name)
      self[:] = current.keys()
      return removed
   captured = {}
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         digest = BackupFileList._generateDigest(entry)
         current[entry] = digest
         captured[entry] = digest
      else:
         current[entry] = None   # marks "not a plain file"; never compared
   for (name, oldDigest) in digestMap.items():
      # A None digest is equivalent to the file/link check in the other case
      if name in current and current[name] is not None and current[name] == oldDigest:
         removed += 1
         del current[name]
         logger.debug("Discarded unchanged file [%s]." % name)
   self[:] = current.keys()
   return (removed, captured)
1207 1208 1209 ######################################################################## 1210 # PurgeItemList class definition 1211 ######################################################################## 1212
class PurgeItemList(FilesystemList):

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory and its contents to be added, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is the
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e. soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      # The caller's addSelf is deliberately dropped: False is always passed
      # so that the directory itself never lands in the purge list.
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.
      The age is rounded down to whole days before it is compared.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative or cannot be converted to an integer.
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      for entry in self[:]:   # walk a copy, since we remove from the list as we go
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               ageInDays = calculateFileAge(entry)
               ageInWholeDays = math.floor(ageInDays)
               if ageInWholeDays < daysOld:
                  removed += 1
                  self.remove(entry)
            except OSError:
               pass   # can't work out the age; leave the entry in the list
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      Soft links are removed even when they are dangling.  This matters
      because a link's target may itself be in the list and may already have
      been deleted earlier in the same pass; a leftover dangling link would
      keep its directory from ever becoming empty.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         # No os.path.exists() guard here: exists() follows soft links and
         # reports False for a dangling link, which would then be skipped.
         if os.path.islink(entry) or os.path.isfile(entry):
            try:
               os.remove(entry)
               files += 1
               logger.debug("Purged file [%s]." % entry)
            except OSError:
               pass
      for entry in self:
         if os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)
               dirs += 1
               logger.debug("Purged empty directory [%s]." % entry)
            except OSError:
               pass   # typically means the directory is not empty
      return (files, dirs)
1387 1388 1389 ######################################################################## 1390 # Public functions 1391 ######################################################################## 1392 1393 ########################## 1394 # normalizeDir() function 1395 ########################## 1396
def normalizeDir(path):
   """
   Normalizes a directory name.

   A directory name is normalized by dropping a single trailing path
   separator, if there is one.  We want directories to show up within lists
   in a consistent way, even though from the user's perspective passing in
   C{/path/to/dir/} and C{/path/to/dir} are equivalent.  A path consisting
   of nothing but the path separator is returned unchanged.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep:
      return path   # the root directory is already as short as it gets
   if path[-1:] != os.sep:
      return path
   return path[:-1]
1414 1415 1416 ############################# 1417 # compareContents() function 1418 ############################# 1419
1420 -def compareContents(path1, path2, verbose=False):
1421 """ 1422 Compares the contents of two directories to see if they are equivalent. 1423 1424 The two directories are recursively compared. First, we check whether they 1425 contain exactly the same set of files. Then, we check to see every given 1426 file has exactly the same contents in both directories. 1427 1428 This is all relatively simple to implement through the magic of 1429 L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix 1430 off the front of each entry in the mapping it generates. This makes our 1431 comparison as simple as creating a list for each path, then generating a 1432 digest map for each path and comparing the two. 1433 1434 If no exception is thrown, the two directories are considered identical. 1435 1436 If the C{verbose} flag is C{True}, then an alternate (but slower) method is 1437 used so that any thrown exception can indicate exactly which file caused the 1438 comparison to fail. The thrown C{ValueError} exception distinguishes 1439 between the directories containing different files, and containing the same 1440 files with differing content. 1441 1442 @note: Symlinks are I{not} followed for the purposes of this comparison. 1443 1444 @param path1: First path to compare. 1445 @type path1: String representing a path on disk 1446 1447 @param path2: First path to compare. 1448 @type path2: String representing a path on disk 1449 1450 @param verbose: Indicates whether a verbose response should be given. 1451 @type verbose: Boolean 1452 1453 @raise ValueError: If a directory doesn't exist or can't be read. 1454 @raise ValueError: If the two directories are not equivalent. 1455 @raise IOError: If there is an unusual problem reading the directories. 
1456 """ 1457 try: 1458 path1List = BackupFileList() 1459 path1List.addDirContents(path1) 1460 path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1)) 1461 path2List = BackupFileList() 1462 path2List.addDirContents(path2) 1463 path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2)) 1464 compareDigestMaps(path1Digest, path2Digest, verbose) 1465 except IOError, e: 1466 logger.error("I/O error encountered during consistency check.") 1467 raise e
1468
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Compares two digest maps and throws an exception if they differ.

   In the non-verbose case, the two maps are simply compared for equality.
   In the verbose case, the sets of keys are compared first, and then the
   digest values are compared key by key, so that the exception can say
   whether the set of files differs or which file has different contents.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if verbose:
      names1 = UnorderedList(digest1.keys())
      names2 = UnorderedList(digest2.keys())
      if names1 != names2:
         raise ValueError("Directories contain a different set of files.")
      # Same key set on both sides, so every lookup below is safe
      for name in names1:
         if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
   elif digest1 != digest2:
      raise ValueError("Consistency check failed.")
1495