Package logilab :: Package common :: Module fileutils
[frames] | no frames]

Source Code for Module logilab.common.fileutils

  1  # copyright 2003-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
  2  # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr 
  3  # 
  4  # This file is part of logilab-common. 
  5  # 
  6  # logilab-common is free software: you can redistribute it and/or modify it under 
  7  # the terms of the GNU Lesser General Public License as published by the Free 
  8  # Software Foundation, either version 2.1 of the License, or (at your option) any 
  9  # later version. 
 10  # 
 11  # logilab-common is distributed in the hope that it will be useful, but WITHOUT 
 12  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 13  # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more 
 14  # details. 
 15  # 
 16  # You should have received a copy of the GNU Lesser General Public License along 
 17  # with logilab-common.  If not, see <http://www.gnu.org/licenses/>. 
 18  """File and file-path manipulation utilities. 
 19   
 20  :group path manipulation: first_level_directory, relative_path, is_binary,\ 
 21  get_by_ext, remove_dead_links 
  22  :group file manipulation: norm_read, norm_open, lines, stream_lines,\ 
 23  write_open_mode, ensure_fs_mode, export 
 24  :sort: path manipulation, file manipulation 
 25  """ 
 26  __docformat__ = "restructuredtext en" 
 27   
 28  import sys 
 29  import shutil 
 30  import mimetypes 
 31  from os.path import isabs, isdir, islink, split, exists, walk, normpath, join 
 32  from os.path import abspath 
 33  from os import sep, mkdir, remove, listdir, stat, chmod 
 34  from stat import ST_MODE, S_IWRITE 
 35  from cStringIO import StringIO 
 36   
 37  from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS 
 38  from logilab.common.shellutils import find 
 39   
def first_level_directory(path):
    """Return the first level directory of a path.

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'
    >>> first_level_directory('home/syt/')
    'home'
    >>>

    :type path: str
    :param path: the path for which we want the first level directory

    :rtype: str
    :return: the first level directory appearing in `path`
    """
    head, tail = split(path)
    if not tail and head != path:
        # `path` ends with a separator: `split` gave an empty tail, which
        # would stop the loop below before it starts. Split once more so
        # that we begin from the last real component.
        head, tail = split(head)
    while head and tail:
        head, tail = split(head)
    if tail:
        return tail
    # path was absolute, head is the fs root
    return head
64
def abspath_listdir(path):
    """List the entries of a directory as absolute paths.

    `path` is made absolute first, then every name returned by `listdir`
    is joined to it.

    >>> abspath_listdir('/home')
    ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return: the absolute path of each entry found in `path`
    """
    base = abspath(path)
    return [join(base, entry) for entry in listdir(base)]
75 76
def is_binary(filename):
    """Return true if filename may be a binary file, according to its
    extension.

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      true if the file is a binary file (actually if its mime type
      isn't beginning by text, or if no mime type could be guessed)
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: be conservative and consider the file binary
        return True
    return not mime_type.startswith('text')
93 94
def write_open_mode(filename):
    """Tell which mode should be used to open a file for writing.

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return:
      'wb' when `is_binary` considers the file binary, 'w' otherwise
    """
    mode = 'w'
    if is_binary(filename):
        mode += 'b'
    return mode
107 108
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """Check that the given file has the given mode(s) set, else try to
    set it.

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes
    """
    mode = stat(filepath)[ST_MODE]
    # compare against the whole mask: a plain truth test on
    # `mode & desired_mode` is satisfied as soon as one of the requested
    # flags is set, leaving the other ones missing
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
124 125
class ProtectedFile(file):
    """A special file-object class that automatically does a 'chmod +w'
    when needed.

    The original permissions of the file are read when the object is
    created and put back when it is closed (or garbage collected).

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """
    def __init__(self, filepath, mode):
        """Open `filepath` with `mode`, making it writable first if needed.

        :type filepath: str
        :param filepath: path of an existing file

        :type mode: str
        :param mode: the mode string given to the `file` constructor
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        # every mode able to write needs the permission, including the
        # update modes ('r+', 'w+', 'a+b', ...)
        if 'w' in mode or 'a' in mode or '+' in mode:
            if not self.original_mode & S_IWRITE:
                chmod(filepath, self.original_mode | S_IWRITE)
                self.mode_changed = True
        try:
            file.__init__(self, filepath, mode)
        except Exception:
            # the file could not be opened: don't leave it writable
            if self.mode_changed:
                chmod(filepath, self.original_mode)
                self.mode_changed = False
            raise

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # Don't re-chmod in case of several restore
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        self._restore_mode()
        file.close(self)

    def __del__(self):
        """close the file (hence restore its mode) if it is still open"""
        if not self.closed:
            self.close()
170 171
class UnresolvableError(Exception):
    """Error raised by `relative_path` when no relative path can be
    computed between the two given paths.
    """
176
def relative_path(from_file, to_file):
    """Compute the path leading from `from_file` to `to_file`, as needed
    to write in `from_file` a link pointing on `to_file`.

    Both paths are normalized first. When the two files are relative,
    they are expected to be relative to the same directory. When only
    `to_file` is absolute, it is returned as is.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError:
      if `from_file` is absolute while `to_file` is relative

    :rtype: str
    :return: the relative path of `to_file` from `from_file`
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    from_is_abs = isabs(from_file)
    to_is_abs = isabs(to_file)
    if to_is_abs and not from_is_abs:
        return to_file
    if from_is_abs and not to_is_abs:
        raise UnresolvableError()
    # directories holding the source file (its own name is left out)
    source_dirs = from_file.split(sep)[:-1]
    target_parts = to_file.split(sep)
    # count the leading directories shared by both paths; the last
    # component of the target is never consumed
    limit = min(len(source_dirs), len(target_parts) - 1)
    shared = 0
    while shared < limit and source_dirs[shared] == target_parts[shared]:
        shared += 1
    # climb out of every remaining source directory, then go down
    climb = ['..'] * (len(source_dirs) - shared)
    return sep.join(climb + target_parts[shared:])
from logilab.common.textutils import _LINE_RGX

# true when open() can be given the 'U' (universal newlines) flag, that is
# from python 2.3 on
_HAS_UNIV_OPEN = sys.version_info[:2] >= (2, 3)
def norm_read(path):
    """Return the content of the file with normalized line feeds.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    if _HAS_UNIV_OPEN:
        stream = open(path, 'U')
    else:
        stream = open(path)
    # close the file explicitly instead of relying on garbage collection
    try:
        content = stream.read()
    finally:
        stream.close()
    if _HAS_UNIV_OPEN:
        # line feeds have already been normalized while reading
        return content
    return _LINE_RGX.sub('\n', content)
261 262
def norm_open(path):
    """Return a stream for a file with content with normalized line feeds.

    The caller is responsible for closing the returned stream.

    :type path: str
    :param path: path to the file to open

    :rtype: file or StringIO
    :return: the opened file with normalized line feeds
    """
    if _HAS_UNIV_OPEN:
        return open(path, 'U')
    # no universal newlines support: read everything, normalize the line
    # feeds and serve the result from memory
    stream = open(path)
    try:
        content = stream.read()
    finally:
        # the real file is no longer needed once its content is loaded
        stream.close()
    return StringIO(_LINE_RGX.sub('\n', content))
275 276
def lines(path, comments=None):
    """Return a list of non empty lines in the file located at `path`.

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    stream = norm_open(path)
    # make sure the stream is closed even if reading it fails
    try:
        return stream_lines(stream, comments)
    finally:
        stream.close()
299 300
def stream_lines(stream, comments=None):
    """Collect the meaningful lines of a file like object.

    Each line is stripped; empty lines are dropped, as well as lines
    starting with `comments` when it is given.

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string marking a commented line (i.e. a stripped line
      starting with this string is not returned)

    :rtype: list
    :return: the list of stripped, non empty and non commented lines

    :warning: at some point this function will probably return an iterator
    """
    # prefer 'xreadlines' when the stream provides it
    try:
        fetch = stream.xreadlines
    except AttributeError:
        fetch = stream.readlines
    kept = []
    for raw in fetch():
        text = raw.strip()
        if not text:
            continue
        if comments is not None and text.startswith(comments):
            continue
        kept.append(text)
    return kept
329 330
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """Make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions.

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist:
      list of files or directories to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False
    """
    def make_mirror(_, directory, fnames):
        """walk handler"""
        # names are removed in place from the list given by `walk`
        # NOTE(review): os.path.walk documents that this also prevents the
        # recursion into the removed directories -- confirm
        for norecurs in blacklist:
            try:
                fnames.remove(norecurs)
            except ValueError:
                continue
        for filename in fnames:
            # don't include binary files
            for ext in ignore_ext:
                if filename.endswith(ext):
                    break
            else:
                src = join(directory, filename)
                # join the part of `src` located below `from_dir` to
                # `to_dir`: a plain string concatenation gives a wrong
                # path when `from_dir` ends with a separator
                dest = join(to_dir, src[len(from_dir):].lstrip(sep))
                if verbose:
                    print >> sys.stderr, src, '->', dest
                if isdir(src):
                    if not exists(dest):
                        mkdir(dest)
                else:
                    if exists(dest):
                        remove(dest)
                    shutil.copy2(src, dest)
    try:
        mkdir(to_dir)
    except OSError:
        # best effort: typically the destination directory already exists
        pass
    walk(from_dir, make_mirror, None)

# NOTE(review): this listing is truncated after `export`; the only surviving
# trace of the following definition is its last statement,
# `walk(directory, _remove_dead_link, None)`.