1
2
3
4 """Checksumming convenience classes
5
Provides reporter, parser and scanner classes for creating and verifying
checkm and bagit checksum manifest files.
7
8 [@]SourceFileOrURL Alg Digest Length ModTime TargetFileOrURL
9 TOKEN NUMBER: 1 2 3 4 5 6
10
11 """
12
13 from __future__ import with_statement
14
15 COLUMNS = { 0:"SourceFileOrURL",
16 1:"Alg",
17 2:"Digest",
18 3:"Length",
19 4:"ModTime",
20 5:"TargetFileOrURL",
21 }
22
23
24 import os, sys
25 from stat import *
26
27 import re
28
29 from collections import defaultdict
30
31 import hashlib
32
33 import codecs
34
35 import logging
36
37 logging.basicConfig(level=logging.INFO)
38
39 logger = logging.getLogger('checkm')
40
42 """The item or directory was either not found, or not accessible."""
44 """
45 FIXME
46 @param *arg:
47 @type *arg:
48 @param **kw:
49 @type **kw:
50 """
51 self.context = (arg, kw)
53 """
54 FIXME
55 """
56 return self.context.__str__()
58 """
59 FIXME
60 """
61 return self.context.__str__()
62
64 COLUMN_NAMES = [u'# [@]SourceFileOrURL',u'Alg',u'Digest',u'Length',u'ModTime']
70
72 """
73 FIXME
74 @param report:
75 @type report:
76 """
77 cols = defaultdict(lambda : 0)
78 for line in report:
79 for index in xrange(len(line)):
80 if len(line[index])>cols[index]:
81 cols[index] = len(line[index])
82 return cols
83
85 """
86 FIXME
87 @param line:
88 @type line:
89 @param col_maxes:
90 @type col_maxes:
91 """
92 spaced_line = []
93 for index in xrange(len(line)):
94 spaced_line.append(line[index])
95 spaces = col_maxes[index]-len(line[index])+4
96 spaced_line.append(u" "*spaces)
97 return u"".join(spaced_line)
98
def create_bagit_manifest(self, scan_directory, algorithm, recursive=False, delimiter = " ", filename=None):
    """Scan a directory and write a bagit-style manifest of 'digest<delimiter>path' lines.

    @param scan_directory: directory whose contents are hashed
    @type scan_directory: str
    @param algorithm: hashlib algorithm name; also used for the default
                      manifest filename 'manifest-<algorithm>.txt'
    @type algorithm: str
    @param recursive: if True, subdirectories are walked as well
    @type recursive: bool
    @param delimiter: text placed between the digest and the path on each line
    @type delimiter: str
    @param filename: target manifest filename, or an open file-like object
                     with a 'write' method; defaults to 'manifest-<algorithm>.txt'
    @type filename: str or file-like
    @return: the filename or file-like object that was written to
    """
    if not filename:
        filename = "manifest-%s.txt" % algorithm
    logger.info("Creating bagit manifest file(%s) for dir(%s) with Alg:%s" % (filename,
                                                                              scan_directory,
                                                                              algorithm))
    # 3-column scan rows: [path, algorithm, digest-or-'d' for directories]
    report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=3)
    if hasattr(filename, 'write'):
        # File-like target: reconstruct the conventional on-disk name so a
        # previously written manifest is not hashed into itself.
        faked_filename = "manifest-%s.txt" % algorithm
        for line in report:
            if line[2] != "d":
                # skip the manifest itself if it appeared in the scan
                if os.path.abspath(line[0]) != os.path.abspath(faked_filename):
                    filename.write("%s%s%s\n" % (line[2], delimiter, line[0]))
                else:
                    logger.info("Manifest file match - scan line ignored")
    else:
        with codecs.open(filename, encoding='utf-8', mode="w") as output:
            for line in report:
                if line[2] != "d":
                    # skip the manifest itself if it appeared in the scan
                    if os.path.abspath(line[0]) != os.path.abspath(filename):
                        output.write("%s%s%s\n" % (line[2], delimiter, line[0]))
                    else:
                        logger.info("Manifest file match - scan line ignored")
            # NOTE(review): only this branch adds a trailing blank line
            output.write("\n")
    return filename
137
139 logger.info("Creating multilevel checkm files '(%s)' from top level directory(%s) with Alg:%s and columns:%s" % (checkm_filename, top_directory, algorithm, columns))
140 if not os.path.isdir(top_directory):
141 raise NotFound(top_directory=top_directory)
142
143
144
145 dir_list = [(root, dirnames) for (root, dirnames, _) in os.walk(top_directory, topdown=False)]
146 dirs = dict(dir_list)
147
148 for (dirname,_) in dir_list:
149 logger.info('creating checkm file %s in %s' % (checkm_filename, dirname))
150 with codecs.open(os.path.join(dirname, checkm_filename), encoding='utf-8', mode="w") as output:
151 self.create_checkm_file(dirname,
152 algorithm,
153 os.path.join(dirname, checkm_filename),
154 recursive=False,
155 columns=columns,
156 checkm_file=output)
157 subdir_report = []
158 for subdir in dirs[dirname]:
159 logger.info('Checking sub-checkm file and adding it to the list of hashes in %s' % dirname)
160 try:
161 line = self.scanner.scan_path(os.path.join(dirname, subdir, checkm_filename), algorithm, columns)
162 logger.info("Line - %s" % line)
163 line[0] = '@%s' % (line[0])
164 subdir_report.append(line)
165 except Exception, e:
166 print dirname, subdir, checkm_filename
167 print "Fail! %s" % e
168 col_maxes = self._get_max_len(subdir_report)
169 for line in subdir_report:
170 output.write('%s\n' % (self._space_line(line, col_maxes)))
171 output.write('\n')
172
def create_checkm_file(self, scan_directory, algorithm, checkm_filename, recursive=False, columns=3, checkm_file=None):
    """Scan a directory and write a column-aligned checkm manifest.

    @param scan_directory: directory whose contents are hashed
    @type scan_directory: str
    @param algorithm: hashlib algorithm name recorded in the 'Alg' column
    @type algorithm: str
    @param checkm_filename: path of the manifest; scan rows matching this
                            path are skipped so the manifest never lists itself
    @type checkm_filename: str
    @param recursive: if True, subdirectories are walked as well
    @type recursive: bool
    @param columns: number of checkm columns to emit
    @type columns: int
    @param checkm_file: optional already-open file-like object to write to;
                        when given it is returned (left open), otherwise
                        checkm_filename is created and closed here
    @type checkm_file: file-like or None
    @return: checkm_file when one was supplied; otherwise None
    """
    logger.info("Creating checkm file for dir(%s) with Alg:%s and columns: %s" % (
                                        scan_directory,
                                        algorithm, columns))
    report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=columns)
    # widest entry seen per column, used to pad rows into aligned columns
    col_maxes = self._get_max_len(report)
    if checkm_file != None and hasattr(checkm_file, 'write'):
        checkm_file.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
        for line in report:
            # skip the manifest itself if it appeared in the scan
            if os.path.abspath(line[0]) != os.path.abspath(checkm_filename):
                checkm_file.write("%s\n" % (self._space_line(line, col_maxes)))
            else:
                logger.info("Manifest file match - scan line ignored")
        return checkm_file
    else:
        with codecs.open(checkm_filename, encoding='utf-8', mode="w") as output:
            output.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
            for line in report:
                # skip the manifest itself if it appeared in the scan
                if os.path.abspath(line[0]) != os.path.abspath(checkm_filename):
                    output.write("%s\n" % (self._space_line(line, col_maxes)))
                else:
                    logger.info("Manifest file match - scan line ignored")
            # NOTE(review): this branch writes a trailing blank line and
            # implicitly returns None, unlike the file-like branch above
            output.write("\n")
196
198 """
199 FIXME
200 @param bagit_filename:
201 @type bagit_filename:
202 @param algorithm=None:
203 @type algorithm=None:
204 """
205 logger.info("Checking files against '%s' bagit manifest" % bagit_filename)
206 if algorithm == None:
207 if hasattr(bagit_filename, 'read'):
208 raise Exception("Need to supply the algorithm when passing a filelike object instead of a filename")
209 m = re.search("manifest-(?P<alg>[^\.]+)\.txt", bagit_filename)
210 if m != None:
211 algorithm = m.groupdict()['alg']
212 parser = BagitParser(bagit_filename)
213 scanner = CheckmScanner()
214 results = {'pass':[], 'fail':{}}
215 for row in parser:
216 if row:
217 try:
218 scan_row = scanner.scan_path(row[1], algorithm, 3)
219 if row[0] != scan_row[2]:
220 logger.info("Failed original: %s" % row)
221 logger.info("Current scan: %s" % scan_row)
222 results['fail'][row[1]] = (row, scan_row)
223 else:
224 results['pass'].append(row[1])
225 except NotFound:
226 scan_row = "File not found"
227 logger.info("Failed original: %s" % row)
228 logger.info("But file not found at this path.")
229 results['fail'][row[1]] = (row, scan_row)
230 return results
231
def check_checkm_hashes(self, scan_directory, checkm_filename, ignore_multilevel=True, columns=None):
    """Re-scan the files listed in a checkm manifest and report pass/fail.

    @param scan_directory: NOTE(review): accepted but never used in this
                           body — paths are taken verbatim from the manifest;
                           confirm against callers before removing
    @type scan_directory: str
    @param checkm_filename: checkm manifest file (or file-like) to verify
    @type checkm_filename: str or file-like
    @param ignore_multilevel: if True, '@'-prefixed sub-manifest entries are
                              checked as files but not themselves parsed
    @type ignore_multilevel: bool
    @param columns: number of columns to compare; defaults to the width of
                    the first manifest row encountered
    @type columns: int or None
    @return: dict with 'pass' (list of paths), 'fail' (path -> (expected row,
             rescanned row or "File not found")) and 'include' (sub-manifests)
    """
    def _check_files_against_parser(parser, columns=None):
        # Verify every row yielded by one parser; shared helper for the
        # top-level manifest and for each included sub-manifest.
        scanner = CheckmScanner()
        results = {'pass':[], 'fail':{}, 'include':[]}
        for row in parser:
            if row:
                try:
                    if row[0].startswith('@'):
                        # '@path' marks a nested checkm manifest: strip the
                        # marker and remember it for multilevel traversal
                        row[0] = row[0][1:]
                        results['include'].append(row[0])
                    if not columns:
                        columns = len(row)
                    # row[1] is the algorithm recorded in the manifest
                    scan_row = scanner.scan_path(row[0], row[1], columns)
                    nomatch = False
                    for expected, scanned in zip(row, scan_row):
                        # "-" in a manifest column means "don't care"
                        if expected != "-" and expected != scanned:
                            nomatch = True
                    if nomatch:
                        logger.info("Failed original: %s" % row)
                        logger.info("Current scan: %s" % scan_row)
                        results['fail'][row[0]] = (row, scan_row)
                    else:
                        results['pass'].append(row[0])
                except NotFound:
                    scan_row = "File not found"
                    logger.info("Failed original: %s" % row)
                    logger.info("But file not found at this path.")
                    results['fail'][row[0]] = (row, scan_row)
        return results

    logger.info("Checking files against %s checkm manifest" % checkm_filename)
    parser = CheckmParser(checkm_filename)
    results = _check_files_against_parser(parser, columns)
    if ignore_multilevel:
        return results
    else:
        # Breadth-unordered traversal of included sub-manifests: pop each
        # pending manifest, verify it, and queue any manifests it includes.
        checkm_list = results['include'][:]
        while checkm_list:
            checkm_file = checkm_list.pop()
            parser = CheckmParser(checkm_file)
            additional_results = _check_files_against_parser(parser, columns)

            results['pass'].extend(additional_results['pass'])

            results['include'].extend(additional_results['include'])
            checkm_list.extend(additional_results['include'])

            results['fail'].update(additional_results['fail'])
        return results
289
292 """
293 FIXME
294 @param bagit_file=None:
295 @type bagit_file=None:
296 """
297 self.status = False
298 self.lines = []
299 if bagit_file:
300 self.parse(bagit_file)
301
303 """
304 FIXME
305 """
306 class Bagit_iter:
307 def __init__(self, lines):
308 """
309 FIXME
310 @param lines:
311 @type lines:
312 """
313 self.lines = lines
314 self.last = 0
315 def __iter__(self):
316 """
317 FIXME
318 """
319 return self
320 def next(self):
321 """
322 FIXME
323 """
324 if self.last >= len(self.lines):
325 raise StopIteration
326 elif len(self.lines) == 0:
327 raise StopIteration
328 else:
329 self.last += 1
330 return self.lines[self.last-1]
331 return Bagit_iter(self.lines)
332
def parse(self, fileobj):
    """Load bagit manifest lines from a filename or an open file-like object.

    @param fileobj: path to a manifest file, or any object with 'read'
    @type fileobj: str or file-like
    @return: the parsed token lists, also stored on self.lines
    """
    # Guard clause: an object that can read is consumed directly.
    if hasattr(fileobj, "read"):
        self._parse_lines(fileobj)
        return self.lines
    # Otherwise treat it as a path and decode the file as UTF-8.
    with codecs.open(fileobj, encoding='utf-8', mode="r") as manifest_fh:
        self._parse_lines(manifest_fh)
    return self.lines
345
347 """
348 FIXME
349 @param fh:
350 @type fh:
351 """
352 self.lines = []
353 line_buffer = ""
354 def _parse_line(line):
355 """
356 FIXME
357 @param line:
358 @type line:
359 """
360 if not line.startswith('#'):
361 tokens = filter(lambda x: x, re.split("\s+", line, 1))
362 logger.info(tokens)
363 if tokens:
364
365 if tokens[1].startswith("*"):
366 tokens[1] = tokens[1][1:].strip()
367 self.lines.append(tokens)
368 for chunk in fh.read(0x1000):
369 line_buffer = line_buffer + chunk
370 while True:
371 if not line_buffer:
372 break
373 fragments = line_buffer.split('\n',1)
374 if len(fragments) == 1:
375 break
376 _parse_line(fragments[0])
377 line_buffer = fragments[1]
378
381 """
382 FIXME
383 @param checkm_file=None:
384 @type checkm_file=None:
385 """
386 self.status = False
387 self.lines = []
388 if checkm_file:
389 self.parse(checkm_file)
390
392 """
393 FIXME
394 """
395 class Checkm_iter:
396 def __init__(self, lines):
397 """
398 FIXME
399 @param lines:
400 @type lines:
401 """
402 self.lines = lines
403 self.last = 0
404 def __iter__(self):
405 """
406 FIXME
407 """
408 return self
409 def next(self):
410 """
411 FIXME
412 """
413 if self.last >= len(self.lines):
414 raise StopIteration
415 elif len(self.lines) == 0:
416 raise StopIteration
417 else:
418 self.last += 1
419 return self.lines[self.last-1]
420 return Checkm_iter(self.lines)
421
def parse(self, checkm_file):
    """Load checkm manifest lines from a filename or an open file-like object.

    @param checkm_file: path to a checkm file, or any object with 'read'
    @type checkm_file: str or file-like
    @return: the parsed token lists, also stored on self.lines
    @raise NotFound: the named path is not an existing regular file
    """
    # Guard clause: an object that can read is consumed directly.
    if hasattr(checkm_file, "read"):
        self._parse_lines(checkm_file)
        return self.lines
    if not os.path.isfile(checkm_file):
        raise NotFound(checkm_file=checkm_file)
    with codecs.open(checkm_file, encoding='utf-8', mode="r") as check_fh:
        self._parse_lines(check_fh)
    return self.lines
437
439 """
440 FIXME
441 @param fh:
442 @type fh:
443 """
444 self.lines = []
445 line_buffer = ""
446 def _parse_line(line):
447 """
448 FIXME
449 @param line:
450 @type line:
451 """
452 if not line.startswith('#'):
453 tokens = filter(lambda x: x, re.split("\s+", line, 5))
454 logger.info(tokens)
455 if tokens:
456 self.lines.append(tokens)
457
458 for chunk in fh.read(0x1000):
459 line_buffer = line_buffer + chunk
460 while True:
461 if not line_buffer:
462 break
463 fragments = line_buffer.split('\n',1)
464 if len(fragments) == 1:
465 break
466 _parse_line(fragments[0])
467 line_buffer = fragments[1]
468
470 HASHTYPES = ['md5', 'sha1', 'sha224','sha256','sha384','sha512']
def scan_local(self, directory_path, algorithm, columns=3):
    """Scan the immediate children of a directory (non-recursive).

    @param directory_path: directory whose direct entries are scanned
    @type directory_path: str
    @param algorithm: hashlib algorithm name passed through to scan_path
    @type algorithm: str
    @param columns: number of report columns per row
    @type columns: int
    @return: list of scan_path rows, one per entry, in os.listdir order
    """
    return [self.scan_path(os.path.join(directory_path, entry), algorithm, columns)
            for entry in os.listdir(directory_path)]
486
def scan_tree(self, directory_path, algorithm, columns):
    """Scan a directory tree recursively via os.walk.

    @param directory_path: root directory of the walk
    @type directory_path: str
    @param algorithm: hashlib algorithm name passed through to scan_path
    @type algorithm: str
    @param columns: number of report columns per row
    @type columns: int
    @return: list of scan_path rows covering every subdirectory and file
    @raise NotFound: directory_path does not exist
    """
    if not os.path.exists(directory_path):
        raise NotFound(directory_path=directory_path, recursive=True)
    report = []
    for (dirpath, dirnames, filenames) in os.walk(directory_path):
        # Directories are reported alongside files, directories first.
        for child in dirnames + filenames:
            report.append(self.scan_path(os.path.join(dirpath, child), algorithm, columns))
    return report
505
def scan_path(self, item_path, algorithm, columns):
    """Build one checkm report row for a single file or directory.

    The row is [path, algorithm, digest] (all unicode), with Length and
    ModTime appended when columns asks for them; directories carry u'd'
    in the digest column instead of a hash.

    @param item_path: file or directory to report on
    @type item_path: str
    @param algorithm: hashlib algorithm name, e.g. 'md5' or 'sha1'
    @type algorithm: str
    @param columns: number of columns to produce; coerced to 3 when it is
                    not an int or is below 3
    @type columns: int
    @return: list of unicode column values
    @raise NotFound: the path cannot be read (OSError/IOError)
    @raise ValueError: hashlib has no constructor named `algorithm`
    """
    # Check the type BEFORE comparing: the original compared columns<3
    # first, which silently relied on Python 2 cross-type ordering for
    # non-int values (and would raise TypeError on Python 3).
    if not isinstance(columns, int) or columns < 3:
        columns = 3
    try:
        line = []

        line.append(unicode(item_path))

        line.append(unicode(algorithm))

        if os.path.isdir(item_path):
            # Directories get a 'd' marker instead of a digest.
            line.append(u'd')
        else:
            # Unknown algorithm -> AttributeError, mapped to ValueError below.
            hash_gen = getattr(hashlib, algorithm)()
            with open(item_path, 'rb') as fh:
                logger.info("Checking %s with algorithm %s" % (item_path, algorithm))
                # Hash in 8 KiB chunks so large files are never fully loaded.
                chunk = fh.read(1024*8)
                while chunk:
                    hash_gen.update(chunk)
                    chunk = fh.read(1024*8)
            line.append(unicode(hash_gen.hexdigest()))
        if columns > 3:
            # Stat once and reuse for both the Length and ModTime columns
            # (the original called os.stat twice).
            item_stat = os.stat(item_path)
            line.append(unicode(item_stat[ST_SIZE]))
            if columns > 4:
                line.append(unicode(item_stat[ST_MTIME]))
        return line
    except (OSError, IOError):
        # Both exception types were handled identically; merged into one clause.
        logger.info("item exists? %s" % os.path.exists(item_path))
        raise NotFound(item_path=item_path)
    except AttributeError:
        raise ValueError("This tool cannot perform hashtype %s" % algorithm)
552
def scan_directory(self, directory_path, algorithm, recursive=False, columns=3):
    """Scan a directory, dispatching to the tree or local scanner.

    @param directory_path: directory to scan
    @type directory_path: str
    @param algorithm: hashlib algorithm name passed through to the scanner
    @type algorithm: str
    @param recursive: if True use scan_tree, otherwise scan_local
    @type recursive: bool
    @param columns: number of report columns per row
    @type columns: int
    @return: the selected scanner's report
    @raise NotFound: directory_path does not exist
    """
    if not os.path.exists(directory_path):
        raise NotFound(directory_path=directory_path, recursive=recursive)
    scan = self.scan_tree if recursive else self.scan_local
    return scan(directory_path, algorithm, columns)
571