import os
import sys
import argparse
import time
import logging
import tempfile
from boto.s3.connection import S3Connection
from boto.s3.connection import S3ResponseError
class BucketLogParser( object ):
    """ Lists and downloads the keys of a single S3 bucket through boto.

    Despite the name, nothing is parsed here: the class only connects to the
    bucket, enumerates key names under an optional prefix and copies the
    matching objects to a local directory.
    """

    def __init__( self, access_key, secret_key, bucket_name, prefix ):
        """ Remember credentials and target; nothing is contacted until connect().

        access_key, secret_key -- credentials handed to S3Connection
        bucket_name            -- name of the bucket to open
        prefix                 -- key-name prefix used to filter listings (may be None)
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.connection = None   # set by connect()
        self.bucket = None       # set by connect()

    def connect( self ):
        """ Open an HTTPS connection and look up the configured bucket.

        http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.connection
        http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.bucket
        """
        self.connection = S3Connection( self.access_key, self.secret_key, is_secure=True )
        self.bucket = self.connection.get_bucket( self.bucket_name )

    def get_bucket_listing( self ):
        """ Return the names of all keys under self.prefix as a list of strings.

        connect() must have been called first so that self.bucket is set.
        Every key name is also written to the log at INFO level.
        """
        names = []
        # A missing prefix means "list everything"; pass '' rather than None.
        for item in self.bucket.list( prefix=self.prefix or '' ):
            # item is a boto key object, see
            # http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.key
            logging.info( item.name )
            names.append( item.name )
        return names

    def download_listing( self, key_names, target_dir=None ):
        """ Download every key in key_names into target_dir.

        key_names  -- iterable of key names, e.g. the result of get_bucket_listing()
        target_dir -- local directory to write into; defaults to the system
                      temporary directory when empty or None

        The '/'-separated structure of a key name is reproduced below
        target_dir, with intermediate directories created on demand.
        Names ending in '/' (folder placeholders) and keys that are no longer
        in the bucket are skipped with a log message instead of aborting.
        """
        target_dir = target_dir if target_dir else tempfile.gettempdir()
        for name in key_names:
            if name.endswith( '/' ):
                # Placeholder for a "folder": there is no file content to fetch.
                logging.info( 'skipping folder placeholder: {}'.format( name ) )
                continue

            # Resolve the key before touching the disk so that a vanished key
            # does not leave an empty file behind.
            key = self.bucket.get_key( name )
            if key is None:
                logging.warning( 'key not found, skipping: {}'.format( name ) )
                continue

            # A leading '/' would make os.path.join() discard target_dir.
            local_file_path = os.path.join( target_dir, name.lstrip( '/' ) )
            parent_dir = os.path.dirname( local_file_path )
            if parent_dir and not os.path.isdir( parent_dir ):
                os.makedirs( parent_dir )

            logging.info( 'downloading: {}'.format( local_file_path ) )
            # Binary mode: object contents are raw bytes, not text.
            with open( local_file_path, 'wb' ) as f:
                key.get_contents_to_file( f )
if __name__ == '__main__':
    # Script entry point: list every key under the given prefix, download the
    # keys to a local directory, and report how long each phase took.
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument( '-L', '--log', help='specify log file location' )
        parser.add_argument( '-A', '--access', help='Access Key', required=True )
        # NOTE(review): a secret passed on the command line is visible in the
        # process list; consider an environment variable or config file.
        parser.add_argument( '-K', '--secret', help='Secret Key', required=True )
        parser.add_argument( '-B', '--bucketname', help='Bucket Name', required=True )
        parser.add_argument( '-P', '--prefix', help='prefix filtered listing' )
        parser.add_argument( '-T', '--localdirectory', help='local storage for files' )
        args = parser.parse_args()

        # Default log file is <temp dir>/<script name>.log. basename() is
        # required: joining with a full or relative script path would place the
        # log outside the temp directory (or in a directory that does not exist).
        default_log = os.path.join( tempfile.gettempdir(), os.path.basename( sys.argv[0] ) + '.log' )
        logging_output = args.log if args.log else default_log

        # basicConfig() has no 'name' option; Python 3 rejects unknown keywords
        # with ValueError, so only supported arguments are passed.
        logging.basicConfig(
            level = logging.INFO,
            format = '%(asctime)s %(levelname)s %(message)s',
            filename = logging_output,
            filemode = 'a' )

        b = BucketLogParser( args.access, args.secret, args.bucketname, args.prefix )
        logging.info( 'connecting...' )
        b.connect()

        # Phase 1: enumerate the keys.
        start = time.time()
        items = b.get_bucket_listing()
        print( '{} items found'.format( len( items ) ) )
        logging.info( '{} items found'.format( len( items ) ) )
        print( 'bucket listing in {} seconds'.format( time.time() - start ) )

        # Phase 2: download them.
        start = time.time()
        b.download_listing( items, args.localdirectory )
        print( 'bucket listing downloaded in {} seconds'.format( time.time() - start ) )
    except KeyboardInterrupt:
        # Ctrl-C is an expected way to stop a long download; record it and
        # fall through to the normal 'done' message.
        logging.warning( 'interrupted by user' )
    except S3ResponseError as error:
        print( error )
        logging.error( error )
    print( 'done' )
    logging.info( 'done' )