import os
import sys
import argparse
import time
import logging
import tempfile
import boto
from boto.s3.connection import S3Connection, OrdinaryCallingFormat
from boto.exception import S3ResponseError
class BucketUtil( object ):
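    """ Thin helper around a boto 2.x S3Connection: list buckets, list key names
        under a prefix, and download keys to a local directory. """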
def __init__( self, access_key, secret_key, ssl, prefix, host='s3.amazonaws.com', port=None ):
self.access_key = access_key
self.secret_key = secret_key
self.prefix = prefix
self.host = host
self.port = port
self.ssl = ssl
self.connection = None
self.bucket = None
self.bucket_listing_generator = None
def connect( self ):
""" http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.connection
http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.bucket
"""
        # OrdinaryCallingFormat builds path-style URLs, which also works for
        # non-AWS, S3-compatible endpoints supplied via --host.
        self.connection = S3Connection( self.access_key, self.secret_key,
                                        is_secure=self.ssl, port=self.port, host=self.host,
                                        calling_format=OrdinaryCallingFormat() )
def list_buckets( self ):
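        """ Return all buckets visible to these credentials as boto Bucket objects. """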
buckets = self.connection.get_all_buckets()
return buckets
# TODO: not memory efficient for large lists
def get_bucket_listing( self ):
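        """ Return the names of all keys in self.bucket that match self.prefix. """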
result = list()
if self.bucket:
listing_generator = self.bucket.list( prefix=self.prefix ) # generator of keys, boto.s3.bucketlistresultset
for item in listing_generator:
logging.info( item.name.encode('utf-8') )
result.append( item.name ) # http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.key
return result
def download_listing( self, key_names, target_dir=None ):
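        """ Download each named key from self.bucket into target_dir
            (defaults to the system temp directory). """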
        target_dir = target_dir if target_dir else tempfile.gettempdir()
        for name in key_names:
            local_file_path = os.path.join( target_dir, name )
            # key names may contain '/', so create any missing local sub-directories
            local_dir = os.path.dirname( local_file_path )
            if local_dir and not os.path.isdir( local_dir ):
                os.makedirs( local_dir )
            logging.info( 'downloading: {}'.format( local_file_path ) )
            with open( local_file_path, 'wb' ) as f:  # binary mode; keys may not be text
                key = self.bucket.get_key( name )
                key.get_contents_to_file( f )
if __name__ == '__main__':
try:
parser = argparse.ArgumentParser()
parser.add_argument( '-L', '--log', help='specify log file location', required=True )
parser.add_argument( '-A', '--access', help='Access Key', required=True )
parser.add_argument( '-K', '--secret', help='Secret Key', required=True )
parser.add_argument( '-B', '--bucketname', help='Bucket Name' )
parser.add_argument( '-P', '--prefix', help='prefix filtered listing' )
parser.add_argument( '-T', '--localdirectory', help='local storage for files' )
parser.add_argument( '-H', '--host', help='target host (e.g. not amazon s3)' )
parser.add_argument( '--port', help='port number' )
parser.add_argument( '--disablessl', help='disable ssl', action="store_true" )
parser.add_argument( '--downloadurl', help='generate a url for the parameter provided key (path and file)' )
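        # Illustrative invocation (script name, keys, bucket, and paths are placeholders):
        #   python bucket_util.py -L /tmp/bucket.log -A ACCESS_KEY -K SECRET_KEY \
        #       -B mybucket -P logs/ -H objects.example.com --port 8080 --disablessl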
args = parser.parse_args()
logging_output = args.log
logging_level = logging.INFO
        logging.basicConfig(
            level = logging_level,
            format = '%(asctime)s %(levelname)s %(message)s',
            filename = logging_output,
            filemode = 'a' )
        ssl = False if args.disablessl else True
        host = args.host if args.host else 's3.amazonaws.com'
        port = int( args.port ) if args.port else None
        b = BucketUtil( args.access, args.secret, ssl, args.prefix, host, port )
logging.info( 'connecting...' )
b.connect()
start = time.time()
buckets = b.list_buckets()
print len( buckets ), "buckets:"
for bucket in buckets:
print bucket.name
if args.bucketname and args.bucketname == bucket.name:
b.bucket = bucket
        print 'buckets listed in {} seconds'.format( time.time() - start )
items = list()
if b.bucket:
start = time.time()
items = b.get_bucket_listing()
print len( items ), 'items found'
logging.info( '{} items found'.format( len( items ) ) )
print 'bucket listing in {} seconds'.format( time.time() - start )
for item in items:
name = item.encode('utf-8') # http://docs.python.org/2/howto/unicode.html#the-unicode-type
print '{}'.format( name )
        if b.bucket and args.downloadurl:
            file_key = b.bucket.get_key( args.downloadurl )
            file_url = file_key.generate_url( 3600, force_http=args.disablessl )  # expiration in seconds
            print file_url
# start = time.time()
# b.download_listing( items, args.localdirectory )
# print 'bucket listing downloaded in {} seconds'.format( time.time() - start )
except KeyboardInterrupt:
pass
except S3ResponseError as error:
print error
logging.error( error )
print 'done'
logging.info( 'done' )