john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

s3 boto list keys download to file

import os
import sys
import argparse
import time
import logging
import tempfile

from boto.s3.connection import S3Connection
from boto.s3.connection import S3ResponseError


class BucketLogParser( object ):
    """ Lists the keys of one S3 bucket (optionally filtered by prefix) and downloads them to local files.

        Call connect() first: it populates self.connection and self.bucket, which
        get_bucket_listing() and download_listing() both rely on.
    """

    def __init__( self, access_key, secret_key, bucket_name, prefix ):
        """ Store the credentials and bucket settings; nothing is contacted until connect().

            access_key, secret_key: credentials handed to S3Connection by connect()
            bucket_name: name of the bucket opened by connect()
            prefix: key name prefix handed to bucket.list(); may be None
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket_name = bucket_name
        self.prefix = prefix

        # both are populated by connect()
        self.connection = None
        self.bucket = None


    def connect( self ):
        """ Open the S3 connection (is_secure=True) and look up the configured bucket.

            http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.connection
            http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.bucket
        """
        self.connection = S3Connection( self.access_key , self.secret_key, is_secure=True )
        self.bucket = self.connection.get_bucket( self.bucket_name )


    def get_bucket_listing( self ):
        """ Return a list with the name of every key that bucket.list() yields for self.prefix.

            Each key name is also written to the log at INFO level.
        """
        result = list()
        listing_generator = self.bucket.list( prefix=self.prefix )     # generator of keys, boto.s3.bucketlistresultset
        for item in listing_generator:
            logging.info( item.name )
            result.append( item.name )            # http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.key
        return result


    def download_listing( self, key_names, target_dir=None ):
        """ Download every key in key_names to a file below target_dir.

            key_names: iterable of key names, e.g. the result of get_bucket_listing()
            target_dir: local directory to store the files in; defaults to tempfile.gettempdir()

            The '/' separated parts of a key name become local sub directories, which are
            created as needed.  Key names ending in '/' only create a directory.  Keys that
            bucket.get_key() cannot find are logged as a warning and skipped.
        """
        target_dir = target_dir if target_dir else tempfile.gettempdir()
        for name in key_names:
            # a leading '/' would make os.path.join() discard target_dir entirely
            local_file_path = os.path.join( target_dir, name.lstrip( '/' ) )

            if name.endswith( '/' ):
                # "folder" placeholder key: nothing to write into a file
                self._ensure_directory( local_file_path )
                continue

            # look the key up before opening the file so a missing key leaves no empty file behind
            key = self.bucket.get_key( name )
            if key is None:
                logging.warning( 'key not found, skipping: {}'.format( name ) )
                continue

            self._ensure_directory( os.path.dirname( local_file_path ) )
            logging.info( 'downloading: {}'.format( local_file_path ) )
            # binary mode: the content must reach the disk byte for byte
            with open( local_file_path, 'wb' ) as f:
                key.get_contents_to_file( f )


    @staticmethod
    def _ensure_directory( path ):
        """ Create the directory path (including missing parents) unless it already exists. """
        if path and not os.path.isdir( path ):
            os.makedirs( path )



if __name__ == '__main__':
    # Command line entry point: list the keys of a bucket (optionally filtered by
    # prefix), download each one to a local directory and report how long both steps took.
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument( '-L', '--log', help='specify log file location' )
        parser.add_argument( '-A', '--access', help='Access Key', required=True )
        parser.add_argument( '-K', '--secret', help='Secret Key', required=True )
        parser.add_argument( '-B', '--bucketname', help='Bucket Name', required=True )
        parser.add_argument( '-P', '--prefix', help='prefix filtered listing' )
        parser.add_argument( '-T', '--localdirectory', help='local storage for files' )
        args = parser.parse_args()

        # basename: sys.argv[0] may carry a directory part (./dir/script.py or an absolute
        # path), which would otherwise point the default log file at a location outside /tmp
        default_log = os.path.join( os.path.normpath( '/tmp' ) , os.path.basename( sys.argv[0] ) + '.log' )
        logging_output = args.log if args.log else default_log
        logging_level = logging.INFO
        # basicConfig() has no 'name' keyword, so none is passed
        logging.basicConfig(
            level = logging_level ,
            format = '%(asctime)s %(levelname)s %(message)s',
            filename = logging_output,
            filemode = 'a' )

        b = BucketLogParser( args.access, args.secret, args.bucketname, args.prefix )
        logging.info( 'connecting...' )
        b.connect()

        # single-argument print( ... ) produces the same output as the print statement
        # and also parses as a function call
        start = time.time()
        items = b.get_bucket_listing()
        print( '{} items found'.format( len( items ) ) )
        logging.info( '{} items found'.format( len( items ) ) )
        print( 'bucket listing in {} seconds'.format( time.time() - start ) )

        start = time.time()
        b.download_listing( items, args.localdirectory )
        print( 'bucket listing downloaded in {} seconds'.format( time.time() - start ) )

    except KeyboardInterrupt:
        # Ctrl-C: stop quietly and fall through to the final 'done' message
        pass

    except S3ResponseError as error:
        print( error )
        logging.error( error )

    print( 'done' )
    logging.info( 'done' )

  • « Services disable startup debian ubuntu rcconf dialog init
  • cli php q function print html line »

Published

Nov 6, 2013

Category

python

~264 words

Tags

  • boto 6
  • download 12
  • file 92
  • keys 6
  • list 23
  • python 180
  • s3 17
  • to 63