
s3 boto list keys download to file v2 generate url
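
The script below uses boto (the pre-boto3 Python SDK) to connect to Amazon S3 or an S3-compatible endpoint, list all buckets, list the keys in a chosen bucket (optionally filtered by a prefix), optionally download those keys to a local directory, and generate a time-limited download URL for a single key.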

import os
import sys
import argparse
import time
import logging
import tempfile

from boto.s3.connection import S3Connection
from boto.s3.connection import S3ResponseError
import boto


class BucketUtil( object ):

    def __init__( self, access_key, secret_key, ssl, prefix, host='s3.amazonaws.com', port=None ):
        self.access_key = access_key
        self.secret_key = secret_key
        self.prefix = prefix
        self.host = host
        self.port = port
        self.ssl = ssl

        self.connection = None
        self.bucket = None
        self.bucket_listing_generator = None



    def connect( self ):
        """ http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.connection
            http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.bucket
        """
        self.connection = S3Connection( self.access_key, self.secret_key,
                                        is_secure=self.ssl, port=self.port, host=self.host,
                                        calling_format=boto.s3.connection.OrdinaryCallingFormat() )


    def list_buckets( self ):
        buckets = self.connection.get_all_buckets()
        return buckets


    # TODO: not memory efficient for large lists
    def get_bucket_listing( self ):
        result = list()
        if self.bucket:
            listing_generator = self.bucket.list( prefix=self.prefix )     # generator of keys, boto.s3.bucketlistresultset
            for item in listing_generator:
                logging.info( item.name.encode('utf-8') )
                result.append( item.name )            # http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.key
        return result


    def download_listing( self, key_names, target_dir=None ):
        target_dir = target_dir if target_dir else tempfile.gettempdir()
        for name in key_names:
            local_file_path = os.path.join( target_dir, name )
            local_dir = os.path.dirname( local_file_path )
            if local_dir and not os.path.isdir( local_dir ):
                os.makedirs( local_dir )    # key names may contain '/' pseudo-directories
            logging.info( 'downloading: {}'.format( local_file_path ) )
            with open( local_file_path, 'wb' ) as f:    # binary mode so non-text objects are not corrupted
                key = self.bucket.get_key( name )
                key.get_contents_to_file( f )


if __name__ == '__main__':
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument( '-L', '--log', help='specify log file location', required=True )
        parser.add_argument( '-A', '--access', help='Access Key', required=True )
        parser.add_argument( '-K', '--secret', help='Secret Key', required=True )
        parser.add_argument( '-B', '--bucketname', help='Bucket Name' )
        parser.add_argument( '-P', '--prefix', help='prefix filtered listing' )
        parser.add_argument( '-T', '--localdirectory', help='local storage for files' )
        parser.add_argument( '-H', '--host', help='target host (e.g. not amazon s3)' )
        parser.add_argument( '--port', help='port number' )
        parser.add_argument( '--disablessl', help='disable ssl', action="store_true" )
        parser.add_argument( '--downloadurl', help='generate a time-limited url for the given key (path and filename)' )


        args = parser.parse_args()

        logging_output = args.log
        logging_level = logging.INFO
        logging.basicConfig(
            level = logging_level,
            format = '%(asctime)s %(levelname)s %(message)s',
            filename = logging_output,
            filemode = 'a' )

        ssl = not args.disablessl
        host = args.host if args.host else 's3.amazonaws.com'
        port = int( args.port ) if args.port else None
        b = BucketUtil( args.access, args.secret, ssl, args.prefix, host, port )
        logging.info( 'connecting...' )
        b.connect()

        start = time.time()
        buckets = b.list_buckets()
        print len( buckets ), "buckets:"
        for bucket in buckets:
            print bucket.name
            if args.bucketname and args.bucketname == bucket.name:
                b.bucket = bucket
        print 'buckets list in {} seconds'.format( time.time() - start )

        items = list()
        if b.bucket:
            start = time.time()
            items = b.get_bucket_listing()
            print len( items ), 'items found'
            logging.info( '{} items found'.format( len( items ) ) )
            print 'bucket listing in {} seconds'.format( time.time() - start )

        for item in items:
            name = item.encode('utf-8') # http://docs.python.org/2/howto/unicode.html#the-unicode-type
            print '{}'.format( name )

        if args.downloadurl and b.bucket:
            file_key = b.bucket.get_key( args.downloadurl )
            if file_key:
                file_url = file_key.generate_url( 3600, force_http=args.disablessl )    # expiration in seconds
                print file_url
            else:
                print 'key not found: {}'.format( args.downloadurl )

        # start = time.time()
        # b.download_listing( items, args.localdirectory )
        # print 'bucket listing downloaded in {} seconds'.format( time.time() - start )

    except KeyboardInterrupt:
        pass

    except S3ResponseError as error:
        print error
        logging.error( error )

    print 'done'
    logging.info( 'done' )
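
Example invocation (the script filename, access key, secret, bucket name, and key path below are placeholders):

    python s3_bucket_util.py --log /tmp/bucket_util.log \
        --access AKIAEXAMPLE --secret EXAMPLESECRETKEY \
        --bucketname my-bucket --prefix logs/ \
        --downloadurl logs/2014/01/app.log

Add --host and --port to point at a non-Amazon, S3-compatible endpoint, and --disablessl to use plain HTTP; --localdirectory sets where files are written if the commented-out download_listing block is enabled.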


Published: Jan 15, 2014

Category: python

Tags: boto, download, file, generate, keys, list, python, s3, to, url, v2