john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

random randint subset of large set

from random import randint

# freq = [ 0, 0, 0, 0, 0 ]
# for i in xrange( 1000 ):
#   index = randint( 0, 4 )
#   freq[  index ] += 1
# print repr( freq )

# rand subset
MAX = 5
SUBSET_MAX = MAX
a = range( MAX )
b = list()
print "original", repr( a )
for i in xrange( MAX ):
    r_index = randint( 0, len(a) -1 )
    print "index=", str( r_index )
    "value=", str( a[ r_index ] )
    b.append ( a[ r_index ] )
    a.remove( a[r_index ] )
    print repr( a ), repr( b )



# rand subset large max
from math import ceil
MAX = 8  #very large
PARTITION_MAX = 3
PARTITION_COUNT = MAX / PARTITION_MAX
if MAX % PARTITION_MAX:
  PARTITION_COUNT += 1
SUBSET_MAX = MAX - 1

print "rand subset in large range", MAX
print "partition max", PARTITION_MAX, "creates", PARTITION_COUNT
a = range( MAX )
b = list()
print "original", repr( a )
for i in xrange( MAX ):
  r_index = randint( 0, len( a )-1 )
  print "index=",str( r_index ), "value=",str( a[ r_index ] )
  b.append( a[ r_index ] )
  a.remove( a[ r_index] )
  print repr( a ), repr( b )



# rand subset partitions

def fill_partition( source , partition, count ):
  for i in xrange( count ):
    index = randint( 0, len( source )-1 )
    value = source[ index ]
    print "index=",str( index ), "value=",str( value )
    partition.append( value )
    source.remove( value )
  return source, partition


MAX = 8  #very large
PARTITION_MAX = 3
PARTITION_COUNT = MAX / PARTITION_MAX
if MAX % PARTITION_MAX:
  PARTITION_COUNT += 1
SUBSET_MAX = MAX - 1

print "rand subset in large range", MAX
print "partition max", PARTITION_MAX, "creates", PARTITION_COUNT
a = range( MAX )
partitions = list()
print "original", repr( a )
for i in xrange( PARTITION_COUNT ):
  if PARTITION_MAX < len( a ):
    count = PARTITION_MAX
  else:
    count = len( a )
  a, partition = fill_partition( a , list(), count )
  print repr( a ), repr( partition )
  partitions.append( partition )

print "done", repr( partitions )




 # rand subset very large range
# memory management, pointers

def get_chunk_from_source( source, count ):
  chunk = list()
  for i in xrange( count ):
    source_max_index = len( source ) -1
    index = randint( 0, source_max_index )
    value = source[ index ]
    chunk.append( value )
    source.remove( value )
  return chunk

def get_chunk( start, end, count ):
  print str(count), "from", str(start), "to", end
  a = range( start, end )
  b = get_chunk_from_source( a, count )
  return b

def get_sources_ordering( ):
  SOURCE_COUNT = RANGE_MAX / MEMORY_CONSTRAINT
  if RANGE_MAX % MEMORY_CONSTRAINT:
    SOURCE_COUNT += 1
  sources = list()
  start = 0
  end = start + MEMORY_CONSTRAINT
  sources.append( ( start, end ) )
  while len( sources ) < SOURCE_COUNT:
    start += MEMORY_CONSTRAINT
    end += MEMORY_CONSTRAINT
    sources.append( ( start, end ) )
  return sources

RANGE_MAX = 1000
MEMORY_CONSTRAINT = 200
sources = get_sources_ordering( )

required = 8
print "rand subset", required,"in large range", RANGE_MAX
result = list()

while required > 0:
  count = randint( 0, required )
  start, end = sources.pop()
  if len( sources ) == 0:
    count = required

  b = get_chunk( start, end, count )
  required -= len( b )
  print "got", repr( b ), "still need", str(required)
  result.extend( b )

print "done", repr(result)

  • « google app engine blob store upload and download delete
  • linked list implementation reverse dedupe »

Published

Jul 17, 2013

Category

python

~394 words

Tags

  • large 2
  • of 13
  • python 180
  • randint 1
  • random 4
  • set 5
  • subset 1