sample.py 1.78 KB
Newer Older
1 2 3 4 5
'''
Created on 31 oct. 2009

@author: coissac
'''
6
from random import randrange, sample
7 8 9 10 11
try:
    from collections import Counter
except ImportError:
    from obitools.collections import Counter

12 13 14 15

def lookfor(x,cumsum):
    lmax=len(cumsum)
    lmin=0
16 17 18 19
    
    assert x < cumsum[-1],"x must be smaller then cumulative sum"
    
    while((lmax - lmin) > 0):
20 21 22 23 24 25 26

        i=(lmax+lmin)/2
        #print i,lmin,lmax
        if (x<cumsum[i] and (i==0 or x>cumsum[i-1])):
            #print "return 1 :",i,cumsum[i-1],"<",x,"<",cumsum[i]
            return i
        elif cumsum[i]==x:
27 28
            while cumsum[i]==x:
                i+=1
29
            #print "return 2 :",i,cumsum[i],"<",x,"<",cumsum[i+1]
30
            return i
31 32 33 34
        elif x<cumsum[i]:
            lmax=i
        else:
            lmin=i
35 36
            
    raise AssertionError
37 38 39 40
    #print "end return :",i,cumsum[i-1],"<",x,"<",cumsum[i]
    return i

def weigthedSample(events,size):
41 42
    entries = events.keys()
    cumul=[0] * len(entries)
43
    s=0
44
    i=0
45 46
    for e in entries:
        s+=events[e]
47 48
        cumul[i]=s
        i+=1
49
    
50
    c = [randrange(0,s) for x in xrange(size)]
51
    c.sort()
52
    
53 54 55 56 57 58 59 60
    i = 0
    for j in xrange(len(c)):
        v = c[j]
        while (v > cumul[i]):
            i+=1
        c[j]=entries[i]
        
    result=Counter(c)
61

62 63 64
    return result

def weigthedSampleWithoutReplacement(events,size):
Eric Coissac committed
65 66 67
    # entries = [k for k in events.iterkeys() if events[k]>0]
    entries = events.keys()
    cumul=[0] * len(entries)
68
    s=0
Eric Coissac committed
69
    i=0
70 71
    for e in entries:
        s+=events[e]
Eric Coissac committed
72 73
        cumul[i]=s
        i+=1
74
    
75
    c = sample(xrange(s),size)
Eric Coissac committed
76
    c.sort()
77
    
Eric Coissac committed
78 79 80 81 82 83 84 85
    i = 0
    for j in xrange(len(c)):
        v = c[j]
        while (v > cumul[i]):
            i+=1
        c[j]=entries[i]
        
    result=Counter(c)
86

87
    return result