Commit 46b446f0 by Eric Coissac

Add capacity to filter on merged sequences

parent 6208cbee
......@@ -23,8 +23,11 @@ def minimum(seqs):
return min(s['select'] for s in seqs)
def maximum(seqs):
    """Return the largest 'select' value among the records in seqs.

    Every element of seqs is indexed with the key 'select'.

    Raises:
        TypeError: re-raised unchanged when the 'select' values cannot be
            compared; seqs is written to standard error first so the
            offending group can be inspected.
    """
    try:
        return max(s['select'] for s in seqs)
    except TypeError:
        # Same output as the former print statement (str(seqs) plus newline),
        # spelled so that it parses under both Python 2 and Python 3.
        sys.stderr.write("%s\n" % (seqs,))
        # A bare raise keeps the original traceback; 'raise e' would restart
        # it from this line under Python 2.
        raise
def mean(seqs):
    """Return the arithmetic mean of the 'select' values of the records in seqs.

    seqs is iterated once and must also support len().  The result is always
    a float.

    Raises:
        ZeroDivisionError: when seqs is empty.
    """
    total = sum(s['select'] for s in seqs)
    return float(total) / len(seqs)
......@@ -101,6 +104,13 @@ def addSelectOptions(optionManager):
default=[],
help="attributes to merge within each group")
group.add_option('-s','--sample',
action="store", dest="sample",
metavar="<TAGNAME>",
type="str",
default=None,
help="Tag containing sample descriptions, the default value is set to *merged_sample*")
group.add_option('--merge-ids',
action="store_true", dest="mergeids",
default=False,
......@@ -130,15 +140,21 @@ if __name__ == '__main__':
print >>sys.stderr,"\nLoading sequences...\n"
with_taxonomy=hasattr(options, 'taxonomy') and options.taxonomy is not None
nbseq=0
for s in entries:
nbseq+=1
category = []
for c in options.categories:
try:
if hasattr(options, 'taxonomy') and options.taxonomy is not None:
if with_taxonomy:
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
else:
environ = {'sequence':s,'random':random()}
for c in options.categories:
try:
v = eval(c,environ,s)
category.append(v)
except:
......@@ -149,11 +165,6 @@ if __name__ == '__main__':
group.append(s)
classes[category]= group
if hasattr(options, 'taxonomy') and options.taxonomy is not None:
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
else:
environ = {'sequence':s, 'random':random()}
try:
select = eval(options.function,environ,s)
s['select']=select
......@@ -181,11 +192,23 @@ if __name__ == '__main__':
i+=1
progressBar(i,lclasses,False,"%15s" % ("/".join(map(str,c)),))
seqs = classes[c]
if options.sample is not None:
subsets = {}
for s in seqs:
for sid in s[options.sample]:
ss = subsets.get(sid,[])
ss.append(s)
subsets[sid]=ss
else:
subsets={"all":seqs}
for seqs in subsets.values():
sortclass(seqs, options)
if len(c)==1:
c=c[0]
if options.number==1:
if options.number==1 and options.sample is None:
s = seqs[0]
for key in mergedKey:
......@@ -227,19 +250,6 @@ if __name__ == '__main__':
allmkey = set(m.keys()) | set(s[mkey].keys())
s[mkey] = dict((k,m.get(k,0)+s[mkey].get(k,0)) for k in allmkey)
# if mkey in seq:
# for skey in seq[mkey]:
# if skey in s:
# s[mkey][skey]=s[mkey].get(seq[skey],0)+seq[mkey][skey]
# else:
# s[mkey][skey]=seq[mkey][skey]
#for key in seq.iterkeys():
# # Merger proprement l'attribut merged s'il exist
# if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged' and key!='select':
# del(s[key])
if mergeIds:
s['merged'].append(seq.id)
......@@ -249,6 +259,19 @@ if __name__ == '__main__':
for s in seqs[0:options.number]:
s['class']=c
s['__@TOWRITE@__']=True
# Banner for the output pass; starts with a newline like the "Loading sequences" banner.
print >>sys.stderr,"\nWriting sequences...\n"
# Output pass: visit the sequences stored under each key of classes, reporting progress against nbseq.
progressBar(1,nbseq,True,'Writing')
# i counts the sequences visited so far and is passed to progressBar.
i=0
for c in classes:
seqs = classes[c]
for s in seqs:
i+=1
progressBar(i,nbseq,False,"Writing")
# The '__@TOWRITE@__' flag and the 'select' key are deleted from the sequence before writer(s) is called.
# NOTE(review): indentation is lost in this view; presumably both deletions and writer(s) run only for flagged sequences - confirm.
if '__@TOWRITE@__' in s:
del s['__@TOWRITE@__']
del s['select']
writer(s)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment