1
3 uniques={}
4 uniqSeq=[]
5
6 for seq in seqIterator:
7 s = str(seq)
8 if s in uniques:
9 s = uniques[s]
10 if 'count' in seq:
11 s['count']+=seq['count']
12 else:
13 s['count']+=1
14 if taxonomy is not None and 'taxid' in seq:
15 s['merged_taxid'].add(seq['taxid'])
16 s['merged'].append(seq.id)
17 else:
18 uniques[s]=seq
19 if 'count' not in seq:
20 seq['count']=1
21 if taxonomy is not None:
22 seq['merged_taxid']=set([])
23 if 'taxid' in seq:
24 seq['merged_taxid'].add(seq['taxid'])
25 seq['merged']=[seq.id]
26 uniqSeq.append(seq)
27
28 if taxonomy is not None:
29 for seq in uniqSeq:
30 if seq['merged_taxid']:
31 seq['taxid']=taxonomy.lastCommonTaxon(*list(seq['merged_taxid']))
32 tsp = taxonomy.getSpecies(seq['taxid'])
33 tgn = taxonomy.getGenus(seq['taxid'])
34 tfa = taxonomy.getFamily(seq['taxid'])
35
36 if tsp is not None:
37 sp_sn = taxonomy.getScientificName(tsp)
38 else:
39 sp_sn="###"
40 tsp=-1
41
42 if tgn is not None:
43 gn_sn = taxonomy.getScientificName(tgn)
44 else:
45 gn_sn="###"
46 tgn=-1
47
48 if tfa is not None:
49 fa_sn = taxonomy.getScientificName(tfa)
50 else:
51 fa_sn="###"
52 tfa=-1
53
54 seq['species']=tsp
55 seq['genus']=tgn
56 seq['family']=tfa
57
58 seq['species_sn']=sp_sn
59 seq['genus_sn']=gn_sn
60 seq['family_sn']=fa_sn
61
62 seq['rank']=taxonomy.getRank(seq['taxid'])
63 seq['scientific_name']=fa_sn = taxonomy.getScientificName(seq['taxid'])
64
65
66
67
68 return uniqSeq
69
def compare(x,y):
    """Old-style (``cmp``) comparison of *x* and *y* on the value stored under ``key``.

    ``key`` and ``reverse`` are free names that are not defined in this
    block (presumably bound by the enclosing function -- confirm against
    the full file).

    An item for which ``item[key]`` raises ``KeyError`` is compared as if
    its value were ``None``.  When ``reverse`` is true the two values are
    exchanged before the comparison, which inverts the resulting order.

    Returns whatever ``cmp`` returns for the two looked-up values.
    NOTE(review): ``cmp`` is a builtin in Python 2 only.
    """
    def _lookup(item):
        # A missing key is not an error here: it simply maps to None.
        try:
            return item[key]
        except KeyError:
            return None

    left, right = _lookup(x), _lookup(y)

    if reverse:
        # Swapping the operands flips the sign of the comparison.
        left, right = right, left

    return cmp(left, right)
87
88 return compare
89
94