Package obitools :: Package utils :: Module bioseq
[hide private]
[frames | no frames]

Source Code for Module obitools.utils.bioseq

 1   
def _rankOrPlaceholder(taxonomy, rankTaxid):
    """Return ``(taxid, scientific name)`` for *rankTaxid*.

    When *rankTaxid* is ``None`` the placeholder pair ``(-1, "###")`` is
    returned instead.
    """
    if rankTaxid is None:
        return -1, "###"
    return rankTaxid, taxonomy.getScientificName(rankTaxid)


def _annotateTaxonomy(seq, taxonomy):
    """Fill the taxonomy attributes of *seq* from its 'merged_taxid' set."""
    seq['taxid'] = taxonomy.lastCommonTaxon(*seq['merged_taxid'])
    taxid = seq['taxid']

    # Query the three ranks first, then resolve their names, so the
    # taxonomy object is called in the same order as before.
    tsp = taxonomy.getSpecies(taxid)
    tgn = taxonomy.getGenus(taxid)
    tfa = taxonomy.getFamily(taxid)

    tsp, sp_sn = _rankOrPlaceholder(taxonomy, tsp)
    tgn, gn_sn = _rankOrPlaceholder(taxonomy, tgn)
    tfa, fa_sn = _rankOrPlaceholder(taxonomy, tfa)

    seq['species'] = tsp
    seq['genus'] = tgn
    seq['family'] = tfa

    seq['species_sn'] = sp_sn
    seq['genus_sn'] = gn_sn
    seq['family_sn'] = fa_sn

    seq['rank'] = taxonomy.getRank(taxid)
    seq['scientific_name'] = taxonomy.getScientificName(taxid)


def uniqSequence(seqIterator, taxonomy=None):
    """Merge sequences that have the same string form.

    Sequences are grouped on ``str(seq)``.  The first sequence of each
    group is kept (and modified in place); later ones are folded into it:

    - ``'count'`` accumulates the ``'count'`` of every merged sequence,
      a sequence without ``'count'`` counting for 1;
    - ``'merged'`` lists the ``id`` of every sequence of the group;
    - when *taxonomy* is given, ``'merged_taxid'`` is the set of the
      ``'taxid'`` values seen in the group.

    When *taxonomy* is given, every kept sequence with a non-empty
    ``'merged_taxid'`` gets ``'taxid'`` set to
    ``taxonomy.lastCommonTaxon`` of those taxids, plus ``'species'``,
    ``'genus'``, ``'family'`` (``-1`` when the taxonomy returns ``None``),
    their ``'*_sn'`` scientific names (``"###"`` when missing),
    ``'rank'`` and ``'scientific_name'``.

    Returns the list of kept sequences in order of first appearance.
    """
    uniques = {}
    uniqSeq = []

    for seq in seqIterator:
        key = str(seq)
        if key in uniques:
            kept = uniques[key]
            if 'count' in seq:
                kept['count'] += seq['count']
            else:
                kept['count'] += 1
            if taxonomy is not None and 'taxid' in seq:
                kept['merged_taxid'].add(seq['taxid'])
            kept['merged'].append(seq.id)
        else:
            uniques[key] = seq
            if 'count' not in seq:
                seq['count'] = 1
            if taxonomy is not None:
                seq['merged_taxid'] = set()
                if 'taxid' in seq:
                    seq['merged_taxid'].add(seq['taxid'])
            seq['merged'] = [seq.id]
            uniqSeq.append(seq)

    if taxonomy is not None:
        for seq in uniqSeq:
            # Groups in which no sequence carried a taxid are left untouched.
            if seq['merged_taxid']:
                _annotateTaxonomy(seq, taxonomy)

    return uniqSeq
70 -def _cmpOnKeyGenerator(key,reverse=False):
71 def compare(x,y): 72 try: 73 c1 = x[key] 74 except KeyError: 75 c1=None 76 77 try: 78 c2 = y[key] 79 except KeyError: 80 c2=None 81 82 if reverse: 83 s=c1 84 c1=c2 85 c2=s 86 return cmp(c1,c2)
87 88 return compare 89
def sortSequence(seqIterator, key, reverse=False):
    """Return the items of *seqIterator* as a new list sorted on ``item[key]``.

    Items for which ``item[key]`` raises ``KeyError`` are treated as
    having the value ``None`` and sort before every other value (after
    them when *reverse* is true).  Items with equal values keep their
    original relative order.
    """
    def sortKey(item):
        try:
            value = item[key]
        except KeyError:
            value = None
        # The leading flag orders None first without ever comparing None
        # to a real value, which Python 3 refuses to do.
        return (value is not None, value)

    # list.sort() no longer accepts a comparison function in Python 3;
    # key= / reverse= give the same ordering on both Python 2 and 3.
    return sorted(seqIterator, key=sortKey, reverse=reverse)
94