Commit 3faf2c79 by Celine Mercier

updated ecoFindTaxids to read SILVA databases.

parent 14bbcd66
......@@ -14,6 +14,14 @@ from obitools.format.genericparser import genericEntryIteratorGenerator
from obitools import NucSequence
def numberInStr(s):
    """Return True if the string ``s`` contains at least one digit.

    Parameters:
        s: the string (or any iterable of characters) to inspect.

    Returns:
        True when at least one character of ``s`` satisfies
        ``str.isdigit()``, False otherwise (including for an empty string).
    """
    # any() stops at the first digit found instead of scanning the
    # whole string with a manually maintained flag.
    return any(c.isdigit() for c in s)
def UNITEIterator(f):
fastaEntryIterator = genericEntryIteratorGenerator(startEntry='>')
......@@ -33,7 +41,7 @@ def UNITEIterator(f):
yield s
def SILVAIterator(f):
def SILVAIterator(f, tax):
fastaEntryIterator = genericEntryIteratorGenerator(startEntry='>')
for entry in fastaEntryIterator(f) :
......@@ -44,17 +52,44 @@ def SILVAIterator(f):
seq = all[1]
s = NucSequence(id, seq)
if '(' in fields[1] :
species_name = ''
found = False
for word in fields[1].split(' ') :
if word == '(' :
found = True
if found == False :
species_name = species_name + ' ' + word
if (
'(' in fields[1]
and len(fields[1].split('(')[1][:-1]) > 2
and ')' not in fields[1].split('(')[1][:-1]
and not numberInStr(fields[1].split('(')[1][:-1])
) :
species_name = fields[1].split('(')[0][:-1]
other_name = fields[1].split('(')[1][:-1]
ancestor = None
notAnAncestor = False
if (len(other_name.split(' ')) == 1 and other_name[0].isupper()):
try:
ancestor = tax.findTaxonByName(other_name)
except KeyError :
notAnAncestor = True
if (ancestor == None and notAnAncestor == False):
s['common_name'] = other_name
s['original_silva_name'] = fields[1]
s['species_name'] = species_name
elif (ancestor != None and notAnAncestor == False) :
s['ancestor_name'] = other_name
s['ancestor'] = ancestor[0]
s['original_silva_name'] = fields[1]
s['species_name'] = species_name
elif notAnAncestor == True :
s['species_name'] = fields[1]
#print formatFasta(s)
else :
species_name = fields[1]
s['species_name'] = species_name
s['species_name'] = fields[1]
yield s
......@@ -264,13 +299,16 @@ if __name__=='__main__':
if options.db_type == 'raw' :
entryIterator=fastaIterator
entries = entryIterator(entries)
elif options.db_type == 'UNITE' :
entryIterator=UNITEIterator
entries = entryIterator(entries)
elif options.db_type == 'SILVA' :
entryIterator=SILVAIterator
entries = entryIterator(entries, tax)
options.tagname = 'species_name'
entries = entryIterator(entries)
#entries = entryIterator(entries)
openFiles(options)
if (options.db_type == 'raw') or (options.db_type == 'SILVA') :
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment