Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
O
OBITools
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
18
Issues
18
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools
Commits
b9988181
Commit
b9988181
authored
Jun 12, 2015
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Closes
#9
: adds a parser in obiaddtaxids for the UNITE 'general FASTA
release' format
parent
df093b3f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
36 additions
and
12 deletions
+36
-12
obiaddtaxids.py
src/obiaddtaxids.py
+36
-12
No files found.
src/obiaddtaxids.py
View file @
b9988181
...
...
@@ -68,7 +68,6 @@ Otherwise,
'''
import
sys
import
re
from
obitools.fasta
import
fastaIterator
,
formatFasta
...
...
@@ -111,9 +110,9 @@ def addObiaddtaxidsOptions(optionManager):
metavar
=
"<FORMAT>"
,
type
=
"string"
,
default
=
'raw'
,
help
=
"type of the database with the taxa to be added. Possibilities : 'raw', 'UNITE' or 'SILVA'."
"The UNITE
format must be the one used for the 'Full UNITE+INSD dataset'. Example :
"
"
>UDB016651|k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Thelephorales;f__Thelephoraceae;g__Tomentella;s__Tomentella sp|SH200602.06FU
"
help
=
"type of the database with the taxa to be added. Possibilities : 'raw', 'UNITE
_FULL', 'UNITE_GENERAL
' or 'SILVA'."
"The UNITE
_FULL format is the one used for the 'Full UNITE+INSD dataset', and the UNITE_GENERAL format is the
"
"
one used for the 'General FASTA release'.
"
" Default : raw."
)
optionManager
.
add_option
(
'-k'
,
'--key-name'
,
...
...
@@ -145,7 +144,7 @@ def numberInStr(s) :
return
containsNumber
def
UNITEIterator
(
f
):
def
UNITEIterator
_FULL
(
f
):
fastaEntryIterator
=
genericEntryIteratorGenerator
(
startEntry
=
'>'
)
for
entry
in
fastaEntryIterator
(
f
)
:
...
...
@@ -172,6 +171,28 @@ def UNITEIterator(f):
yield
s
def UNITEIterator_GENERAL(f):
    """Iterate over the records of a UNITE 'General FASTA release' file.

    Each record is split into its header line and its sequence lines. The
    header is cut on '|': the first field (without the leading '>') is used
    as the sequence identifier, and the fifth field is used as the taxonomic
    path.
    NOTE(review): presumably headers look like
    '>Genus_species|accession|SH code|refs|k__...;p__...;...' -- confirm
    against the UNITE release actually used.

    @param f: the input handed to the generic entry iterator (records are
              delimited by lines starting with '>').
    @return: a generator yielding one NucSequence per record, tagged with
             'species_name' (identifier with '_' replaced by ' ') and
             'path' (comma-separated lineage without rank prefixes).
    @raise IndexError: if a header has fewer than five '|'-separated fields.
    """
    fastaEntryIterator = genericEntryIteratorGenerator(startEntry='>')
    for entry in fastaEntryIterator(f):
        lines = entry.split('\n')
        fields = lines[0].split('|')

        # Drop the leading '>' of the header to get the identifier.
        seq_id = fields[0][1:]

        # Concatenate every line after the header so that sequences wrapped
        # over several lines are kept whole instead of being truncated to
        # their first line.
        seq = ''.join(lines[1:])

        s = NucSequence(seq_id, seq)
        s['species_name'] = seq_id.replace("_", " ")

        # Strip the single-letter rank prefixes ('k__', 'p__', ...), switch
        # the separator from ';' to ',', then collapse doubled commas left
        # behind by empty ranks (single pass, as before).
        path = re.sub(r'[a-z]__', '', fields[4])
        path = path.replace(';', ',')
        s['path'] = path.replace(',,', ',')

        yield s
def
SILVAIterator
(
f
,
tax
):
fastaEntryIterator
=
genericEntryIteratorGenerator
(
startEntry
=
'>'
)
...
...
@@ -317,8 +338,10 @@ if __name__=='__main__':
if
options
.
db_type
==
'raw'
:
entryIterator
=
fastaIterator
entries
=
entryIterator
(
entries
)
elif
options
.
db_type
==
'UNITE'
:
entryIterator
=
UNITEIterator
elif
options
.
db_type
==
'UNITE_FULL'
:
entryIterator
=
UNITEIterator_FULL
elif
options
.
db_type
==
'UNITE_GENERAL'
:
entryIterator
=
UNITEIterator_GENERAL
entries
=
entryIterator
(
entries
)
elif
options
.
db_type
==
'SILVA'
:
entryIterator
=
SILVAIterator
...
...
@@ -353,6 +376,7 @@ if __name__=='__main__':
if
options
.
genus_found
is
not
None
and
len
(
species_name
.
split
(
' '
))
>=
2
:
try
:
genusTaxid
=
getGenusTaxid
(
tax
,
species_name
,
restricting_ancestor
)
s
[
'genus_taxid'
]
=
genusTaxid
print
>>
options
.
genus_found
,
formatFasta
(
s
)
genusFound
=
True
except
KeyError
:
...
...
@@ -368,16 +392,17 @@ if __name__=='__main__':
print
>>
options
.
unidentified
,
formatFasta
(
s
)
elif
options
.
db_type
==
'UNITE'
:
elif
((
options
.
db_type
==
'UNITE_FULL'
)
or
(
options
.
db_type
==
'UNITE_GENERAL'
))
:
restricting_ancestor
=
tax
.
findTaxonByName
(
'Fungi'
)[
0
][
0
]
for
s
in
entries
:
try
:
species_name
=
s
[
'species_name'
]
taxid
=
getTaxid
(
tax
,
species_name
,
restricting_ancestor
)
s
[
'taxid'
]
=
taxid
s
[
'taxid'
]
=
taxid
s
[
'rank'
]
=
tax
.
getRank
(
taxid
)
print
formatFasta
(
s
)
...
...
@@ -386,8 +411,7 @@ if __name__=='__main__':
genusFound
=
False
if
options
.
genus_found
is
not
None
:
try
:
genus_name
=
s
[
'genus_name'
]
genusTaxid
=
getGenusTaxid
(
tax
,
genus_name
,
restricting_ancestor
)
genusTaxid
=
getGenusTaxid
(
tax
,
species_name
,
restricting_ancestor
)
s
[
'genus_taxid'
]
=
genusTaxid
print
>>
options
.
genus_found
,
formatFasta
(
s
)
genusFound
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment