Commit 5dfc4b96 by Eric Coissac

--no commit message

parent d96b8f9d
......@@ -6,7 +6,7 @@ Created on 18 sept. 2009
'''
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.utils import deprecatedScript
......@@ -23,6 +23,7 @@ if __name__ == '__main__':
(options, entries) = optionParser()
writer = sequenceWriterGenerator(options)
for entry in entries:
print printOutput(options,entry)
\ No newline at end of file
print writer(options,entry)
\ No newline at end of file
......@@ -7,9 +7,11 @@ from obitools.utils.bioseq import uniqSequence,sortSequence
from obitools.align import lenlcs
from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
from obitools.ecobarcode.databases import referenceDBIterator
from obitools.options import getOptionManager
from obitools.ecobarcode.rawdata import sequenceIterator
import sys
from obitools.ecobarcode.ecotag import storeIdentification
def addSearchOptions(optionManager):
......@@ -64,6 +66,16 @@ def addSearchOptions(optionManager):
default=False,
help='Write results in a tabular format')
optionManager.add_option('--store-in-db',
action='store_true',dest='storeindb',
default=False,
help='Write results in an ecobarcode DB')
optionManager.add_option('--update-db',
action='store_true',dest='updatedb',
default=False,
help='Run identification only on new sequences')
optionManager.add_option('--sort',
action='store',dest='sort',
type='string',
......@@ -135,7 +147,16 @@ if __name__=='__main__':
taxonomy = loadTaxonomyDatabase(options)
db = list(fastaNucIterator(options.database))
if (hasattr(options, 'ecobarcodedb')):
print >>sys.stderr,"Reading reference DB ...",
db = list(referenceDBIterator(options))
print >>sys.stderr," : %d" % len(db)
if options.primer is not None:
entries = sequenceIterator(options)
else:
db = list(fastaNucIterator(options.database))
# print "##########@",options.large
taxonlink = {}
......@@ -208,6 +229,10 @@ if __name__=='__main__':
if options.table:
print '\t'.join([str(x) for x in data])
elif options.storeindb:
storeIdentification(seq.id,data[0]=='ID',data[8],
dict((s[0]['refdbid'],s[1]) for s in match),
options)
else:
seq['id_status']=data[0]=='ID'
seq['count']=data[6]
......@@ -283,4 +308,9 @@ if __name__=='__main__':
if options.sequence:
d.append('--')
print '\t'.join([str(x) for x in d])
if (hasattr(options, 'ecobarcodedb')):
options.ecobarcodedb.commit()
options.ecobarcodedb.close()
\ No newline at end of file
#!/usr/local/bin/python
'''
-----------------------------------
fastaComplement.py
-----------------------------------
fastaComplement.py <fastafile>
complement and reverse all sequence in the fasta file
-----------------------------------"
-h --help : print this help
-----------------------------------"
'''
from obitools.fasta import fastaNucIterator,formatFasta
from obitools.options import getOptionManager
from obitools.options.bioseqfilter import addSequenceFilteringOptions
from obitools.options.bioseqfilter import filterGenerator
from obitools.utils import deprecatedScript
if __name__=='__main__':
deprecatedScript('obicomplement')
optionParser = getOptionManager([addSequenceFilteringOptions],
entryIterator=fastaNucIterator
)
(options, entries) = optionParser()
goodFasta = filterGenerator(options)
for seq in entries:
if goodFasta(seq):
print formatFasta(seq.complement())
else:
print formatFasta(seq)
\ No newline at end of file
......@@ -13,7 +13,7 @@ import math
from obitools.options import getOptionManager
from obitools.utils import ColumnFile
from obitools.align import FreeEndGap
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
......@@ -419,12 +419,18 @@ if __name__ == '__main__':
if options.unidentified is not None:
unidentified = open(options.unidentified,"w")
writer = sequenceWriterGenerator(options)
if options.unidentified is not None:
unidentified = sequenceWriterGenerator(options,open(unidentified,"w"))
else :
unidentified = None
for seq in entries:
good,seq = annotate(seq,options)
if good:
printOutput(options,seq)
elif options.unidentified is not None:
printOutput(options,seq,unidentified)
writer(seq)
elif unidentified is not None:
unidentified(seq)
\ No newline at end of file
......@@ -6,7 +6,7 @@ from obitools.options.bioseqfilter import addSequenceFilteringOptions
from obitools.options.bioseqfilter import filterGenerator
from obitools.options.bioseqedittag import addSequenceEditTagOptions
from obitools.options.bioseqedittag import sequenceTaggerGenerator
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
if __name__=='__main__':
......@@ -17,6 +17,8 @@ if __name__=='__main__':
(options, entries) = optionParser()
writer = sequenceWriterGenerator(options)
if not options.noPsyco:
try:
import psyco
......@@ -30,5 +32,5 @@ if __name__=='__main__':
for seq in entries:
if goodFasta(seq):
sequenceTagger(seq)
printOutput(options,seq)
writer(seq)
......@@ -4,7 +4,7 @@
from obitools.options import getOptionManager
from obitools.options.bioseqfilter import addSequenceFilteringOptions
from obitools.options.bioseqfilter import filterGenerator
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
if __name__=='__main__':
......@@ -14,11 +14,12 @@ if __name__=='__main__':
(options, entries) = optionParser()
goodFasta = filterGenerator(options)
writer = sequenceWriterGenerator(options)
for seq in entries:
if goodFasta(seq):
printOutput(options,seq.complement())
writer(seq.complement())
else:
printOutput(options,seq)
writer(seq)
\ No newline at end of file
......@@ -6,7 +6,7 @@ Created on 18 sept. 2009
'''
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
if __name__ == '__main__':
......@@ -20,6 +20,7 @@ if __name__ == '__main__':
(options, entries) = optionParser()
writer = sequenceWriterGenerator(options)
for entry in entries:
print printOutput(options,entry)
\ No newline at end of file
writer(entry)
\ No newline at end of file
......@@ -2,7 +2,7 @@
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
import math
......@@ -37,13 +37,16 @@ if __name__=='__main__':
digit = math.ceil(math.log10(options.number))
out=[]
template = "%s_%%0%dd.%s" % (options.prefix,digit,options.outputFormat)
out=[sequenceWriterGenerator(options,
open(template % (i+1),"w"))
for i in xrange(options.number)
]
i = 0
for seq in entries:
if not out:
template = "%s_%%0%dd.%s" % (options.prefix,digit,options.outputFormat)
out=[open(template % (i+1),"w") for i in xrange(options.number)]
printOutput(options,seq,out[i])
out[i](seq)
i+=1
i%=options.number
......
......@@ -4,7 +4,7 @@ Created on 27 avr. 2010
@author: coissac
'''
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
from obitools.options.bioseqfilter import addSequenceFilteringOptions
from obitools.options.bioseqfilter import sequenceFilterIteratorGenerator
......@@ -15,9 +15,10 @@ if __name__=='__main__':
optionParser = getOptionManager([addSequenceFilteringOptions,addInOutputOption])
(options, entries) = optionParser()
goodSeq = sequenceFilterIteratorGenerator(options)
writer = sequenceWriterGenerator(options)
for seq in goodSeq(entries):
printOutput(options,seq)
writer(seq)
......@@ -5,7 +5,7 @@ Created on 15 dec. 2009
@author: coissac
'''
import sys
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
......@@ -23,10 +23,12 @@ if __name__ == '__main__':
(options, entries) = optionParser()
i=0
writer = sequenceWriterGenerator(options)
for s in entries:
if i < options.count:
printOutput(options,s)
writer(s)
i+=1
else:
sys.exit(0)
......
......@@ -7,7 +7,7 @@ Created on 1 nov. 2009
from obitools.options import getOptionManager
from obitools.sample import weigthedSample
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
def addSampleOptions(optionManager):
optionManager.add_option('-s','--sample-size',
......@@ -44,11 +44,13 @@ if __name__ == '__main__':
idx+=1
sample =weigthedSample(distribution, options.size)
writer = sequenceWriterGenerator(options)
for idx in sample:
seq = db[idx]
seq['count']=sample[idx]
printOutput(options,seq)
writer(seq)
......@@ -4,7 +4,7 @@ Created on 6 juil. 2010
@author: coissac
'''
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
from obitools.format.sequence import autoSequenceIterator
......@@ -18,6 +18,11 @@ def addSelectOptions(optionManager):
help="file containing sample sequences to select on "
"on the base of their identifier")
optionManager.add_option('-v',
action="store_true", dest="invert",
default=False,
help="invert selection")
......@@ -26,9 +31,16 @@ if __name__ == '__main__':
(options, entries) = optionParser()
idset=set(x.id for x in autoSequenceIterator(options.identifier))
idset=set(x.strip() for x in open(options.identifier))
writer = sequenceWriterGenerator(options)
def invert(x):
if options.invert:
return not x
else:
return x
for seq in entries:
if seq.id in idset:
printOutput(options,seq)
if invert(seq.id in idset):
writer(seq)
......@@ -4,7 +4,7 @@ Created on 3 fevr. 2011
@author: coissac
'''
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
def addSortOptions(optionManager):
......@@ -49,5 +49,8 @@ if __name__ == '__main__':
seqs = [seq for seq in entries]
seqs.sort(cmpk, reverse=options.reverse)
writer = sequenceWriterGenerator(options)
for seq in seqs:
printOutput(options,seq)
writer(seq)
......@@ -4,7 +4,7 @@ Created on 15 dec. 2009
@author: coissac
'''
from obitools.format.options import addInOutputOption, printOutput
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
import collections
......@@ -24,9 +24,11 @@ if __name__ == '__main__':
i=0
queue = collections.deque(entries,options.count)
writer = sequenceWriterGenerator(options)
while queue:
printOutput(options,queue.popleft())
writer(queue.popleft())
......
'''
Created on 25 sept. 2010
@author: coissac
'''
from obitools import NucSequence
def referenceDBIterator(options):
cursor = options.ecobarcodedb.cursor()
cursor.execute("select id from databases.database where name='%s'" % options.database)
options.dbid = cursor.fetchone()[0]
cursor.execute('''
select s.accession,r.id,r.taxid,r.sequence
from databases.database d,
databases.reference r,
databases.relatedsequences s
where r.database = d.id
and s.reference= r.id
and s.mainac
and d.name = '%s'
''' % options.database
)
for ac,id,taxid,sequence in cursor:
s = NucSequence(ac,sequence)
s['taxid']=taxid
s['refdbid']=id
yield s
\ No newline at end of file
'''
Created on 25 sept. 2010
@author: coissac
'''
def alreadyIdentified(seqid,options):
cursor = options.ecobarcodedb.cursor()
cursor.execute('''
select count(*)
from ecotag.identification
where sequence=%s
and database=%s
''',(int(seqid),int(options.dbid)))
return int(cursor.fetchone()[0]) > 0;
def storeIdentification(seqid,
idstatus,taxid,
matches,
options
):
cursor = options.ecobarcodedb.cursor()
if not options.updatedb:
cursor.execute('''
delete from ecotag.identification where sequence=%s and database=%s
''',(int(seqid),int(options.dbid)))
cursor.execute('''
insert into ecotag.identification (sequence,database,idstatus,taxid)
values (%s,%s,%s,%s)
returning id
''' , (int(seqid),int(options.dbid),idstatus,int(taxid)))
idid = cursor.fetchone()[0]
for seq,identity in matches.iteritems():
cursor.execute('''
insert into ecotag.evidence (identification,reference,identity)
values (%s,
%s,
%s)
''',(idid,seq,identity))
cursor.close()
options.ecobarcodedb.commit()
'''
Created on 23 sept. 2010
@author: coissac
'''
import psycopg2
from obitools.ecobarcode.taxonomy import EcoTaxonomyDB
def addEcoBarcodeDBOption(optionManager):
optionManager.add_option('--dbname',
action="store", dest="ecobarcodedb",
type='str',
default=None,
help="Specify the name of the ecobarcode database")
optionManager.add_option('--server',
action="store", dest="dbserver",
type='str',
default="localhost",
help="Specify the adress of the ecobarcode database server")
optionManager.add_option('--user',
action="store", dest="dbuser",
type='str',
default='postgres',
help="Specify the user of the ecobarcode database")
optionManager.add_option('--port',
action="store", dest="dbport",
type='str',
default=5432,
help="Specify the port of the ecobarcode database")
optionManager.add_option('--passwd',
action="store", dest="dbpasswd",
type='str',
default='',
help="Specify the passwd of the ecobarcode database")
optionManager.add_option('--primer',
action="store", dest="primer",
type='str',
default=None,
help="Specify the primer used for amplification")
def ecobarcodeDatabaseConnection(options):
if options.ecobarcodedb is not None:
connection = psycopg2.connect(database=options.ecobarcodedb,
user=options.dbuser,
password=options.dbpasswd,
host=options.dbserver,
port=options.dbport)
options.dbname=options.ecobarcodedb
else:
connection=None
if connection is not None:
options.ecobarcodedb=connection
taxonomy = EcoTaxonomyDB(connection)
else:
taxonomy=None
return taxonomy
'''
Created on 25 sept. 2010
@author: coissac
'''
from obitools import NucSequence
from obitools.utils import progressBar
from obitools.ecobarcode.ecotag import alreadyIdentified
import sys
def sequenceIterator(options):
cursor = options.ecobarcodedb.cursor()
cursor.execute('''
select s.id,sum(o.count),s.sequence
from rawdata.sequence s,
rawdata.occurrences o
where o.sequence= s.id
and s.primers = '%s'
group by s.id,s.sequence
''' % options.primer
)
nbseq = cursor.rowcount
progressBar(1, nbseq, True, head=options.dbname)
for id,count,sequence in cursor:
progressBar(cursor.rownumber+1, nbseq, head=options.dbname)
if not options.updatedb or not alreadyIdentified(id,options):
s = NucSequence(id,sequence)
s['count']=count
print >>sys.stderr,' +', cursor.rownumber+1,
yield s
else:
print >>sys.stderr,' @', cursor.rownumber+1,
print >>sys.stderr
'''
Created on 24 sept. 2010
@author: coissac
'''
from obitools.ecopcr.taxonomy import TaxonomyDump
from obitools.ecopcr.taxonomy import Taxonomy
import sys
class EcoTaxonomyDB(TaxonomyDump) :
def __init__(self,dbconnect):
self._dbconnect=dbconnect
print >> sys.stderr,"Reading ecobarcode taxonomy database..."
self._readNodeTable()
print >> sys.stderr," ok"
print >>sys.stderr,"Adding scientific name..."
self._name=[]
for taxid,name,classname in self._nameIterator():
self._name.append((name,classname,self._index[taxid]))
if classname == 'scientific name':
self._taxonomy[self._index[taxid]].append(name)
print >>sys.stderr,"Adding taxid alias..."
for taxid,current in self._mergedNodeIterator():
self._index[taxid]=self._index[current]
print >>sys.stderr,"Adding deleted taxid..."
for taxid in self._deletedNodeIterator():
self._index[taxid]=None
Taxonomy.__init__(self)
#####
#
# Iterator functions
#
#####
def _readNodeTable(self):
cursor = self._dbconnect.cursor()
cursor.execute("""
select taxid,rank,parent
from ncbitaxonomy.nodes
""")
print >>sys.stderr,"Reading taxonomy nodes..."
taxonomy=[list(n) for n in cursor]
print >>sys.stderr,"List all taxonomy rank..."
ranks =list(set(x[1] for x in taxonomy))
ranks.sort()
rankidx = dict(map(None,ranks,xrange(len(ranks))))
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(TaxonomyDump._taxonCmp)
self._taxonomy=taxonomy
print >>sys.stderr,"Indexing taxonomy..."
index = {}
for t in self._taxonomy:
index[t[0]]=self._bsearchTaxon(t[0])
print >>sys.stderr,"Indexing parent and rank..."
for t in self._taxonomy:
t[1]=rankidx[t[1]]
t[2]=index[t[2]]
self._ranks=ranks
self._index=index
cursor.close()
def _nameIterator(self):
cursor = self._dbconnect.cursor()
cursor.execute("""
select taxid,name,nameclass
from ncbitaxonomy.names
""")
for taxid,name,nameclass in cursor:
yield taxid,name,nameclass
cursor.close()
def _mergedNodeIterator(self):
cursor = self._dbconnect.cursor()
cursor.execute("""
select oldtaxid,newtaxid
from ncbitaxonomy.merged
""")
for oldtaxid,newtaxid in cursor:
yield oldtaxid,newtaxid
cursor.close()
def _deletedNodeIterator(self):
cursor = self._dbconnect.cursor()
cursor.execute("""
select taxid
from ncbitaxonomy.delnodes
""")
for taxid in cursor:
yield taxid[0]
cursor.close()
'''
Created on 13 fvr. 2011
@author: coissac
'''
from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter