Commit a9f22fd6 by Eric Coissac

--no commit message

parent d86fc784
......@@ -209,7 +209,7 @@ def findC(root,base=None,pyrexs=None):
#from obitools.version import version as obiversion
#sys.path.pop(0)
VERSION = "00.02.503"
VERSION = "1.0.beta"
AUTHOR = 'Eric Coissac'
EMAIL = 'eric@coissac.eu'
URL = 'www.grenoble.prabi.fr/trac/OBITools'
......@@ -224,7 +224,7 @@ FILTERS = glob.glob('%s/*.py' % FILTERSRC)
DEPRECATED_SCRIPTS=["fastaComplement", "fastaUniq","fasta2tab","fastaAnnotate",
"fastaSample","fastaGrep","fastaCount","fastaLength",
"fastaHead","fastaTail","fastaSplit","fastaStrand",
"fastaLocate","solexaPairEnd","ecoTag"
"fastaLocate","solexaPairEnd","ecoTag","obijoinpairedend"
]
def rootname(x):
......
......@@ -16,11 +16,6 @@ from obitools.fasta import formatFasta
import sys
if __name__ == '__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addOutputFormatOption], progdoc=__doc__)
......
......@@ -45,11 +45,6 @@ def cmptax(taxonomy):
return cmptaxon
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addRankOptions,addTaxonomyFilterOptions], progdoc=__doc__)
......
......@@ -205,7 +205,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
# results = [x for x in results if x[1]>=minid]
# yield seq,([maxid[0]],maxid[1]),results
def mostPreciseTaxid(taxidlist, options):
tl = set(x for x in taxidlist if x > 1)
if not tl:
tl=set([1])
while len(tl) > 1:
t1 = tl.pop()
t2 = tl.pop()
if options.taxonomy.isAncestor(t1,t2):
taxid = t2
elif options.taxonomy.isAncestor(t2,t1):
taxid = t1
else:
taxid = options.taxonomy.lastCommonTaxon(t1,t2)
tl.add(taxid)
taxid = tl.pop()
return taxid
def lcsIteratorSelf(entries,db,options):
for seq in entries:
......@@ -214,7 +233,7 @@ def lcsIteratorSelf(entries,db,options):
minid = options.minimum
for d in db:
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
if lcs > maxid[1]:
if lcs > maxid[1] and lcs > options.minimum:
maxid = ([d],lcs)
minid = maxid[1]
elif lcs==maxid[1]:
......@@ -286,7 +305,7 @@ if __name__=='__main__':
except KeyError:
seqcount=1
if best[0] is not None:
if best[0]:
taxlist = set(taxonlink[p[0].id] for p in match)
lca = taxonomy.betterCommonTaxon(options.error,*tuple(taxlist))
scname = taxonomy.getScientificName(lca)
......@@ -297,98 +316,13 @@ if __name__=='__main__':
else:
species_list = []
species = taxonomy.getSpecies(lca)
if species is not None:
spname = taxonomy.getScientificName(species)
else:
spname = '--'
species= '-1'
genus = taxonomy.getGenus(lca)
if genus is not None:
gnname = taxonomy.getScientificName(genus)
else:
gnname = '--'
genus= '-1'
order = taxonomy.getOrder(lca)
if order is not None:
orname = taxonomy.getScientificName(order)
else:
orname = '--'
order= '-1'
family = taxonomy.getFamily(lca)
if family is not None:
faname = taxonomy.getScientificName(family)
else:
faname = '--'
family= '-1'
worst = min(x[1] for x in match)
data =['ID',seq.id,best[0][0].id,best[1],worst,'%4.3f' %(best[1]**options.shape),seqcount,len(match),lca,scname,rank,order,orname,family,faname,genus,gnname,species,spname]
data =['ID',seq.id,best[0][0].id,best[1],worst,'NA',seqcount,len(match),lca,scname,rank]
else:
data =['UK',seq.id,'--','--','--','--',seqcount,0,1,'root','no rank','-1','--','-1','--','-1','--','-1','--']
data =['UK',seq.id,'NA','NA','NA','NA',seqcount,0,1,'root','no rank']
# if options.sequence:
# data.append(seq)
#
# if options.table:
# print '\t'.join([str(x) for x in data])
# if match and rankid is not None:
# splist=count((taxonomy.getTaxonAtRank(x, rankid),y)
# for x,y in ((taxonlink[p[0].id],p[1]) for p in match))
# if None in splist:
# del splist[None]
# data=[]
# for taxon in splist:
# scname = taxonomy.getScientificName(taxon)
# species=taxonomy.getSpecies(taxon)
# countt = len(splist[taxon])
# mini = min(splist[taxon])
# maxi = max(splist[taxon])
# if species is not None:
# spname = taxonomy.getScientificName(species)
# else:
# spname = '--'
# species= '-1'
#
# genus = taxonomy.getGenus(taxon)
# if genus is not None:
# gnname = taxonomy.getScientificName(genus)
# else:
# gnname = '--'
# genus= '-1'
#
# order = taxonomy.getOrder(taxon)
# if order is not None:
# orname = taxonomy.getScientificName(order)
# else:
# orname = '--'
# order= '-1'
#
# family = taxonomy.getFamily(taxon)
# if family is not None:
# faname = taxonomy.getScientificName(family)
# else:
# faname = '--'
# family= '-1'
#
#
# data.append(['CD',seq.id,'--',maxi,mini,'--','--',countt,taxon,scname,options.explain,order,orname,family,faname,genus,gnname,species,spname])
# data.sort(lambda x,y:cmp(y[2], x[2]))
# for d in data:
# if options.sequence:
# d.append('--')
# print '\t'.join([str(x) for x in d])
# elif options.storeindb:
# storeIdentification(seq.id,data[0]=='ID',data[8],
# dict((s[0]['refdbid'],s[1]) for s in match),
# options)
# else:
tag = seq.get('id_status',{})
tag[dbname]=data[0]=='ID'
......@@ -397,14 +331,18 @@ if __name__=='__main__':
tag = seq.get('match_count',{})
tag[dbname]=data[7]
tag = seq.get('taxid',{})
tag = seq.get('taxid_by_db',{})
tag[dbname]=data[8]
seq['taxid'] = mostPreciseTaxid(tag.values(), options)
tag = seq.get('scientific_name',{})
tag = seq.get('scientific_name_by_db',{})
tag[dbname]=data[9]
seq['scientific_name']=options.taxonomy.getScientificName(seq['taxid'])
tag = seq.get('rank',{})
tag = seq.get('rank_by_db',{})
tag[dbname]=data[10]
seq['rank']=options.taxonomy.getRank(seq['taxid'])
if data[0]=='ID':
tag = seq.get('best_match',{})
......@@ -416,36 +354,38 @@ if __name__=='__main__':
tag = seq.get('species_list',{})
tag[dbname]=species_list
if int(data[11])>=0:
tag = seq.get('order',{})
tag[dbname]=data[11]
tag = seq.get('order_name',{})
tag[dbname]=data[12]
if int(data[13])>=0:
tag = seq.get('family',{})
tag[dbname]=data[13]
tag = seq.get('family_name',{})
tag[dbname]=data[14]
if int(data[15])>=0:
tag = seq.get('genus',{})
tag[dbname]=data[15]
tag = seq.get('genus_name',{})
tag[dbname]=data[16]
if int(data[17])>=0:
tag = seq.get('species',{})
tag[dbname]=data[17]
tag = seq.get('species_name',{})
tag[dbname]=data[18]
if options.explain is not None:
tag = seq.get('explain',{})
tag[dbname]=dict((s[0].id,s[1]) for s in match)
writer(seq)
seq['order']=options.taxonomy.getOrder(seq['taxid'])
if seq['order']:
seq['order_name']=options.taxonomy.getScientificName(seq['order'])
else:
seq['order_name']=None
seq['family']=options.taxonomy.getFamily(seq['taxid'])
if seq['family']:
seq['family_name']=options.taxonomy.getScientificName(seq['family'])
else:
seq['family_name']=None
# if (hasattr(options, 'ecobarcodedb') and options.ecobarcodedb is not None):
# options.ecobarcodedb.commit()
# options.ecobarcodedb.close()
seq['genus']=options.taxonomy.getGenus(seq['taxid'])
if seq['genus']:
seq['genus_name']=options.taxonomy.getScientificName(seq['genus'])
else:
seq['genus_name']=None
seq['species']=options.taxonomy.getSpecies(seq['taxid'])
if seq['species']:
seq['species_name']=options.taxonomy.getScientificName(seq['species'])
else:
seq['species_name']=None
writer(seq)
......@@ -161,11 +161,6 @@ def checkSequence(seq,direct,reverse=None,taglength=5,tags=None):
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
deprecatedScript('ngsfilter')
......
......@@ -189,11 +189,6 @@ def checkCDNA(seq,options):
return True,seq
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([add454Options],
entryIterator=fastaNucIterator
......
......@@ -56,11 +56,6 @@ def addWindowsOptions(optionManager):
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addWindowsOptions],
entryIterator=fastaIterator
......
......@@ -50,11 +50,6 @@ def addCountWordOptions(optionManager):
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addCountWordOptions],
entryIterator=fastaIterator
......
......@@ -48,11 +48,6 @@ def addGROptions(optionManager):
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addGROptions,addTaxonomyFilterOptions])
......
......@@ -69,8 +69,8 @@ def addCleanOptions(optionManager):
optionManager.add_option('-r','--ratio',
action="store", dest="ratio",
metavar="<FLOAT>",
type="float",
default=1.0,
type="string",
default="1.0",
help="Minimum ratio between counts of two sequence records so that the less abundant "
"one can be considered as a variant of the more abundant "
"[default: 1, i.e. all less abundant sequences are variant]")
......@@ -103,11 +103,6 @@ def cmpseqcount(s1,s2):
if __name__ == '__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addCleanOptions,addInOutputOption], progdoc=__doc__)
(options, entries) = optionParser()
......@@ -194,10 +189,14 @@ if __name__ == '__main__':
for ratio in options.ratio:
for node in graph.nodeIterator():
sequence = node['_sequence']
status={}
fathers={}
common={}
for sample in node['_sample']:
sampleid = (sample,round(ratio,2))
son=False
father=False
for neighbour in node.neighbourIterator():
......@@ -208,23 +207,24 @@ if __name__ == '__main__':
if c < ratio:
father|=True
if father and not son:
status[sample]='h'
status[sampleid]='h'
elif not father and not son:
status[sample]='s'
status[sampleid]='s'
else:
status[sample]='i'
status[sampleid]='i'
if options.head:
if status[sample]=='i':
fathers[sample]=[x['_sequence'].id for x in lookforFather(node, sample)]
if status[sampleid]=='i':
fathers[sampleid]=[x['_sequence'].id for x in lookforFather(node, sample)]
else:
fathers[sample]=[node['_sequence'].id]
fathers[sampleid]=[node['_sequence'].id]
if options.head:
for sa in fathers:
fathers[sa]=list(set(fathers[sa]))
for s in fathers[sa]:
common[s]=common.get(s,0)+1
sid = (s,round(ratio,2))
common[sid]=common.get(sid,0)+1
i=0
h=0
......@@ -232,42 +232,36 @@ if __name__ == '__main__':
o=0
for sample in status:
o+=1
if status[sample]=='i':
i+=1
elif status[sample]=='s':
s+=1
elif status[sample]=='h':
h+=1
for sampleid in status:
if sampleid[1]==ratio:
o+=1
if status[sampleid]=='i':
i+=1
elif status[sampleid]=='s':
s+=1
elif status[sampleid]=='h':
h+=1
sequence = node['_sequence']
### Should be removed ?
node['_sequence']['clean_%3.2f' % ratio]=status
node['_sequence']['head_%3.2f' % ratio]=h
node['_sequence']['internal_%3.2f' % ratio]=i
node['_sequence']['singleton_%3.2f' % ratio]=s
node['_sequence']['occurrence_%3.2f' % ratio]=o
if options.head:
node['_sequence']['father_%3.2f' % ratio]=fathers
node['_sequence']['fathers_%3.2f' % ratio]=common
sequence['clean']=status
tmp = sequence.get('head',{})
tmp[ratio]=h
tmp = sequence.get('internal',{})
tmp[ratio]=i
tmp = sequence.get('singleton',{})
tmp[ratio]=s
### Added the ratio as a supplementary key=value for the sequence
node['_sequence']['obiclean_ratio']='%.2f'%(ratio,)
node['_sequence']['clean']=status
node['_sequence']['head']=h
node['_sequence']['internal']=i
node['_sequence']['singleton']=s
node['_sequence']['occurrence']=o
sequence['occurrence']=o
if options.head:
node['_sequence']['father']=fathers
node['_sequence']['fathers']=common
node['_sequence'].get('father',{}).update(fathers)
node['_sequence'].get('fathers',{}).update(common)
for node in graph.nodeIterator():
......
......@@ -10,11 +10,6 @@ from obitools.format.options import addInOutputOption, sequenceWriterGenerator
if __name__ == '__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addInOutputOption])
......
......@@ -35,11 +35,6 @@ from obitools.options.bioseqfilter import addSequenceFilteringOptions, sequenceF
from obitools.options.bioseqcutter import addSequenceCuttingOptions, cutterIteratorGenerator
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addSequenceCuttingOptions,
addSequenceFilteringOptions
......
......@@ -33,11 +33,6 @@ def cmpseqcount(s1,s2):
return cmp(s2['count'],s1['count'])
if __name__ == '__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addPCRErrorOptions,addInOutputOption])
(options, entries) = optionParser()
......
......@@ -120,11 +120,15 @@ if __name__=='__main__':
if k not in omit:
s = '%s%s%s'%(s,OFS,k)
else:
if k[0][0:7] not in omit:
if k[0] not in omit:
if type(k[1]) is tuple:
sk = ":".join([str(x) for x in k[1]])
else:
sk = str(k[1])
if k[0][0:7]=='merged_':
s = '%s%s%s:%s' % (s,OFS,k[0][7:],str(k[1]))
s = '%s%s%s:%s' % (s,OFS,k[0][7:],sk)
else:
s = '%s%s%s:%s' % (s,OFS,k[0],str(k[1]))
s = '%s%s%s:%s' % (s,OFS,k[0],sk)
if options.sequence:
s = "%s%ssequence"%(s,OFS)
......
......@@ -9,8 +9,10 @@ from obitools.format.sequence.genbank import genbankIterator
from obitools.format.sequence.fnaqual import fnaFastaIterator
from obitools.format.sequence.fasta import fastaAAIterator, fastaNucIterator, fastFastaIterator
from obitools.format.sequence.fastq import fastFastqIlluminaIterator,fastFastqSolexaIterator
from obitools.fastq import fastFastqSangerIterator
from obitools.fnaqual.quality import qualityIterator
from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
from obitools.fasta import formatFasta, rawFastaIterator,\
formatSAPFastaGenerator
from obitools.fastq import formatFastq
......@@ -120,6 +122,12 @@ def addInputFormatOption(optionManager):
# help="input file is in fastq nucleic format produced by old solexa sequencer")
#===========================================================================
# group.add_option('--ecopcrdb',
# action="store_const", dest="seqinformat",
# default=None,
# const='ecopcrdb',
# help="Input file is an ecopcr database")
group.add_option('--nuc',
action="store_const", dest="moltype",
default=None,
......@@ -274,6 +282,8 @@ def autoEntriesIterator(options):
reader=fastFastqIlluminaIterator
elif options.seqinformat=='ecopcr':
reader=EcoPCRFile
elif options.seqinformat=='ecopcrdb':
reader=EcoPCRDBSequenceIterator
if options.seqinformat=='fna' and options.withqualfile is not None:
qualfile=qualityIterator(options.withqualfile)
......
......@@ -61,11 +61,6 @@ def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None,checkForm
default=False,
help="Set logging in debug mode")
# parser.add_option('--no-psyco',
# action="store_true", dest="noPsyco",
# default=False,
# help="Don't use psyco even if it installed")
parser.add_option('--without-progress-bar',
action="store_false", dest="progressbar",
default=True,
......
......@@ -84,11 +84,6 @@ def addMergeOptions(optionManager):
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addMergeOptions],
entryIterator=SolexaFile
......
......@@ -150,11 +150,6 @@ def queryIterator(stack,blastdb,seqdb,protein,evalue,covmin,local,done,v,count=2
if __name__=='__main__':
try:
import psyco
psyco.full()
except ImportError:
pass
optionParser = getOptionManager([addBlasterOptions])
(options, entries) = optionParser()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment