Commit a4932a40 by Eric Coissac

@tweet:Change the management of unaligned pairend reads

parent 73597886
......@@ -12,6 +12,7 @@ from obitools.options import getOptionManager
from obitools.ecobarcode.rawdata import sequenceIterator
import sys
from obitools.ecobarcode.ecotag import storeIdentification
import math
def addSearchOptions(optionManager):
......@@ -102,12 +103,6 @@ def addSearchOptions(optionManager):
default=0.0,
help='Tolerated rate of wrong assignation')
optionManager.add_option('-F','--fieldcutname',
action='store',dest='fieldcutname',
default=None,
help="""Turn 'unassembled match mode' on when the tag FIELDCUTNAME is in the header of the sequence.
This tag gives the position of the junction of the two unassembled reads in the sequence (on (Celine + Fred) parle trop bien anglais !)""")
def count(data):
rep = {}
......@@ -128,22 +123,26 @@ def count(data):
return rep
from obitools.location import SimpleLocation
def myLenlcs(s1, s2, fieldCutName, minid, normalized, reference):
def myLenlcs(s1, s2, minid, normalized, reference):
if (fieldCutName is not None) and (s1.hasKey(fieldCutName)) :
cutPos = s1[fieldCutName]
locs1_5P = SimpleLocation(1,cutPos)
locs1_3P = SimpleLocation(cutPos+1,len(s1))
locs2_5P = SimpleLocation(1, min(cutPos, len(s2)))
locs2_3P = SimpleLocation(max(1, len(s2)-(len(s1)-cutPos)+1), len(s2))
if s1.hasKey('pairend.limit') :
overlap = min(0,len(s1) - len(s2))
f5P1 = s1[0:s1['pairend.limit']]
f3P1 = s1[s1['pairend.limit']:]
f5P2 = s2[0:s1['pairend.limit']]
from2 = len(s2) - min(len(s2),len(f3P1))
f3P2 = s2[from2:]
errors = int(math.ceil((1-minid) * len(s1)))
minid5P = max(len(f5P1),len(f5P2)) - errors
minid3P = max(len(f3P1),len(f3P2)) - errors
lcs5P, lali5P = lenlcs(s1.getSubSeq(locs1_5P),s2.getSubSeq(locs2_5P),minid,False)
lcs3P, lali3P = lenlcs(s1.getSubSeq(locs1_3P),s2.getSubSeq(locs2_3P),minid,False)
lcs5P, lali5P = lenlcs(f5P1,f5P2,minid5P,False)
lcs3P, lali3P = lenlcs(f3P1,f3P2,minid3P,False)
raw_lcs = lcs5P + lcs3P - overlap
lali = lali5P + lali3P - overlap
......@@ -162,7 +161,7 @@ def lcsIterator(entries,db,options):
maxid = (None,0.0)
minid = options.minimum
for d in db:
lcs,lali = myLenlcs(seq, d, options.fieldcutname, minid,normalized=True,reference=ALILEN)
lcs,lali = myLenlcs(seq, d, minid,normalized=True,reference=ALILEN)
if lcs > maxid[1]:
maxid = (d,lcs)
minid = maxid[1] ** options.shape
......@@ -179,7 +178,7 @@ def lcsIteratorSelf(entries,db,options):
maxid = ([],0.0)
minid = options.minimum
for d in db:
lcs,lali = myLenlcs(seq,d,options.fieldcutname,minid,normalized=True,reference=ALILEN)
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
if lcs > maxid[1]:
maxid = ([d],lcs)
minid = maxid[1]
......
......@@ -21,20 +21,6 @@ def addPairEndOptions(optionManager):
default=None,
help="Filename containing reverse solexa reads "
)
optionManager.add_option('-j','--junction-length',
action="store", dest="junction",
metavar="###",
type="int",
default=10,
help="length of the N junction between both the reads (default=10)"
)
optionManager.add_option('-n','--n-quality',
action="store", dest="nqual",
metavar="###",
type="int",
default=10,
help="quality assign to the N of the junction (default=10)"
)
def cutDirectReverse(entries):
......@@ -76,9 +62,9 @@ def buildJoinedSequence(sequences,options):
for d,r in sequences:
r=r.complement()
s = str(d) + 'n' * junction + str(r)
s = str(d) + str(r)
seq = NucSequence(d.id + '_Join',s,d.definition,**d)
seq = NucSequence(d.id + '_PairEnd',s,d.definition,**d)
withqual = hasattr(d, 'quality') or hasattr(r, 'quality')
......@@ -96,6 +82,8 @@ def buildJoinedSequence(sequences,options):
quality.extend([10**-4] * len(r))
seq.quality=quality
seq['pairend.limit']=len(d)
yield seq
......
......@@ -82,20 +82,6 @@ def addSolexaPairEndOptions(optionManager):
default=None,
help="maximum pvalue for keeping aligment")
optionManager.add_option('-j','--junction-length',
action="store", dest="junction",
metavar="###",
type="int",
default=10,
help="length of the N junction between both the reads (default=10)"
)
optionManager.add_option('-n','--n-quality',
action="store", dest="nqual",
metavar="###",
type="int",
default=10,
help="quality assign to the N of the junction (default=10)"
)
def cutDirectReverse(entries):
......@@ -172,9 +158,9 @@ def buildJoinedSequence(ali,options):
r=r.complement()
s = str(d) + 'n' * junction + str(r)
s = str(d) + str(r)
seq = NucSequence(d.id + '_Join',s,d.definition,**d)
seq = NucSequence(d.id + '_PairEnd',s,d.definition,**d)
withqual = hasattr(d, 'quality') or hasattr(r, 'quality')
......@@ -196,6 +182,7 @@ def buildJoinedSequence(ali,options):
seq['score']=ali.score
seq['direction']=ali.direction
seq['mode']='join'
seq['pairend.limit']=len(d)
return seq
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment