Commit c92d2785 by Eric Coissac

Preparation for the path2sam function

parent a40e6d39
......@@ -1204,12 +1204,15 @@ def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back
(oldid,stemid))
elif forceconnection:
logOrPrint(logger," Connection is forced")
if connected > 0:
glength = int(frglen-ml - self.index.getReadSize())
logOrPrint(logger,
"Connection is forced with only %d pair ended links (gap length=%f sd=%f)" %
(connected,float(glength),float(sl)))
" But asserted by %d pair ended links (gap length=%f sd=%f)" %
(connected,
glength,
sl))
label.append('{F-connection: %d - Gap length: %d, sd: %d}' %
(connected,
......@@ -1219,7 +1222,7 @@ def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back
else:
logOrPrint(logger,
"Connection is forced without pair ended links")
"Without any support from pair ended links")
label.append('{F-connection: %d}' % connected)
......@@ -2302,84 +2305,264 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
:raises: :py:class:`AssertionError`
'''
alledges = dict((assgraph.getEdgeAttr(*e)['stemid'],
e)
frglen,frglensd = estimateFragmentLength(self) # @UnusedVariable
#
# Build a dictionary with:
# - Keys = stemid
# - Values = edge descriptor (from,to,x)
#
alledges = dict((assgraph.getEdgeAttr(*e)['stemid'],e)
for e in assgraph.edgeIterator(edgePredicate = lambda i: 'stemid' in assgraph.getEdgeAttr(*i)))
coverage=[]
slength=[]
seq=[]
label=[]
oldstem=None
oldid=None
oldstemclass=None
rank = 1
forceconnection=False
circular = alledges[path[-1]][1]==alledges[path[0]][0]
for stemid in path:
if stemid != 0:
stem = alledges[stemid]
attr = assgraph.getEdgeAttr(*stem)
stemclass = attr['class']
sequence = attr['sequence']
if rank==1 and stemclass!="sequence":
raise RuntimeError("A path cannot ends on a gap")
# Switch the stem to a dashed style
graphics = attr.get('graphics',{})
attr['graphics'] = graphics
graphics['style'] = 'dashed'
# Manage step rank information of each step
allsteps = attr.get('steps',{})
steps = allsteps.get(identifier,[])
steps.append(rank)
allsteps[identifier]=steps
attr['steps']=allsteps
if oldstem is not None:
connected,ml,sl,delta = pairEndedConnected(self,assgraph,oldid,stemid,back) # @UnusedVariable
if oldstem[1]==stem[0]:
if oldstemclass=="sequence":
if stemclass=="sequence": # Link between 2 sequences
if ml is not None:
logOrPrint(logger,
"Both segments %d and %d are connected (paired-end=%d frg length=%f sd=%f)" %
(oldid,stemid,connected,float(ml),float(sl)))
label.append('{connection: %d - length: %d, sd: %d}' % (connected,int(ml),int(sl)))
else:
logOrPrint(logger,
"Both segments %d and %d are connected but covered by 0 paired-end" %
(oldid,stemid))
label.append('{connection: 0}')
elif stemclass[0:9]=="scaffold:": # Link a sequence and a gap
logOrPrint(logger,
"Both segments %d and %d are disconnected" % attr['link'])
if circular:
POS = -self.index.getReadSize()
else:
POS = 0
if stemclass=="scaffold:paired-end":
logOrPrint(logger,
" But linked by %d pair ended links (gap length=%f sd=%f)" %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
label.append('{N-connection: %d - Gap length: %d, sd: %d}' %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
elif stemclass=="scaffold:forced":
logOrPrint(logger," Connection is forced")
if attr['pairendlink'] > 0:
logOrPrint(logger,
" But asserted by %d pair ended links (gap length=%f sd=%f)" %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
for stemid in path:
stem = alledges[stemid]
attr = assgraph.getEdgeAttr(*stem)
label.append('{F-connection: %d - Gap length: %d, sd: %d}' %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
else:
label.append('{F-connection: %d}' % connected)
elif stemclass=="scaffold:overlap":
logOrPrint(logger,
" But overlap by %dbp supported by %d pair ended links" %
(-attr['length'],
attr['pairendlink']))
label.append('{O-connection: %d - Overlap length: %d}' %
(attr['pairendlink'],
-attr['length']))
# Remove the overlap length on the last inserted sequence
seq[-1]=seq[-1][:attr['length']]
elif oldstemclass[0:9]=="scaffold:":
if stemclass=="sequence":
seq.append(self.index.getRead(attr['path'][0],
0,
self.index.getReadSize()).lower())
slength.append(self.index.getReadSize())
if oldstem is not None:
if oldstem[1]==stem[0]:
# addInFront=False
print("Both segments %d and %d are connected" % (oldid,stemid),file=sys.stderr)
else:
# addInFront=True
connected,ml,sl,delta = pairEndedConnected(self,assgraph,oldid,stemid,back) # @UnusedVariable
print("Both segments %d and %d are disconnected" % (oldid,stemid),file=sys.stderr)
if connected >= minlink:
print("But linked by %d pair ended links" % connected,file=sys.stderr)
POS+=nlength
connected=True
else:
raise RuntimeError('A scaffold link must be followed by a sequence %d --> %d' %
(oldid,stemid))
elif forceconnection:
logOrPrint(logger," Connection is forced")
if connected > 0:
glength = int(frglen-ml - self.index.getReadSize())
logOrPrint(logger,
" But asserted by %d pair ended links (gap length=%f sd=%f)" %
(connected,
glength,
sl))
label.append('{F-connection: %d - Gap length: %d, sd: %d}' %
(connected,
glength,
sl))
else:
logOrPrint(logger,
"Without any support from pair ended links")
label.append('{F-connection: %d}' % connected)
glength = nlength
seq.append(b'N'*int(glength))
slength.append(int(glength))
seq.append(self.index.getRead(attr['path'][0],
0,
self.index.getReadSize()).lower())
slength.append(self.index.getReadSize())
# Add the foced link to the compact assembly graph
flink = assgraph.addEdge(oldstem[1],stem[0])
rlink = assgraph.addEdge(-stem[0],-oldstem[1])
flink['label']="Forced (%d) %d -> %d" % (connected,oldid,stemid)
flink['graphics']={'width':1,
'arrow':'last',
'fill':'0xFF0000',
'style':'dashed'
}
rlink['label']="Forced (%d) %d -> %d" % (connected,-stemid,-oldid)
rlink['graphics']={'width':1,
'arrow':'last',
'fill':'0xFF0000',
'style':'dashed'
}
else:
connected=False
raise AssertionError('Disconnected path between stem '
'%d and %d only %d pair ended links' % (oldid,stemid,connected))
elif circular:
# addInFront=False
connected=True
if stemclass=="sequence":
if attr['length'] > 10:
attr['label']="%d : %s->(%d)->%s [%d] @ %s" % (stemid,sequence[0:5].decode('ascii'),
attr['length'],
sequence[-5:].decode('ascii'),
int(attr['weight']),
attr['steps'])
else:
attr['label']="%d : %s->(%d) [%d] @ %s" % (stemid,
sequence.decode('ascii'),
attr['length'],
int(attr['weight']),
attr['steps'])
label.append(attr['label'])
seq.append(sequence)
coverage.append(attr['weight'])
slength.append(attr['length'])
rank+=1
oldstem = stem
oldid=stemid
oldstemclass=stemclass
forceconnection=False
else:
connected=False
# addInfront=True
seq.append(attr['path'])
slength.append(attr['length'])
oldstem = stem
oldid=stemid
forceconnection=True
s1 = alledges[path[-1]]
s2 = alledges[path[0]]
sid1=assgraph.getEdgeAttr(*s1)['stemid']
sid2=assgraph.getEdgeAttr(*s2)['stemid']
connected,ml,sl,delta = pairEndedConnected(self,assgraph,sid1,sid2,back) # @UnusedVariable
sclass2=assgraph.getEdgeAttr(*s2)['class']
connected,ml,sl,delta = pairEndedConnected(self, # @UnusedVariable
assgraph,
sid1,
sid2,
back)
if s1[1]==s2[0]:
print("Path is circular",file=sys.stderr)
logOrPrint(logger, "Path is circular and connected by %d (length: %d, sd: %d)" %
(connected,int(ml),int(sl))
)
circular=True
if sclass2=="sequence":
label.append('{connection: %d - length: %d, sd: %d}' % (connected,int(ml),int(sl)))
else:
if connected >= minlink:
print("Path is circular but disconnected",file=sys.stderr)
print("Linked by %d pair ended links" % connected,file=sys.stderr)
seq.append([0]*nlength)
if sclass2!="sequence":
raise RuntimeError("A path cannot ends on a gap")
if forceconnection:
logOrPrint(logger,"Circular connection forced",)
logOrPrint(logger,"Linked by %d pair ended links" % connected)
label.append('{F-connection: %d}' % connected)
seq.append(b'N'*int(nlength))
circular=True
else:
print("Path is linear",file=sys.stderr)
length = sum(slength)
logOrPrint(logger,"Path is linear")
circular=False
seq.insert(0,self.index.getRead(s2[0],0,self.index.getReadSize()).lower())
slength.insert(0,self.index.getReadSize())
sam = ["""@HD\tVN:1.5\tSO:coordinate""",
"""@SQ SN:%s LN:%s""" % (identifier,length)
]
sequence = b''.join(seq)
length = sum(slength)
mcov = oneXcoverage(assgraph)
sam.extend(seq)
fasta=[">%s seq_length=%d; coverage=%5.1f; circular=%s; %s%s" % (identifier,length,
mcov,circular,
tags2str(tags) + " ",
'.'.join(label))]
fasta.extend(sequence[i:i+60].decode('ascii')
for i in range(0,len(sequence),60)
)
return "\n".join(sam)
return "\n".join(fasta)
def stem2fasta(self):
assgraph = self.compactAssembling(verbose=False)
si = assgraph.edgeIterator(edgePredicate=lambda e: 'stemid' in assgraph.getEdgeAttr(*e))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment