Commit 188bb558 by Eric Coissac

Beginning of the sam output function

parent f967e002
......@@ -478,8 +478,8 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['path']=[first] + [0] * (attr['length']-1) + [last]
attr['class']='scaffold:paired-end'
elif kind=="o":
attr['label']="%d : Overlap (%dbp) [%d] %d -> %d" % (nid,overlap,z,s1,s2)
attr['length']=-overlap
attr['label']="%d : Overlap (%dbp) [%d] %d -> %d" % (nid,ml,z,s1,s2)
attr['length']=-ml
attr['first']=first
attr['last']=last
attr['weight']=0
......@@ -492,7 +492,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
}
attr['stemid']=nid
attr['sequence']=b''
attr['path']=[]
attr['path']=[first]+ [0] * (readsize-ml-1) +[last]
attr['class']='scaffold:overlap'
elif kind=="f":
glengths = [frglen - i - 2 * readsize
......@@ -1116,7 +1116,7 @@ def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back
sequence = attr['sequence']
if rank==1 and stemclass!="sequence":
raise RuntimeError("A path cannot ends on a gap")
raise RuntimeError("A path cannot start on a gap")
# Switch the stem to a dashed style
graphics = attr.get('graphics',{})
......@@ -2315,10 +2315,7 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
alledges = dict((assgraph.getEdgeAttr(*e)['stemid'],e)
for e in assgraph.edgeIterator(edgePredicate = lambda i: 'stemid' in assgraph.getEdgeAttr(*i)))
coverage=[]
slength=[]
seq=[]
label=[]
oldstem=None
oldid=None
oldstemclass=None
......@@ -2331,10 +2328,10 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
stem = alledges[stemid]
attr = assgraph.getEdgeAttr(*stem)
stemclass = attr['class']
sequence = attr['sequence']
sequence = attr['path']
if rank==1 and stemclass!="sequence":
raise RuntimeError("A path cannot ends on a gap")
raise RuntimeError("A path cannot start on a gap")
# Switch the stem to a dashed style
graphics = attr.get('graphics',{})
......@@ -2358,13 +2355,11 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
"Both segments %d and %d are connected (paired-end=%d frg length=%f sd=%f)" %
(oldid,stemid,connected,float(ml),float(sl)))
label.append('{connection: %d - length: %d, sd: %d}' % (connected,int(ml),int(sl)))
else:
logOrPrint(logger,
"Both segments %d and %d are connected but covered by 0 paired-end" %
(oldid,stemid))
label.append('{connection: 0}')
elif stemclass[0:9]=="scaffold:": # Link a sequence and a gap
logOrPrint(logger,
......@@ -2377,10 +2372,6 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
attr['length'],
attr['gapsd']))
label.append('{N-connection: %d - Gap length: %d, sd: %d}' %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
elif stemclass=="scaffold:forced":
logOrPrint(logger," Connection is forced")
......@@ -2391,35 +2382,15 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
label.append('{F-connection: %d - Gap length: %d, sd: %d}' %
(attr['pairendlink'],
attr['length'],
attr['gapsd']))
else:
label.append('{F-connection: %d}' % connected)
elif stemclass=="scaffold:overlap":
logOrPrint(logger,
" But overlap by %dbp supported by %d pair ended links" %
(-attr['length'],
attr['pairendlink']))
label.append('{O-connection: %d - Overlap length: %d}' %
(attr['pairendlink'],
-attr['length']))
# Remove the overlap length on the last inserted sequence
seq[-1]=seq[-1][:attr['length']]
elif oldstemclass[0:9]=="scaffold:":
if stemclass=="sequence":
seq.append(self.index.getRead(attr['path'][0],
0,
self.index.getReadSize()).lower())
slength.append(self.index.getReadSize())
else:
raise RuntimeError('A scaffold link must be followed by a sequence %d --> %d' %
(oldid,stemid))
elif oldstemclass[0:9]=="scaffold:" and stemclass[0:9]=="scaffold:":
raise RuntimeError('A scaffold link must be followed by a sequence %d --> %d' %
(oldid,stemid))
elif forceconnection:
logOrPrint(logger," Connection is forced")
......@@ -2432,28 +2403,13 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
glength,
sl))
label.append('{F-connection: %d - Gap length: %d, sd: %d}' %
(connected,
glength,
sl))
else:
logOrPrint(logger,
"Without any support from pair ended links")
label.append('{F-connection: %d}' % connected)
glength = nlength
seq.append(b'N'*int(glength))
slength.append(int(glength))
seq.append(self.index.getRead(attr['path'][0],
0,
self.index.getReadSize()).lower())
slength.append(self.index.getReadSize())
seq.append([0] * int(glength) + [stem[0]])
# Add the foced link to the compact assembly graph
flink = assgraph.addEdge(oldstem[1],stem[0])
......@@ -2479,24 +2435,22 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
if stemclass=="sequence":
if attr['length'] > 10:
attr['label']="%d : %s->(%d)->%s [%d] @ %s" % (stemid,sequence[0:5].decode('ascii'),
attr['label']="%d : %s->(%d)->%s [%d] @ %s" % (stemid,attr['sequence'][0:5].decode('ascii'),
attr['length'],
sequence[-5:].decode('ascii'),
attr['sequence'][-5:].decode('ascii'),
int(attr['weight']),
attr['steps'])
else:
attr['label']="%d : %s->(%d) [%d] @ %s" % (stemid,
sequence.decode('ascii'),
attr['sequence'].decode('ascii'),
attr['length'],
int(attr['weight']),
attr['steps'])
label.append(attr['label'])
seq.append(sequence)
coverage.append(attr['weight'])
slength.append(attr['length'])
if oldstemclass is not None:
seq.append(sequence[1:])
else:
seq.append(sequence)
rank+=1
oldstem = stem
......@@ -2528,8 +2482,7 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
(connected,int(ml),int(sl))
)
circular=True
if sclass2=="sequence":
label.append('{connection: %d - length: %d, sd: %d}' % (connected,int(ml),int(sl)))
seq[-1].pop()
else:
if sclass2!="sequence":
raise RuntimeError("A path cannot ends on a gap")
......@@ -2538,30 +2491,117 @@ def path2sam(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=2
logOrPrint(logger,"Circular connection forced",)
logOrPrint(logger,"Linked by %d pair ended links" % connected)
label.append('{F-connection: %d}' % connected)
seq.append(b'N'*int(nlength))
seq.append([0]*int(nlength))
circular=True
else:
logOrPrint(logger,"Path is linear")
circular=False
seq.insert(0,self.index.getRead(s2[0],0,self.index.getReadSize()).lower())
slength.insert(0,self.index.getReadSize())
# seq.insert(0,self.index.getRead(s2[0],0,self.index.getReadSize()).lower())
sequence=[]
for l in seq:
sequence.extend(l)
if circular:
readsize=self.index.getReadSize()
sequence = sequence[readsize:] + sequence[0:readsize]
reads = []
location={}
for i in range(len(sequence)):
rid = sequence[i]
rl={}
reads.append(rl)
if rid !=0 and not self.index.isFake(rid):
allreads = self.index.getIds(rid)
for rid in allreads:
prid = self.index.getPairedRead(rid)
rl[rid]=(prid,min(abs(rid),abs(prid)))
if rid not in location:
lp=[]
location[rid]=lp
else:
lp=location[rid]
lp.append(i+1)
print("====>",len(sequence),file=sys.stderr)
seq.append("@HD VN:1.5 SO:coordinate")
seq.append("@SQ SN:%s LN:%d" % (identifier,len(sequence)))
mapped=set()
for i in range(len(reads)):
rlist=reads[i]
for rid in rlist:
paired,qname=rlist[rid]
npaired= self.index.getIds(paired)[0]
paired_mapped = npaired in location
readpos=i+1
if paired_mapped:
dist = [(x - readpos
if rid > 0 else readpos - x)
for x in location[npaired]]
pairedpos=location[npaired][dist.index(min( d for d in dist if dist > 0))]
if readpos < pairedpos:
tlen = pairedpos - readpos + readsize
else:
tlen = -(readpos - pairedpos + readsize)
else:
qname = abs(rid)
pairedpos=0
tlen=0
flag=0
flag |= 0x01 # template having multiple segments in sequencing
# All our reads are paired
if paired_mapped:
flag |= 0x02 # each segment properly aligned according to the aligner
# paired read is part of the assembly
if rid > 0:
flag |= 0x40 # the first segment in the template
flag |= 0x20 # SEQ of the next segment in the template being reverse complemented
else:
flag |= 0x80 # the last segment in the template
else:
flag |= 0x08 # next segment in the template unmapped
if rid < 0:
flag |= 0x10 # SEQ being reverse complemented
if abs(rid) in mapped:
flag|=0x100 # secondary alignment
else:
mapped.add(abs(rid))
rseq = str(self.index.getRead(rid,
0,
self.index.getReadSize()).lower())
seq.append("R09d %3d %s %6d %d %dM %s %6d %5d %s *"
% (qname,
flag,identifier,
readpos, # Read position
255, # No mapping quality
readsize, # For the CIGAR string
'=' if paired_mapped else '*', #
pairedpos,
tlen,
rseq
))
#sequence = b''.join(seq)
#length = len(sequence)
sequence = b''.join(seq)
length = sum(slength)
mcov = oneXcoverage(assgraph)
fasta=[">%s seq_length=%d; coverage=%5.1f; circular=%s; %s%s" % (identifier,length,
mcov,circular,
tags2str(tags) + " ",
'.'.join(label))]
fasta.extend(sequence[i:i+60].decode('ascii')
for i in range(0,len(sequence),60)
)
return "\n".join(fasta)
return sequence
def stem2fasta(self):
assgraph = self.compactAssembling(verbose=False)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment