Commit 1c745a3e authored by Eric Coissac's avatar Eric Coissac

Patches some minor bugs

parent 3fadd975
......@@ -25,16 +25,27 @@ def getOutput(config):
#
# Reformating outpout according to the new format
#
# Try to load the index to test its format
index=getIndex(config)
os.makedirs(dirname)
for extension in ['oax','omx','gml','stats',
'intermediate.gml','.intermediate.oax']:
'intermediate.gml','.intermediate.oax',
'chloroplast.path.gml',
'path']:
if os.path.exists('%s.%s' % (config['orgasm']['outputfilename'],
extension)):
os.renames('%s.%s' % (config['orgasm']['outputfilename'],
extension),
'%s.oas/assembling.%s' % (config['orgasm']['outputfilename'],
extension))
# Try to load the seeds to test its format
seeds=getSeeds(index, config)
sys.exit(0)
else:
#
# Exit with an error because the format is obsolete.
......
This diff is collapsed.
'''
Created on 28 sept. 2014
@author: coissac
'''
import orgasm.samples
from orgasm import getIndex, getSeeds,getOutput
from orgasm.tango import cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
import sys
__title__="Recompact the assembling graph"
default_config = { 'seeds' : None
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='output',
nargs='?',
default=None,
help='output prefix' )
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
default=None,
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
meanlength,sdlength = estimateFragmentLength(asm)
if config['orgasm']['back'] is not None:
back = config['orgasm']['back']
elif config['orgasm']['back'] is None and meanlength is not None:
back = int(meanlength + 4 * sdlength)
if back > 500:
back=500
else:
back = 300
if meanlength is not None:
logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength,sdlength))
cg = asm.compactAssembling(verbose=False)
logger.info("Scaffold the assembly")
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False)
genesincontig(cg,r,x)
with open(output+'.gml','w') as gmlfile:
print(cg.gml(),file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, cutLowCoverage, estimateDeadBrancheLength, dumpGraph
__title__="Cut low coverage edge in an assembling graph"
default_config = { 'coverage' : None,
'smallbranches' : None
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='output',
nargs='?',
default=None,
help='output prefix' )
parser.add_argument('--coverage', dest='cutlow:coverage',
required=True,
type=int,
action='store',
default=None,
help='All edges with a coverage below this value will be deleted')
parser.add_argument('--smallbranches', dest='cutlow:smallbranches',
type=int,
action='store',
default=None,
help='maximum length of the branches to cut during '
'the cleaning process [default: <estimated>]')
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
default=None,
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
xxx,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
logger.info("Evaluate fragment length")
meanlength,sdlength = estimateFragmentLength(asm)
if meanlength is not None:
logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength,sdlength))
if config['orgasm']['back'] is not None:
back = config['orgasm']['back']
elif config['orgasm']['back'] is None and meanlength is not None:
back = int(meanlength + 4 * sdlength)
if back > 500:
back=500
else:
back = 300
logger.info("Cut low coverage")
cutLowCoverage(asm,config['cutlow']['coverage'],terminal=False)
if config['cutlow']['smallbranches'] is not None:
smallbranches = config['cutlow']['smallbranches']
else:
smallbranches = estimateDeadBrancheLength(asm)
logger.info("Dead branch length setup to : %d bp" % smallbranches)
asm.cleanDeadBranches(maxlength=smallbranches)
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False)
with open(output+'.gml','w') as gmlfile:
print(cg.gml(),file=gmlfile)
dumpGraph(output+'.oax',asm)
'''
Created on 26 nov. 2014
@author: boyer
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, genesincontig
import os
__title__="Build a fasta file from the assembling graph"
default_config = {
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='<index>',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='<output>',
nargs='?',
default=None,
help='output prefix' )
def fastaFormat(edge, title=None, nchar=60):
if title is None:
title = 'Seq'
lheader = []
for k in ('weight', 'label', 'length', 'stemid', 'ingene'):
lheader.append('%s=%s'%(k, edge[k]))
l = ['; '.join(lheader)+";"]
l[0] = '>%s_%d %s'%(title, edge['stemid'], l[0])
seq = edge['sequence']
lseq = len(edge['sequence'])
i=0
while i < lseq:
l.append(seq[i:i+60].decode('ascii'))
i += 60
return '\n'.join(l)
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
fastaout = open(output+".fasta","w")
logger.info("Print the result as a fasta file")
edges = [cg.getEdgeAttr(*i) for i in cg.edgeIterator(edgePredicate = lambda e : cg.getEdgeAttr(*e)['stemid']>0)]
head, tail = os.path.split(output)
for e in edges:
print(fastaFormat(e, tail),file=fastaout)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, selectGoodComponent
__title__="Print some statistics about the assembling graph"
default_config = {
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='<output>',
nargs='?',
default=None,
help='output prefix' )
parser.add_argument('--back', dest='orgasm:back',
metavar='<insert size>',
type=int,
action='store',
default=None,
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
logger.info("Evaluate fragment length")
meanlength,sdlength = estimateFragmentLength(asm)
if config['orgasm']['back'] is not None:
back = config['orgasm']['back']
elif config['orgasm']['back'] is None and meanlength is not None:
back = int(meanlength + 4 * sdlength)
if back > 500:
back=500
else:
back = 300
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False)
ccs = list(cg.connectedComponentIterator())
gcc = selectGoodComponent(cg)
gnode=set()
for cc in gcc:
for e in cc:
gnode.add(e[0])
gnode.add(e[1])
ucc = set()
for cc in ccs:
ccc = frozenset([-x for x in cc])
if ccc not in ucc:
ucc.add(frozenset(cc))
output = open(output+".stats","w")
print ("AssembledBasePairs:",len(asm)/2,file=output)
print ("TotalConnectedComponents:",len(ccs),file=output)
print ("UniqueConnectedComponents:",len(ucc),file=output)
print ("GoodConnectedComponents:",len(ucc),file=output)
print ("CompactNodes:",len(cg),file=output)
print ("GoodCompactNodes:",len(gnode),file=output)
print ("CompactEdges:",cg.edgeCount(),file=output)
print ("GoodCompactEdges:",sum(len(x) for x in gcc),file=output)
print ("FragmentMeanLength:",meanlength,file=output)
print ("FragmentSdLength:",sdlength,file=output)
'''
Created on 28 sept. 2014
@author: coissac
'''
from subprocess import Popen
from tempfile import mkdtemp
from tempfile import mktemp
from shutil import rmtree
from urllib.request import urlopen
import atexit
import os
import os.path
import sys
import re
from orgasm.utils.dna import reverseComplement # @UnresolvedImport
import zlib
from collections import Counter, deque
from sys import stderr
from orgasm.indexer import Index
__title__="Index a set of reads"
default_config = { 'reformat' : False,
'single' : False,
'forward' : None,
'reverse' : None,
'maxread' : None,
'length' : None,
'estimate' : None,
'fasta' : False,
'ffasta' : False,
'rfasta' : False,
'nopipe' : False,
'checkpairs':False,
'mate' : False,
'checkids' : False
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='name of the produced index')
parser.add_argument(dest='index:forward', metavar='forward',
nargs='?',
default=None,
help='Filename of the forward reads')
parser.add_argument(dest='index:reverse',
metavar='reverse',
nargs='?',
default=None,
help='Filename of the reverse reads' )
parser.add_argument("--reformat",
dest="index:reformat",
action='store_true',
default=None,
help='Asks for reformatting an old sequence index to the new format'
)
parser.add_argument('--single',
dest='index:single',
action='store_true',
default=None,
help='Single read mode')
parser.add_argument('--mate-pairs', dest='index:mate',
action='store_true',
default=None,
help='Mate pair library mode')
parser.add_argument('--check-ids', dest='index:checkids',
action='store_true',
default=False,
help='Checks that forward and reverse ids are identical')
parser.add_argument('--max-read', dest='index:maxread',
type=int,
action='store',
default=None,
help='the count of million of reads to '
'index (default the full file)')
parser.add_argument('--length', dest='index:length',
type=int,
action='store',
default=None,
help='The length of the read to index '
' (default indexed length is estimated from the first read)')
parser.add_argument('--estimate-length', dest='index:estimate',
metavar='FRACTION',
type=float,
action='store',
default=None,
help='Estimate the length to index for conserving FRACTION '
'of the overall dataset')
parser.add_argument('--fasta', dest='index:fasta',
action='store_true',
default=None,
help='forward and reverse sequence files are fasta formated')
parser.add_argument('--no-pipe', dest='index:nopipe',
action='store_true',
default=None,
help='do not use pipe but temp files instead')
parser.add_argument('--forward-fasta', dest='index:ffasta',
action='store_true',
default=False,
help='forward file is a fasta file')
parser.add_argument('--check-pairing', dest='index:checkpairs',
action='store_true',
default=False,
help='ensure that forward and reverse files are correctly paired')
parser.add_argument('--reverse-fasta', dest='index:rfasta',
action='store_true',
default=False,
help='reverse file is a fasta file')
tmpdir = []
# @atexit.register
# def cleanup():
# try:
# os.unlink(FIFO)
# except:
# pass
def reformatOldIndex(config):
logger = config['orgasm']['logger']
output = config['orgasm']['indexfilename']
if not (os.path.exists('%s.ofx' % output ) and
os.path.exists('%s.ogx' % output ) and
os.path.exists('%s.opx' % output ) and
os.path.exists('%s.orx' % output )
):
logger.error('The %s index does not exist or is not complete' % output)
sys.exit(1)
dirname = '%s.odx' % output
if not os.path.exists(dirname):
os.makedirs(dirname)
os.rename('%s.ofx' % output, '%s/index.ofx' % dirname)
os.rename('%s.ogx' % output, '%s/index.ogx' % dirname)
os.rename('%s.opx' % output, '%s/index.opx' % dirname)
os.rename('%s.orx' % output, '%s/index.orx' % dirname)
sys.exit(0)
def postponerm(filename):
def trigger():
print("Deleting tmp file : %s" % filename,file=sys.stderr)
try:
os.unlink(filename)
except:
pass
return trigger
def postponermdir(filename):
def trigger():
print("Deleting tmp directory : %s" % filename,file=sys.stderr)
try:
rmtree(filename)
except:
pass
return trigger
def getTmpDir():
global tmpdir
if not tmpdir:
tmpdir.append(mkdtemp())
atexit.register(postponermdir(tmpdir[0]))
return tmpdir[0]
def allopen(filename):
try:
f = urlopen(filename)
except:
f = open(filename,newline=None)
return f
def ungzip(filename,nopipe):
tmpdir = getTmpDir()
fifo = mktemp(prefix="unziped-", dir=tmpdir)
command="gzip -d -c %s > %s" % (filename,fifo)
if nopipe:
os.system(command)
else:
os.mkfifo(fifo)
process = Popen(command, # @UnusedVariable
shell=True,
stderr=open('/dev/null','w'))
atexit.register(postponerm(fifo))
return fifo
def unbzip(filename,nopipe):
tmpdir = getTmpDir()
fifo = mktemp(prefix="unziped-", dir=tmpdir)
command="bzip2 -d -c %s > %s" % (filename,fifo)
atexit.register(postponerm(fifo))
if nopipe:
os.system(command)
else:
os.mkfifo(fifo)
process = Popen(command, # @UnusedVariable
shell=True,
stderr=open('/dev/null','w'))