Commit 2fe1a0df by Eric Coissac

Merge branch 'v-0.2' into 'master'

V 0.2 development branch becomes the master branche



See merge request !2
parents 3504c054 1c745a3e
......@@ -3,6 +3,14 @@
The :program:`fillgaps` command
===============================
At the end of the :ref:`oa buildgraph <oa_buildgraph>` command the assembling graph
is not always complete. Because of the non homogeneous coverage, some parts of the
genome too low covered were not able to be assembled using the heuristics parameters
used to assemble the main parts of the genome. The :program:`fillgaps` command aims
to rerun the *fillgap* algorithm used by the :ref:`oa buildgraph <oa_buildgraph>`
but with other parameters allowing to fill the gaps not assembled during the initial
assembling.
.. figure:: ../oa-fillgaps.*
:align: center
:figwidth: 80 %
......@@ -47,6 +55,19 @@ Graph initialisation options
.. _seeds.kup:
.. code-block:: bash
$ oa fillgaps --seeds protChloroArabidopsis seqindex
A set of seed sequences must be or nucleic or proteic. For initiating
assembling with both nucleic and proteic sequences you must use at least two
``--seeds`` options one for each class of sequences.
.. code-block:: bash
$ oa fillgaps --seeds protChloroArabidopsis --seeds rDNAChloro.fasta seqindex
.. include:: ../options/kup.txt
Graph extension options
......
......@@ -22,214 +22,43 @@ import argparse
import logging
import json
default_config = {
'orgasm' : { 'log' : True,
'loglevel' : 'INFO',
'outputfilename' : None,
'back' : None,
'seeds' : None,
'seedmincov' : 1,
'kup' : None,
'identity' : 0.5,
'minlink' : 5,
'version' : False,
'progress' : True
}
}
from orgasm import command
from orgasm.version import version
from orgasm.apps.config import getConfiguration # @UnresolvedImport
__all__ = []
__version__ = version
__date__ = '2014-09-28'
__updated__ = '2014-09-28'
root_config_name='orgasm'
default_config = { 'software' : "The Organelle Assembler",
'log' : False,
'loglevel' : 'INFO',
'outputfilename' : None,
'back' : None,
'seeds' : None,
'seedmincov' : 1,
'kup' : None,
'identity' : 0.5,
'minlink' : 5,
'version' : False,
'progress' : True
}
DEBUG = 1
TESTRUN = 0
PROFILE = 0
def loadCommand(name,loader):
'''
Load a command module from its name and an ImpLoader
This function is for internal use
@param name: name of the module
@type name: str
@param loader: the module loader
@type loader: ImpLoader
@return the loaded module
@rtype: module
'''
module = loader.find_module(name).load_module(name)
return module
def getCommandsList():
'''
Returns the list of sub-commands available to the main orgasm command
@return: a dict instance with key corresponding to each command and
value corresponding to the module
@rtype: dict
'''
cmds = dict((x[1],loadCommand(x[1],x[0]))
for x in pkgutil.iter_modules(command.__path__)
if not x[2])
return cmds
def getLogger(config):
'''
Returns the logger as defined by the command line option
or by the config file
:param config:
'''
output = config['orgasm']['outputfilename']
level = config['orgasm']['loglevel']
logfile= config['orgasm']['log']
rootlogger = logging.getLogger()
logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
stderrHandler = logging.StreamHandler(sys.stderr)
stderrHandler.setFormatter(logFormatter)
rootlogger.addHandler(stderrHandler)
if logfile:
fileHandler = logging.FileHandler("%s.log" % output)
fileHandler.setFormatter(logFormatter)
rootlogger.addHandler(fileHandler)
try:
loglevel = getattr(logging, level)
except:
loglevel = logging.INFO
rootlogger.setLevel(loglevel)
config['orgasm']['logger']=rootlogger
return rootlogger
class OaParser(argparse.ArgumentParser):
def error(self, message):
sys.stderr.write('error: %s\n' % message)
self.print_help()
sys.exit(2)
def buildArgumentParser():
parser = OaParser()
parser.add_argument('--version', dest='orgasm:version',
action='store_true',
default=False,
help='Print the version of the Organelle Assembler')
parser.add_argument('--no-log', dest='orgasm:log',
action='store_false',
default=None,
help='Do not create a logfile for the assembling')
parser.add_argument('--no-progress', dest='orgasm:progress',
action='store_false',
default=None,
help='Do not print the progress bar during assembling')
subparsers = parser.add_subparsers(title='subcommands',
description='valid subcommands',
help='additional help')
commands = getCommandsList()
for c in commands:
module = commands[c]
if hasattr(module, "run"):
if hasattr(module, "__title__"):
sub = subparsers.add_parser(c,help=module.__title__)
else:
sub = subparsers.add_parser(c)
if hasattr(module, "addOptions"):
module.addOptions(sub)
sub.set_defaults(**{'orgasm:module' : module})
return parser
def buildDefaultConfiguration():
global default_config
commands = getCommandsList()
for c in commands:
module = commands[c]
assert hasattr(module, "run")
if hasattr(module, 'default_config'):
default_config[c]=module.default_config
else:
default_config[c]={}
return default_config
def getConfiguration():
global default_config
if '__done__' in default_config:
return default_config
parser = buildArgumentParser()
options = vars(parser.parse_args())
config = buildDefaultConfiguration()
for k in options:
section,key = k.split(':')
s = config[section]
if options[k] is not None:
s[key]=options[k]
if config['orgasm']['version']:
print("The Organelle Assembler - Version %s" % __version__)
sys.exit(0)
if not 'module' in config['orgasm']:
print('\nError: No oa command specified',file=sys.stderr)
parser.print_help()
sys.exit(2)
if config['orgasm']['outputfilename'] is None:
config['orgasm']['outputfilename']=config['orgasm']['indexfilename']
getLogger(config)
config['__done__']=True
return config
if __name__ =="__main__":
config = getConfiguration()
config = getConfiguration(root_config_name,
default_config)
config['orgasm']['module'].run(config)
if config[root_config_name]['outputfilename'] is None:
config[root_config_name]['outputfilename']=config[root_config_name]['indexfilename']
config[root_config_name]['module'].run(config)
\ No newline at end of file
......@@ -7,7 +7,59 @@ from .backtranslate.fasta import fasta
from . import samples
from orgasm.utils.dna import isDNA
import os
import os.path
def getOutput(config):
'''
@param config: a configuration object
@return: the path for the output files
'''
logger = config['orgasm']['logger']
dirname = "%s.oas" % config['orgasm']['outputfilename']
if not os.path.exists(dirname):
if os.path.exists('%s.oax' % config['orgasm']['outputfilename']):
if config['buildgraph']['reformat']:
#
# Reformating outpout according to the new format
#
# Try to load the index to test its format
index=getIndex(config)
os.makedirs(dirname)
for extension in ['oax','omx','gml','stats',
'intermediate.gml','.intermediate.oax',
'chloroplast.path.gml',
'path']:
if os.path.exists('%s.%s' % (config['orgasm']['outputfilename'],
extension)):
os.renames('%s.%s' % (config['orgasm']['outputfilename'],
extension),
'%s.oas/assembling.%s' % (config['orgasm']['outputfilename'],
extension))
# Try to load the seeds to test its format
seeds=getSeeds(index, config)
sys.exit(0)
else:
#
# Exit with an error because the format is obsolete.
#
logger.error("The %s assembly is not stored according to new format" % config['orgasm']['outputfilename'])
logger.error('Run the oa buildgraph command with the --reformat option')
sys.exit(1)
else:
os.makedirs(dirname)
return "%s/assembling" % dirname
def getIndex(config):
'''
......@@ -16,7 +68,21 @@ def getIndex(config):
@return: an Indexer instance
'''
return Index(config['orgasm']['indexfilename'])
logger=config['orgasm']['logger']
output=config['orgasm']['indexfilename']
dirname="%s.odx" % output
if ( (not os.path.exists(dirname)) and
(os.path.exists('%s.ofx' % output ) and
os.path.exists('%s.ogx' % output ) and
os.path.exists('%s.opx' % output ) and
os.path.exists('%s.orx' % output )
)):
logger.error('Index %s uses the old format please run the command' % output)
logger.error(' oa index --reformat %s' % output)
sys.exit(1)
return Index("%s/index" % dirname)
def getProbes(config):
'''
......@@ -45,6 +111,9 @@ def getProbes(config):
logger.info("Load probe internal dataset : %s" % s)
probes[s]=[p,{}]
else:
logger.info("No new probe set specified")
return probes
......@@ -80,51 +149,134 @@ def getAdapters(config):
return (p3,p5)
def getSeeds(index,config,extension=".omx"):
def getSeeds(index,config):
# looks if the internal blast was already run
output = config['orgasm']['outputfilename']
logger=config['orgasm']['logger']
kup=-1 if config['orgasm']['kup'] is None else config['orgasm']['kup']
clean=config['buildgraph']['clean']
forceseeds=config['buildgraph']['forceseeds']
probes = getProbes(config)
filename=output+extension
filename="%s.oas/assembling.omx" % output
if probes:
# --seeds option on the command line -> look for these seeds
logger.info("Running probes matching against reads...")
for probename in probes:
p = probes[probename][0]
logger.info(" -> probe set: %s" % probename)
seeds = index.lookForSeeds(p,
mincov=config['orgasm']['seedmincov'],
kup=kup,
identity=config['orgasm']['identity'],
logger=logger)
probes[probename][1]=seeds
logger.info("==> %d matches" % sum(len(seeds[i]) for i in seeds))
with open(filename,"wb") as fseeds:
pickle.dump(probes,fseeds)
else:
# no --seeds option on the command line -> load the previous results
#
# Check if the seeds are not correctly placed in the oas directory
#
oldfilename=output+'.omx'
if (not os.path.exists(filename) and
os.path.exists(oldfilename)):
if config['seeds']['reformat']:
os.renames(oldfilename, filename)
else:
logger.error('Seed matches are not stored following the new format')
logger.error('Run the oa seeds command with the --reformat option')
sys.exit(1)
#
# Look for seeds with the new probe sets
#
newprobes = getProbes(config)
#
# Load already run probe sets
#
reformated=[]
if not clean or not newprobes:
try:
with open(filename,'rb') as fseeds:
probes = pickle.load(fseeds)
logger.info("Load matches from previous run : %d probe sets restored" % len(probes))
if not isinstance(list(probes.values())[0][0], dict):
logger.error("Too old version of probes matches that cannot be reformated")
logger.error("Generate a new probe match set using the command oa seeds --seeds command")
logger.error("The old unsuable seed match has been erased")
os.remove(filename)
sys.exit(1)
oldversion=False
for probename in probes:
s = probes[probename][1]
if len(s[list(s.keys())[0]][0]) != 6:
if config['seeds']['reformat']:
oldversion=True
logger.warning("Old probe version save on the disk. Recomputes probes %s" % probename)
p = probes[probename][0]
logger.info(" -> probe set: %s" % probename)
seeds = index.lookForSeeds(p,
mincov=config['orgasm']['seedmincov'],
kup=kup,
identity=config['orgasm']['identity'],
logger=logger)
probes[probename][1]=seeds
reformated.append(probename)
logger.info("==> %d matches" % sum(len(seeds[i]) for i in seeds))
else:
logger.error('Seed matches are not stored following the new format')
logger.error('Run the oa seeds command with the --reformat option')
sys.exit(1)
if oldversion:
with open(filename,"wb") as fseeds:
pickle.dump(probes,fseeds)
nm=0
for k in probes:
for m in probes[k][1].values():
nm+=len(m)
logger.info(" ==> A total of : %d" % nm)
except FileNotFoundError:
logger.info("No --seeds option specified and not previous matches stored")
sys.exit(1)
logger.info("No previous matches loaded")
probes={}
else:
if os.path.exists(filename):
logger.info("Cleaning previous matches")
os.remove(filename)
probes={}
if newprobes:
# --seeds option on the command line -> look for these seeds
logger.info("Running probes matching against reads...")
for probename in newprobes:
p = newprobes[probename][0]
logger.info(" -> probe set: %s" % probename)
seeds = index.lookForSeeds(p,
mincov=config['orgasm']['seedmincov'],
kup=kup,
identity=config['orgasm']['identity'],
logger=logger)
nmatches = sum(len(seeds[i]) for i in seeds)
logger.info("==> %d matches" % nmatches)
if nmatches:
probes[probename]=[p,seeds]
newprobes = list(newprobes.keys())
if not probes:
logger.info("No --seeds option specified and not previous matches stored")
sys.exit(1)
if newprobes:
with open(filename,"wb") as fseeds:
pickle.dump(probes,fseeds)
logger.info("Match list :")
......@@ -147,7 +299,9 @@ def getSeeds(index,config,extension=".omx"):
if coverage > covmax:
covmax=coverage
if forceseeds:
newprobes=None
return covmax,probes
return covmax,probes,newprobes
#def reloadAssembling
\ No newline at end of file
#cython: language_level=3
cpdef buildArgumentParser(str configname, str softname)
\ No newline at end of file
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import argparse
import sys
from .command import getCommandsList
class ObiParser(argparse.ArgumentParser):
def error(self, message):
sys.stderr.write('error: %s\n' % message)
self.print_help()
sys.exit(2)
cpdef buildArgumentParser(str configname,
str softname):
parser = ObiParser()
parser.add_argument('--version', dest='%s:version' % configname,
action='store_true',
default=False,
help='Print the version of %s' % softname)
parser.add_argument('--log', dest='%s:log' % configname,
action='store',
type=str,
default=None,
help='Create a logfile')
parser.add_argument('--no-progress', dest='%s:progress' % configname,
action='store_false',
default=None,
help='Do not print the progress bar during analyzes')
subparsers = parser.add_subparsers(title='subcommands',
description='valid subcommands',
help='additional help')
commands = getCommandsList()
for c in commands:
module = commands[c]
if hasattr(module, "run"):
if hasattr(module, "__title__"):
sub = subparsers.add_parser(c,help=module.__title__)
else:
sub = subparsers.add_parser(c)
if hasattr(module, "addOptions"):
module.addOptions(sub)
sub.set_defaults(**{'%s:module' % configname : module})
return parser
#cython: language_level=3
cdef object loadCommand(str name,loader)
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import pkgutil
from .. import commands
cdef object loadCommand(str name,loader):
'''
Load a command module from its name and an ImpLoader
This function is for internal use
@param name: name of the module
@type name: str
@param loader: the module loader
@type loader: ImpLoader
@return the loaded module
@rtype: module
'''
module = loader.find_module(name).load_module(name)
return module
def getCommandsList():
'''
Returns the list of sub-commands available to the main `obi` command
@return: a dict instance with key corresponding to each command and
value corresponding to the module
@rtype: dict
'''
cdef dict cmds = dict((x[1],loadCommand(x[1],x[0]))
for x in pkgutil.iter_modules(commands.__path__)
if not x[2])
return cmds
#cython: language_level=3
cpdef str setRootConfigName(str rootname)
cpdef str getRootConfigName()
cdef dict buildDefaultConfiguration(str root_config_name,
dict config)
cpdef dict getConfiguration(str root_config_name=?,
dict config=?)
\ No newline at end of file
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import sys
from .command import getCommandsList
from .logging cimport getLogger
from .arguments cimport buildArgumentParser
from ..version import version
from _curses import version
cdef dict __default_config__ = {}
cpdef str setRootConfigName(str rootname):
global __default_config__
if '__root_config__' in __default_config__:
if __default_config__["__root_config__"] in __default_config__:
__default_config__[rootname]=__default_config__[__default_config__["__root_config__"]]
del __default_config__[__default_config__["__root_config__"]]
__default_config__['__root_config__']=rootname
return rootname
cpdef str getRootConfigName():
global __default_config__
return __default_config__.get('__root_config__',None)
cdef dict buildDefaultConfiguration(str root_config_name,
dict config):
global __default_config__
__default_config__.clear()
setRootConfigName(root_config_name)
__default_config__[root_config_name]=config
config['version']=version
commands = getCommandsList()
for c in commands:
module = commands[c]
assert hasattr(module, "run")
if hasattr(module, 'default_config'):
__default_config__[c]=module.default_config
else:
__default_config__[c]={}
return __default_config__
cpdef dict getConfiguration(str root_config_name="__default__",
dict config={}):
global __default_config__
if '__done__' in __def