Commit ea136ec4 by Eric Coissac

Add code to manage the noprobe option

Signed-off-by: Eric Coissac <eric.coissac@metabarcoding.org>
parent 262c9298
......@@ -84,7 +84,8 @@ def getIndex(config):
return Index("%s/index" % dirname)
def getProbes(config):
def getProbes(config,noprobe=False):
'''
According to the configuration file and the --seeds option this function return the set
of sequence probes to use
......@@ -96,7 +97,12 @@ def getProbes(config):
@rtype: dict
'''
logger=config['orgasm']['logger']
seeds =config['orgasm']['seeds']
if noprobe:
seeds =config['orgasm']['noseeds']
else:
seeds =config['orgasm']['seeds']
probes = {}
......@@ -157,7 +163,6 @@ def getSeeds(index,config):
clean=config['buildgraph']['clean']
forceseeds=config['buildgraph']['forceseeds']
filename="%s.oas/assembling.omx" % output
#
......@@ -180,7 +185,8 @@ def getSeeds(index,config):
# Look for seeds with the new probe sets
#
newprobes = getProbes(config)
newprobes = getProbes(config,noprobe=False)
newnoprobes= getProbes(config,noprobe=True)
#
# Load already run probe sets
......@@ -266,15 +272,52 @@ def getSeeds(index,config):
if nmatches:
probes[probename]=[p,seeds]
noprobes = set()
if newnoprobes:
# --seeds option on the command line -> look for these seeds
logger.info("Running no-probes matching against reads...")
for probename in newnoprobes:
p = newnoprobes[probename][0]
logger.info(" -> probe set: %s" % probename)
seeds = index.lookForSeeds(p,
mincov=config['orgasm']['seedmincov'],
kup=kup,
identity=config['orgasm']['identity'],
logger=logger)
nmatches = sum(len(seeds[i]) for i in seeds)
logger.info("==> %d matches" % nmatches)
if nmatches:
for vs in seeds.values():
noprobes|=set([x[0] for x in vs])
if noprobes:
logger.info("Removing no-probes matches...")
for s in probes:
print(s)
for p in probes[s][1]:
probes[s][1][p]=[x for x in probes[s][1][p] if x[0] not in noprobes]
if not probes[s][1][p]:
del probes[s][1][p]
if not probes[s][1]:
del probes[s]
logger.info("Done.")
newprobes = list(newprobes.keys())
newnoprobes = list(newnoprobes.keys())
if not probes:
logger.info("No --seeds option specified and not previous matches stored")
sys.exit(1)
if newprobes:
if newprobes or newnoprobes:
with open(filename,"wb") as fseeds:
pickle.dump(probes,fseeds)
......
......@@ -147,6 +147,17 @@ def addOptions(parser):
'among %s' % str(list(filter(lambda s: s.startswith('prot') or
s.startswith('nuc'),dir(orgasm.samples)))))
parser.add_argument('--no-seeds', dest ='orgasm:noseeds',
metavar='no-seeds',
action='append',
default=[],
type=str,
help='protein or nucleic probes that will be used to counterselect seeds; '
'either a fasta file containing '
'probe sequences or the name of one of the internal set of seeds '
'among %s' % str(list(filter(lambda s: s.startswith('prot') or
s.startswith('nuc'),dir(orgasm.samples)))))
parser.add_argument('--kup', dest='orgasm:kup',
type=int,
action='store',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment