Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
ORG.Asm
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
ORG.Asm
ORG.Asm
Commits
ae99d8d3
Commit
ae99d8d3
authored
Apr 11, 2016
by
Eric Coissac
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Renames the command sub-moduel to commands
parent
720501e7
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
1657 additions
and
0 deletions
+1657
-0
python/orgasm/commands/__init__.py
python/orgasm/commands/__init__.py
+0
-0
python/orgasm/commands/buildgraph.py
python/orgasm/commands/buildgraph.py
+596
-0
python/orgasm/commands/compact.py
python/orgasm/commands/compact.py
+72
-0
python/orgasm/commands/cutlow.py
python/orgasm/commands/cutlow.py
+97
-0
python/orgasm/commands/graphstats.py
python/orgasm/commands/graphstats.py
+95
-0
python/orgasm/commands/index.py
python/orgasm/commands/index.py
+604
-0
python/orgasm/commands/list.py
python/orgasm/commands/list.py
+25
-0
python/orgasm/commands/path.py
python/orgasm/commands/path.py
+97
-0
python/orgasm/commands/seeds.py
python/orgasm/commands/seeds.py
+71
-0
No files found.
python/orgasm/commands/__init__.py
0 → 100644
View file @
ae99d8d3
python/orgasm/commands/buildgraph.py
0 → 100644
View file @
ae99d8d3
This diff is collapsed.
Click to expand it.
python/orgasm/commands/compact.py
0 → 100644
View file @
ae99d8d3
'''
Created on 28 sept. 2014
@author: coissac
'''
import
orgasm.samples
from
orgasm
import
getIndex
,
getSeeds
,
getOutput
from
orgasm.tango
import
cutLowCoverage
,
cutSNPs
,
\
estimateDeadBrancheLength
,
estimateFragmentLength
,
\
genesincontig
,
scaffold
,
fillGaps
,
dumpGraph
,
restoreGraph
import
sys
__title__
=
"Recompact the assembling graph"
default_config
=
{
'seeds'
:
None
}
def
addOptions
(
parser
):
parser
.
add_argument
(
dest
=
'orgasm:indexfilename'
,
metavar
=
'index'
,
help
=
'index root filename (produced by the oa index command)'
)
parser
.
add_argument
(
dest
=
'orgasm:outputfilename'
,
metavar
=
'output'
,
nargs
=
'?'
,
default
=
None
,
help
=
'output prefix'
)
parser
.
add_argument
(
'--back'
,
dest
=
'orgasm:back'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]'
)
def
run
(
config
):
logger
=
config
[
'orgasm'
][
'logger'
]
output
=
getOutput
(
config
)
r
=
getIndex
(
config
)
coverage
,
x
,
newprobes
=
getSeeds
(
r
,
config
)
asm
=
restoreGraph
(
output
+
'.oax'
,
r
,
x
)
meanlength
,
sdlength
=
estimateFragmentLength
(
asm
)
if
config
[
'orgasm'
][
'back'
]
is
not
None
:
back
=
config
[
'orgasm'
][
'back'
]
elif
config
[
'orgasm'
][
'back'
]
is
None
and
meanlength
is
not
None
:
back
=
int
(
meanlength
+
4
*
sdlength
)
if
back
>
500
:
back
=
500
else
:
back
=
300
if
meanlength
is
not
None
:
logger
.
info
(
"Fragment length estimated : %f pb (sd: %f)"
%
(
meanlength
,
sdlength
))
cg
=
asm
.
compactAssembling
(
verbose
=
False
)
logger
.
info
(
"Scaffold the assembly"
)
scaffold
(
asm
,
cg
,
minlink
=
5
,
back
=
int
(
back
),
addConnectedLink
=
False
)
genesincontig
(
cg
,
r
,
x
)
with
open
(
output
+
'.gml'
,
'w'
)
as
gmlfile
:
print
(
cg
.
gml
(),
file
=
gmlfile
)
python/orgasm/commands/cutlow.py
0 → 100644
View file @
ae99d8d3
'''
Created on 28 sept. 2014
@author: coissac
'''
from
orgasm
import
getOutput
,
getIndex
,
getSeeds
from
orgasm.tango
import
restoreGraph
,
estimateFragmentLength
,
genesincontig
,
\
scaffold
,
cutLowCoverage
,
estimateDeadBrancheLength
,
dumpGraph
__title__
=
"Cut low coverage edge in an assembling graph"
default_config
=
{
'coverage'
:
None
,
'smallbranches'
:
None
}
def
addOptions
(
parser
):
parser
.
add_argument
(
dest
=
'orgasm:indexfilename'
,
metavar
=
'index'
,
help
=
'index root filename (produced by the oa index command)'
)
parser
.
add_argument
(
dest
=
'orgasm:outputfilename'
,
metavar
=
'output'
,
nargs
=
'?'
,
default
=
None
,
help
=
'output prefix'
)
parser
.
add_argument
(
'--coverage'
,
dest
=
'cutlow:coverage'
,
required
=
True
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'All edges with a coverage below this value will be deleted'
)
parser
.
add_argument
(
'--smallbranches'
,
dest
=
'cutlow:smallbranches'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'maximum length of the branches to cut during '
'the cleaning process [default: <estimated>]'
)
parser
.
add_argument
(
'--back'
,
dest
=
'orgasm:back'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]'
)
def
run
(
config
):
logger
=
config
[
'orgasm'
][
'logger'
]
output
=
getOutput
(
config
)
r
=
getIndex
(
config
)
xxx
,
x
,
newprobes
=
getSeeds
(
r
,
config
)
asm
=
restoreGraph
(
output
+
'.oax'
,
r
,
x
)
logger
.
info
(
"Evaluate fragment length"
)
meanlength
,
sdlength
=
estimateFragmentLength
(
asm
)
if
meanlength
is
not
None
:
logger
.
info
(
"Fragment length estimated : %f pb (sd: %f)"
%
(
meanlength
,
sdlength
))
if
config
[
'orgasm'
][
'back'
]
is
not
None
:
back
=
config
[
'orgasm'
][
'back'
]
elif
config
[
'orgasm'
][
'back'
]
is
None
and
meanlength
is
not
None
:
back
=
int
(
meanlength
+
4
*
sdlength
)
if
back
>
500
:
back
=
500
else
:
back
=
300
logger
.
info
(
"Cut low coverage"
)
cutLowCoverage
(
asm
,
config
[
'cutlow'
][
'coverage'
],
terminal
=
False
)
if
config
[
'cutlow'
][
'smallbranches'
]
is
not
None
:
smallbranches
=
config
[
'cutlow'
][
'smallbranches'
]
else
:
smallbranches
=
estimateDeadBrancheLength
(
asm
)
logger
.
info
(
"Dead branch length setup to : %d bp"
%
smallbranches
)
asm
.
cleanDeadBranches
(
maxlength
=
smallbranches
)
cg
=
asm
.
compactAssembling
(
verbose
=
False
)
genesincontig
(
cg
,
r
,
x
)
scaffold
(
asm
,
cg
,
minlink
=
5
,
back
=
int
(
back
),
addConnectedLink
=
False
)
with
open
(
output
+
'.gml'
,
'w'
)
as
gmlfile
:
print
(
cg
.
gml
(),
file
=
gmlfile
)
dumpGraph
(
output
+
'.oax'
,
asm
)
python/orgasm/commands/graphstats.py
0 → 100644
View file @
ae99d8d3
'''
Created on 28 sept. 2014
@author: coissac
'''
from
orgasm
import
getOutput
,
getIndex
,
getSeeds
from
orgasm.tango
import
restoreGraph
,
estimateFragmentLength
,
genesincontig
,
\
scaffold
,
selectGoodComponent
__title__
=
"Print some statistics about the assembling graph"
default_config
=
{
}
def
addOptions
(
parser
):
parser
.
add_argument
(
dest
=
'orgasm:indexfilename'
,
metavar
=
'index'
,
help
=
'index root filename (produced by the oa index command)'
)
parser
.
add_argument
(
dest
=
'orgasm:outputfilename'
,
metavar
=
'<output>'
,
nargs
=
'?'
,
default
=
None
,
help
=
'output prefix'
)
parser
.
add_argument
(
'--back'
,
dest
=
'orgasm:back'
,
metavar
=
'<insert size>'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]'
)
def
run
(
config
):
logger
=
config
[
'orgasm'
][
'logger'
]
output
=
getOutput
(
config
)
r
=
getIndex
(
config
)
ecoverage
,
x
,
newprobes
=
getSeeds
(
r
,
config
)
asm
=
restoreGraph
(
output
+
'.oax'
,
r
,
x
)
logger
.
info
(
"Evaluate fragment length"
)
meanlength
,
sdlength
=
estimateFragmentLength
(
asm
)
if
config
[
'orgasm'
][
'back'
]
is
not
None
:
back
=
config
[
'orgasm'
][
'back'
]
elif
config
[
'orgasm'
][
'back'
]
is
None
and
meanlength
is
not
None
:
back
=
int
(
meanlength
+
4
*
sdlength
)
if
back
>
500
:
back
=
500
else
:
back
=
300
cg
=
asm
.
compactAssembling
(
verbose
=
False
)
genesincontig
(
cg
,
r
,
x
)
scaffold
(
asm
,
cg
,
minlink
=
5
,
back
=
int
(
back
),
addConnectedLink
=
False
)
ccs
=
list
(
cg
.
connectedComponentIterator
())
gcc
=
selectGoodComponent
(
cg
)
gnode
=
set
()
for
cc
in
gcc
:
for
e
in
cc
:
gnode
.
add
(
e
[
0
])
gnode
.
add
(
e
[
1
])
ucc
=
set
()
for
cc
in
ccs
:
ccc
=
frozenset
([
-
x
for
x
in
cc
])
if
ccc
not
in
ucc
:
ucc
.
add
(
frozenset
(
cc
))
output
=
open
(
output
+
".stats"
,
"w"
)
print
(
"AssembledBasePairs:"
,
len
(
asm
)
/
2
,
file
=
output
)
print
(
"TotalConnectedComponents:"
,
len
(
ccs
),
file
=
output
)
print
(
"UniqueConnectedComponents:"
,
len
(
ucc
),
file
=
output
)
print
(
"GoodConnectedComponents:"
,
len
(
ucc
),
file
=
output
)
print
(
"CompactNodes:"
,
len
(
cg
),
file
=
output
)
print
(
"GoodCompactNodes:"
,
len
(
gnode
),
file
=
output
)
print
(
"CompactEdges:"
,
cg
.
edgeCount
(),
file
=
output
)
print
(
"GoodCompactEdges:"
,
sum
(
len
(
x
)
for
x
in
gcc
),
file
=
output
)
print
(
"FragmentMeanLength:"
,
meanlength
,
file
=
output
)
print
(
"FragmentSdLength:"
,
sdlength
,
file
=
output
)
python/orgasm/commands/index.py
0 → 100644
View file @
ae99d8d3
'''
Created on 28 sept. 2014
@author: coissac
'''
from
subprocess
import
Popen
from
tempfile
import
mkdtemp
from
tempfile
import
mktemp
from
shutil
import
rmtree
from
urllib.request
import
urlopen
import
atexit
import
os
import
os.path
import
sys
import
re
from
orgasm.utils.dna
import
reverseComplement
# @UnresolvedImport
import
zlib
from
collections
import
Counter
,
deque
from
sys
import
stderr
from
orgasm.indexer
import
Index
__title__
=
"Index a set of reads"
default_config
=
{
'reformat'
:
False
,
'single'
:
False
,
'forward'
:
None
,
'reverse'
:
None
,
'maxread'
:
None
,
'length'
:
None
,
'estimate'
:
None
,
'fasta'
:
False
,
'ffasta'
:
False
,
'rfasta'
:
False
,
'nopipe'
:
False
,
'checkpairs'
:
False
,
'mate'
:
False
,
'checkids'
:
False
}
def
addOptions
(
parser
):
parser
.
add_argument
(
dest
=
'orgasm:indexfilename'
,
metavar
=
'index'
,
help
=
'name of the produced index'
)
parser
.
add_argument
(
dest
=
'index:forward'
,
metavar
=
'forward'
,
nargs
=
'?'
,
default
=
None
,
help
=
'Filename of the forward reads'
)
parser
.
add_argument
(
dest
=
'index:reverse'
,
metavar
=
'reverse'
,
nargs
=
'?'
,
default
=
None
,
help
=
'Filename of the reverse reads'
)
parser
.
add_argument
(
"--reformat"
,
dest
=
"index:reformat"
,
action
=
'store_true'
,
default
=
None
,
help
=
'Asks for reformatting an old sequence index to the new format'
)
parser
.
add_argument
(
'--single'
,
dest
=
'index:single'
,
action
=
'store_true'
,
default
=
None
,
help
=
'Single read mode'
)
parser
.
add_argument
(
'--mate-pairs'
,
dest
=
'index:mate'
,
action
=
'store_true'
,
default
=
None
,
help
=
'Mate pair library mode'
)
parser
.
add_argument
(
'--check-ids'
,
dest
=
'index:checkids'
,
action
=
'store_true'
,
default
=
False
,
help
=
'Checks that forward and reverse ids are identical'
)
parser
.
add_argument
(
'--max-read'
,
dest
=
'index:maxread'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'the count of million of reads to '
'index (default the full file)'
)
parser
.
add_argument
(
'--length'
,
dest
=
'index:length'
,
type
=
int
,
action
=
'store'
,
default
=
None
,
help
=
'The length of the read to index '
' (default indexed length is estimated from the first read)'
)
parser
.
add_argument
(
'--estimate-length'
,
dest
=
'index:estimate'
,
metavar
=
'FRACTION'
,
type
=
float
,
action
=
'store'
,
default
=
None
,
help
=
'Estimate the length to index for conserving FRACTION '
'of the overall dataset'
)
parser
.
add_argument
(
'--fasta'
,
dest
=
'index:fasta'
,
action
=
'store_true'
,
default
=
None
,
help
=
'forward and reverse sequence files are fasta formated'
)
parser
.
add_argument
(
'--no-pipe'
,
dest
=
'index:nopipe'
,
action
=
'store_true'
,
default
=
None
,
help
=
'do not use pipe but temp files instead'
)
parser
.
add_argument
(
'--forward-fasta'
,
dest
=
'index:ffasta'
,
action
=
'store_true'
,
default
=
False
,
help
=
'forward file is a fasta file'
)
parser
.
add_argument
(
'--check-pairing'
,
dest
=
'index:checkpairs'
,
action
=
'store_true'
,
default
=
False
,
help
=
'ensure that forward and reverse files are correctly paired'
)
parser
.
add_argument
(
'--reverse-fasta'
,
dest
=
'index:rfasta'
,
action
=
'store_true'
,
default
=
False
,
help
=
'reverse file is a fasta file'
)
tmpdir
=
[]
# @atexit.register
# def cleanup():
# try:
# os.unlink(FIFO)
# except:
# pass
def
reformatOldIndex
(
config
):
logger
=
config
[
'orgasm'
][
'logger'
]
output
=
config
[
'orgasm'
][
'indexfilename'
]
if
not
(
os
.
path
.
exists
(
'%s.ofx'
%
output
)
and
os
.
path
.
exists
(
'%s.ogx'
%
output
)
and
os
.
path
.
exists
(
'%s.opx'
%
output
)
and
os
.
path
.
exists
(
'%s.orx'
%
output
)
):
logger
.
error
(
'The %s index does not exist or is not complete'
%
output
)
sys
.
exit
(
1
)
dirname
=
'%s.odx'
%
output
if
not
os
.
path
.
exists
(
dirname
):
os
.
makedirs
(
dirname
)
os
.
rename
(
'%s.ofx'
%
output
,
'%s/index.ofx'
%
dirname
)
os
.
rename
(
'%s.ogx'
%
output
,
'%s/index.ogx'
%
dirname
)
os
.
rename
(
'%s.opx'
%
output
,
'%s/index.opx'
%
dirname
)
os
.
rename
(
'%s.orx'
%
output
,
'%s/index.orx'
%
dirname
)
sys
.
exit
(
0
)
def
postponerm
(
filename
):
def
trigger
():
print
(
"Deleting tmp file : %s"
%
filename
,
file
=
sys
.
stderr
)
try
:
os
.
unlink
(
filename
)
except
:
pass
return
trigger
def
postponermdir
(
filename
):
def
trigger
():
print
(
"Deleting tmp directory : %s"
%
filename
,
file
=
sys
.
stderr
)
try
:
rmtree
(
filename
)
except
:
pass
return
trigger
def
getTmpDir
():
global
tmpdir
if
not
tmpdir
:
tmpdir
.
append
(
mkdtemp
())
atexit
.
register
(
postponermdir
(
tmpdir
[
0
]))
return
tmpdir
[
0
]
def
allopen
(
filename
):
try
:
f
=
urlopen
(
filename
)
except
:
f
=
open
(
filename
,
newline
=
None
)
return
f
def
ungzip
(
filename
,
nopipe
):
tmpdir
=
getTmpDir
()
fifo
=
mktemp
(
prefix
=
"unziped-"
,
dir
=
tmpdir
)
command
=
"gzip -d -c %s > %s"
%
(
filename
,
fifo
)
if
nopipe
:
os
.
system
(
command
)
else
:
os
.
mkfifo
(
fifo
)
process
=
Popen
(
command
,
# @UnusedVariable
shell
=
True
,
stderr
=
open
(
'/dev/null'
,
'w'
))
atexit
.
register
(
postponerm
(
fifo
))
return
fifo
def
unbzip
(
filename
,
nopipe
):
tmpdir
=
getTmpDir
()
fifo
=
mktemp
(
prefix
=
"unziped-"
,
dir
=
tmpdir
)
command
=
"bzip2 -d -c %s > %s"
%
(
filename
,
fifo
)
atexit
.
register
(
postponerm
(
fifo
))
if
nopipe
:
os
.
system
(
command
)
else
:
os
.
mkfifo
(
fifo
)
process
=
Popen
(
command
,
# @UnusedVariable
shell
=
True
,
stderr
=
open
(
'/dev/null'
,
'w'
))
return
fifo
def
readFasta
(
filename
):
if
isinstance
(
filename
,
str
):
filename
=
open
(
filename
)
seq
=
[]
sid
=
""
seqid
=
0
for
line
in
filename
:
if
line
[
0
]
==
">"
:
if
seqid
>
0
:
seq
=
''
.
join
(
seq
)
yield
(
sid
,
bytes
(
seq
,
encoding
=
'latin1'
))
sid
=
line
[
1
:].
split
(
None
,
1
)[
0
].
rsplit
(
'/'
,
1
)[
0
].
strip
()
seqid
+=
1
seq
=
[]
else
:
seq
.
append
(
line
.
strip
().
upper
())
seq
=
''
.
join
(
seq
)
yield
(
sid
,
bytes
(
seq
,
encoding
=
'latin1'
))
def
readFastq
(
filename
):
if
isinstance
(
filename
,
str
):
filename
=
open
(
filename
)
seq
=
[]
seqid
=
0
lseq
=
0
sid
=
""
for
line
in
filename
:
if
line
[
0
]
==
"@"
:
if
seqid
>
0
:
seq
=
''
.
join
(
seq
)
yield
(
sid
,
bytes
(
seq
,
encoding
=
'ascii'
))
sid
=
line
[
1
:].
split
(
None
,
1
)[
0
].
rsplit
(
'/'
,
1
)[
0
].
strip
()
seqid
+=
1
lseq
=
0
seq
=
[]
elif
line
[
0
]
!=
'+'
:
seq
.
append
(
line
.
strip
().
upper
())
lseq
+=
1
else
:
for
i
in
range
(
lseq
):
# @UnusedVariable
next
(
filename
)
seq
=
''
.
join
(
seq
)
yield
(
sid
,
bytes
(
seq
,
encoding
=
'latin1'
))
def
acgtCut
(
seqs
):
acgt
=
re
.
compile
(
b
'[ACGT]+'
)
for
s
in
seqs
:
m
=
acgt
.
findall
(
s
[
1
])
bs
=
''
for
ss
in
m
:
if
len
(
ss
)
>
len
(
bs
):
bs
=
ss
yield
(
s
[
0
],
bs
)
def
lengthStats
(
seqs
):
store
=
[]
def
seqIterator
():
for
s
in
store
:
# print(s)
if
s
[
1
]
is
not
None
:
yield
(
s
[
0
],
zlib
.
decompress
(
s
[
1
]))
else
:
yield
(
s
[
0
],
b
''
)
def
reader
():
for
s
in
seqs
:
# print(s)
if
len
(
s
[
1
]):
store
.
append
((
s
[
0
],
zlib
.
compress
(
s
[
1
])))
else
:
store
.
append
((
s
[
0
],
None
))
yield
len
(
s
[
1
])
stats
=
Counter
(
reader
())
return
stats
,
seqIterator
()
def
formatFastq
(
seq
):
fastq
=
'@{id:0>7}
\n
{seq}
\n
+
\n
{qual}'
return
fastq
.
format
(
id
=
seq
[
0
],
seq
=
seq
[
1
].
decode
(
'ascii'
),
qual
=
'0'
*
len
(
seq
[
1
]))
def
singleToPairedEnd
(
seqs
,
length
):
for
s
in
seqs
:
f
=
s
[
1
][
0
:
length
]
r
=
reverseComplement
(
s
[
1
][
-
length
:])