Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
2ba6d161
Commit
2ba6d161
authored
Jul 28, 2018
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
New command: obi ecopcr
parent
275d85dc
Changes
24
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
24 changed files
with
4523 additions
and
1 deletion
+4523
-1
python/obitools3/commands/ecopcr.pyx
python/obitools3/commands/ecopcr.pyx
+188
-0
python/obitools3/dms/capi/obiecopcr.pxd
python/obitools3/dms/capi/obiecopcr.pxd
+28
-0
python/obitools3/dms/dms.cfiles
python/obitools3/dms/dms.cfiles
+9
-0
python/obitools3/utils.cfiles
python/obitools3/utils.cfiles
+9
-0
src/libecoPCR/ecoError.c
src/libecoPCR/ecoError.c
+26
-0
src/libecoPCR/ecoMalloc.c
src/libecoPCR/ecoMalloc.c
+79
-0
src/libecoPCR/ecoPCR.h
src/libecoPCR/ecoPCR.h
+128
-0
src/libecoPCR/ecoapat.c
src/libecoPCR/ecoapat.c
+205
-0
src/libecoPCR/ecodna.c
src/libecoPCR/ecodna.c
+156
-0
src/libecoPCR/libapat/CODES/dft_code.h
src/libecoPCR/libapat/CODES/dft_code.h
+14
-0
src/libecoPCR/libapat/CODES/dna_code.h
src/libecoPCR/libapat/CODES/dna_code.h
+71
-0
src/libecoPCR/libapat/CODES/prot_code.h
src/libecoPCR/libapat/CODES/prot_code.h
+51
-0
src/libecoPCR/libapat/Gmach.h
src/libecoPCR/libapat/Gmach.h
+97
-0
src/libecoPCR/libapat/Gtypes.h
src/libecoPCR/libapat/Gtypes.h
+70
-0
src/libecoPCR/libapat/apat.h
src/libecoPCR/libapat/apat.h
+176
-0
src/libecoPCR/libapat/apat_parse.c
src/libecoPCR/libapat/apat_parse.c
+369
-0
src/libecoPCR/libapat/apat_search.c
src/libecoPCR/libapat/apat_search.c
+339
-0
src/libecoPCR/libapat/libstki.c
src/libecoPCR/libapat/libstki.c
+380
-0
src/libecoPCR/libapat/libstki.h
src/libecoPCR/libapat/libstki.h
+89
-0
src/libecoPCR/libthermo/nnparams.c
src/libecoPCR/libthermo/nnparams.c
+619
-0
src/libecoPCR/libthermo/nnparams.h
src/libecoPCR/libthermo/nnparams.h
+62
-0
src/obi_ecopcr.c
src/obi_ecopcr.c
+1229
-0
src/obi_ecopcr.h
src/obi_ecopcr.h
+127
-0
src/obierrno.h
src/obierrno.h
+2
-1
No files found.
python/obitools3/commands/ecopcr.pyx
0 → 100644
View file @
2ba6d161
#cython: language_level=3
from
obitools3.apps.progress
cimport
ProgressBar
# @UnresolvedImport
from
obitools3.dms.dms
cimport
DMS
from
obitools3.dms.capi.obidms
cimport
OBIDMS_p
from
obitools3.dms.view
import
RollbackException
from
obitools3.dms.capi.obiecopcr
cimport
obi_ecopcr
from
obitools3.apps.optiongroups
import
addSequenceInputOption
,
addMinimalOutputOption
,
addTaxonomyInputOption
from
obitools3.uri.decode
import
open_uri
from
obitools3.apps.config
import
logger
from
obitools3.utils
cimport
tobytes
from
obitools3.dms.view.typed_view.view_NUC_SEQS
cimport
View_NUC_SEQS
from
libc.stdlib
cimport
malloc
,
free
from
libc.stdint
cimport
int32_t
__title__
=
"in silico PCR"
# TODO: add option to output unique ids
def
addOptions
(
parser
):
addSequenceInputOption
(
parser
)
addMinimalOutputOption
(
parser
)
addTaxonomyInputOption
(
parser
)
group
=
parser
.
add_argument_group
(
'obi ecopcr specific options'
)
group
.
add_argument
(
'--primer1'
,
'-F'
,
action
=
"store"
,
dest
=
"ecopcr:primer1"
,
metavar
=
'<PRIMER>'
,
type
=
str
,
help
=
"Forward primer."
)
group
.
add_argument
(
'--primer2'
,
'-R'
,
action
=
"store"
,
dest
=
"ecopcr:primer2"
,
metavar
=
'<PRIMER>'
,
type
=
str
,
help
=
"Reverse primer."
)
group
.
add_argument
(
'--error'
,
'-e'
,
action
=
"store"
,
dest
=
"ecopcr:error"
,
metavar
=
'<ERROR>'
,
default
=
0
,
type
=
int
,
help
=
"Maximum number of errors (mismatches) allowed per primer. Default: 0."
)
group
.
add_argument
(
'--min-length'
,
'-l'
,
action
=
"store"
,
dest
=
"ecopcr:min-length"
,
metavar
=
"<MINIMUM LENGTH>"
,
type
=
int
,
default
=
0
,
help
=
"Minimum length of the in silico amplified DNA fragment, excluding primers."
)
group
.
add_argument
(
'--max-length'
,
'-L'
,
action
=
"store"
,
dest
=
"ecopcr:max-length"
,
metavar
=
"<MAXIMUM LENGTH>"
,
type
=
int
,
default
=
0
,
help
=
"Maximum length of the in silico amplified DNA fragment, excluding primers."
)
group
.
add_argument
(
'--restrict-to-taxid'
,
'-r'
,
action
=
"append"
,
dest
=
"ecopcr:restrict-to-taxid"
,
metavar
=
"<TAXID>"
,
type
=
int
,
default
=
[],
help
=
"Only the sequence records corresponding to the taxonomic group identified "
"by TAXID are considered for the in silico PCR. The TAXID is an integer "
"that can be found in the NCBI taxonomic database."
)
group
.
add_argument
(
'--ignore-taxid'
,
'-i'
,
action
=
"append"
,
dest
=
"ecopcr:ignore-taxid"
,
metavar
=
"<TAXID>"
,
type
=
int
,
default
=
[],
help
=
"The sequences of the taxonomic group identified by TAXID are not considered for the in silico PCR."
)
group
.
add_argument
(
'--circular'
,
'-c'
,
action
=
"store_true"
,
dest
=
"ecopcr:circular"
,
default
=
False
,
help
=
"Considers that the input sequences are circular (e.g. mitochondrial or chloroplastic DNA)."
)
group
.
add_argument
(
'--salt-concentration'
,
'-a'
,
action
=
"store"
,
dest
=
"ecopcr:salt-concentration"
,
metavar
=
"<FLOAT>"
,
type
=
float
,
default
=
0.05
,
help
=
"Salt concentration used for estimating the Tm. Default: 0.05."
)
group
.
add_argument
(
'--salt-correction-method'
,
'-m'
,
action
=
"store"
,
dest
=
"ecopcr:salt-correction-method"
,
metavar
=
"<1|2>"
,
type
=
int
,
default
=
1
,
help
=
"Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding "
"target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1."
)
group
.
add_argument
(
'--keep-nucs'
,
'-D'
,
action
=
"store"
,
dest
=
"ecopcr:keep-nucs"
,
metavar
=
"<INTEGER>"
,
type
=
int
,
default
=
0
,
help
=
"Keeps the specified number of nucleotides on each side of the in silico amplified sequences, "
"(already including the amplified DNA fragment plus the two target sequences of the primers)."
)
group
.
add_argument
(
'--kingdom-mode'
,
'-k'
,
action
=
"store_true"
,
dest
=
"ecopcr:kingdom-mode"
,
default
=
False
,
help
=
"Print in the output the kingdom of the in silico amplified sequences (default: print the superkingdom)."
)
def
run
(
config
):
cdef
int32_t
*
restrict_to_taxids_p
=
NULL
cdef
int32_t
*
ignore_taxids_p
=
NULL
restrict_to_taxids_len
=
len
(
config
[
'ecopcr'
][
'restrict-to-taxid'
])
restrict_to_taxids_p
=
<
int32_t
*>
malloc
((
restrict_to_taxids_len
+
1
)
*
sizeof
(
int32_t
))
# +1 for the -1 flagging the end of the array
for
i
in
range
(
restrict_to_taxids_len
)
:
restrict_to_taxids_p
[
i
]
=
config
[
'ecopcr'
][
'restrict-to-taxid'
][
i
]
restrict_to_taxids_p
[
restrict_to_taxids_len
]
=
-
1
ignore_taxids_len
=
len
(
config
[
'ecopcr'
][
'ignore-taxid'
])
ignore_taxids_p
=
<
int32_t
*>
malloc
((
ignore_taxids_len
+
1
)
*
sizeof
(
int32_t
))
# +1 for the -1 flagging the end of the array
for
i
in
range
(
ignore_taxids_len
)
:
ignore_taxids_p
[
i
]
=
config
[
'ecopcr'
][
'ignore-taxid'
][
i
]
ignore_taxids_p
[
ignore_taxids_len
]
=
-
1
DMS
.
obi_atexit
()
logger
(
"info"
,
"obi ecopcr"
)
# TODO Bad URI reading because current one is not adapted
# Get input DMS path
i_dms_name
=
config
[
'obi'
][
'inputURI'
].
split
(
'/'
)[
0
]
# Read the name of the input view
i_uri
=
config
[
'obi'
][
'inputURI'
].
split
(
'/'
)
i_view_name
=
i_uri
[
1
]
# Read the name of the output view
o_uri
=
config
[
'obi'
][
'outputURI'
].
split
(
'/'
)
if
len
(
o_uri
)
==
2
:
# Get output DMS path
o_dms_name
=
o_uri
[
0
]
o_view_name
=
o_uri
[
1
]
else
:
o_dms_name
=
i_dms_name
o_view_name
=
o_uri
[
0
]
o_dms
=
open_uri
(
o_dms_name
,
input
=
False
)[
0
]
# Read taxonomy name
taxonomy_name
=
config
[
'obi'
][
'taxoURI'
].
split
(
'/'
)[
2
]
# TODO: input DMS, taxonomy and primers in comments
if
obi_ecopcr
(
tobytes
(
i_dms_name
),
tobytes
(
i_view_name
),
tobytes
(
taxonomy_name
),
\
tobytes
(
o_dms_name
),
tobytes
(
o_view_name
),
b
"ecopcr"
,
\
tobytes
(
config
[
'ecopcr'
][
'primer1'
]),
tobytes
(
config
[
'ecopcr'
][
'primer2'
]),
\
config
[
'ecopcr'
][
'error'
],
\
config
[
'ecopcr'
][
'min-length'
],
config
[
'ecopcr'
][
'max-length'
],
\
restrict_to_taxids_p
,
ignore_taxids_p
,
\
config
[
'ecopcr'
][
'circular'
],
config
[
'ecopcr'
][
'salt-concentration'
],
config
[
'ecopcr'
][
'salt-correction-method'
],
\
config
[
'ecopcr'
][
'keep-nucs'
],
config
[
'ecopcr'
][
'kingdom-mode'
])
<
0
:
raise
Exception
(
"Error running ecopcr"
)
free
(
restrict_to_taxids_p
)
free
(
ignore_taxids_p
)
print
(
"
\n
"
)
print
(
repr
(
o_dms
[
o_view_name
]))
o_dms
.
close
()
python/obitools3/dms/capi/obiecopcr.pxd
0 → 100644
View file @
2ba6d161
#cython: language_level=3
from
obitools3.dms.capi.obidms
cimport
OBIDMS_p
from
libc.stdint
cimport
int32_t
cdef
extern
from
"obi_ecopcr.h"
nogil
:
int
obi_ecopcr
(
const
char
*
input_dms_name
,
const
char
*
i_view_name
,
const
char
*
taxonomy_name
,
const
char
*
output_dms_name
,
const
char
*
o_view_name
,
const
char
*
o_view_comments
,
const
char
*
primer1
,
const
char
*
primer2
,
int
error_max
,
int
min_len
,
int
max_len
,
int32_t
*
restrict_to_taxids
,
int32_t
*
ignore_taxids
,
int
circular
,
double
salt_concentration
,
int
salt_correction_method
,
int
keep_nucleotides
,
bint
kingdom_mode
)
python/obitools3/dms/dms.cfiles
View file @
2ba6d161
...
...
@@ -9,6 +9,15 @@
../../../src/murmurhash2.c
../../../src/obi_align.c
../../../src/obi_clean.c
../../../src/obi_ecopcr.c
../../../src/libecoPCR/libthermo/nnparams.c
../../../src/libecoPCR/libapat/apat_parse.c
../../../src/libecoPCR/libapat/apat_search.c
../../../src/libecoPCR/libapat/libstki.c
../../../src/libecoPCR/ecoapat.c
../../../src/libecoPCR/ecodna.c
../../../src/libecoPCR/ecoError.c
../../../src/libecoPCR/ecoMalloc.c
../../../src/obiavl.c
../../../src/obiblob_indexer.c
../../../src/obiblob.c
...
...
python/obitools3/utils.cfiles
View file @
2ba6d161
...
...
@@ -9,6 +9,15 @@
../../src/murmurhash2.c
../../src/obi_align.c
../../src/obi_clean.c
../../src/obi_ecopcr.c
../../src/libecoPCR/libthermo/nnparams.c
../../src/libecoPCR/libapat/apat_parse.c
../../src/libecoPCR/libapat/apat_search.c
../../src/libecoPCR/libapat/libstki.c
../../src/libecoPCR/ecoapat.c
../../src/libecoPCR/ecodna.c
../../src/libecoPCR/ecoError.c
../../src/libecoPCR/ecoMalloc.c
../../src/obiavl.c
../../src/obiblob_indexer.c
../../src/obiblob.c
...
...
src/libecoPCR/ecoError.c
0 → 100644
View file @
2ba6d161
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void
ecoError
(
int32_t
error
,
const
char
*
message
,
const
char
*
filename
,
int
linenumber
)
{
fprintf
(
stderr
,
"Error %d in file %s line %d : %s
\n
"
,
error
,
filename
,
linenumber
,
message
);
abort
();
}
src/libecoPCR/ecoMalloc.c
0 → 100644
View file @
2ba6d161
#include "ecoPCR.h"
#include <stdlib.h>
static
int
eco_log_malloc
=
0
;
void
eco_trace_memory_allocation
()
{
eco_log_malloc
=
1
;
}
void
eco_untrace_memory_allocation
()
{
eco_log_malloc
=
0
;
}
void
*
eco_malloc
(
int32_t
chunksize
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
)
{
void
*
chunk
;
chunk
=
calloc
(
1
,
chunksize
);
if
(
!
chunk
)
ecoError
(
ECO_MEM_ERROR
,
error_message
,
filename
,
line
);
if
(
eco_log_malloc
)
fprintf
(
stderr
,
"Memory segment located at %p of size %d is allocated (file : %s [%d])"
,
chunk
,
chunksize
,
filename
,
line
);
return
chunk
;
}
void
*
eco_realloc
(
void
*
chunk
,
int32_t
newsize
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
)
{
void
*
newchunk
;
newchunk
=
realloc
(
chunk
,
newsize
);
if
(
!
newchunk
)
ecoError
(
ECO_MEM_ERROR
,
error_message
,
filename
,
line
);
if
(
eco_log_malloc
)
fprintf
(
stderr
,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])"
,
chunk
,
newchunk
,
newsize
,
filename
,
line
);
return
newchunk
;
}
void
eco_free
(
void
*
chunk
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
)
{
free
(
chunk
);
if
(
eco_log_malloc
)
fprintf
(
stderr
,
"Memory segment %p is released => %s (file : %s [%d])"
,
chunk
,
error_message
,
filename
,
line
);
}
src/libecoPCR/ecoPCR.h
0 → 100644
View file @
2ba6d161
#ifndef ECOPCR_H_
#define ECOPCR_H_
#include <stdio.h>
#include <inttypes.h>
#include "../obidmscolumn.h"
#include "../obiview.h"
#include "../obitypes.h"
#ifndef H_apat
#include "./libapat/apat.h"
#endif
/*****************************************************
*
* Data type declarations
*
*****************************************************/
/*
*
* Sequence types
*
*/
typedef
struct
{
int32_t
taxid
;
char
AC
[
20
];
int32_t
DE_length
;
int32_t
SQ_length
;
int32_t
CSQ_length
;
char
data
[
1
];
}
ecoseqformat_t
;
typedef
struct
{
int32_t
taxid
;
int32_t
SQ_length
;
char
*
AC
;
char
*
DE
;
char
*
SQ
;
}
ecoseq_t
;
/*****************************************************
*
* Function declarations
*
*****************************************************/
/*
*
* Low level system functions
*
*/
int32_t
is_big_endian
();
int32_t
swap_int32_t
(
int32_t
);
void
*
eco_malloc
(
int32_t
chunksize
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
);
void
*
eco_realloc
(
void
*
chunk
,
int32_t
chunksize
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
);
void
eco_free
(
void
*
chunk
,
const
char
*
error_message
,
const
char
*
filename
,
int32_t
line
);
void
eco_trace_memory_allocation
();
void
eco_untrace_memory_allocation
();
#define ECOMALLOC(size,error_message) \
eco_malloc((size),(error_message),__FILE__,__LINE__)
#define ECOREALLOC(chunk,size,error_message) \
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
#define ECOFREE(chunk,error_message) \
eco_free((chunk),(error_message),__FILE__,__LINE__)
/*
*
* Error management
*
*/
void
ecoError
(
int32_t
,
const
char
*
,
const
char
*
,
int
);
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
#define ECO_IO_ERROR (1)
#define ECO_MEM_ERROR (2)
#define ECO_ASSERT_ERROR (3)
#define ECO_NOTFOUND_ERROR (4)
/*
*
* Low level Disk access functions
*
*/
int32_t
delete_apatseq
(
SeqPtr
pseq
);
PatternPtr
buildPattern
(
const
char
*
pat
,
int32_t
error_max
);
PatternPtr
complementPattern
(
PatternPtr
pat
);
SeqPtr
ecoseq2apatseq
(
char
*
sequence
,
SeqPtr
out
,
int32_t
circular
);
char
*
ecoComplementPattern
(
char
*
nucAcSeq
);
char
*
ecoComplementSequence
(
char
*
nucAcSeq
);
char
*
getSubSequence
(
char
*
nucAcSeq
,
int32_t
begin
,
int32_t
end
);
#endif
/*ECOPCR_H_*/
src/libecoPCR/ecoapat.c
0 → 100644
View file @
2ba6d161
#include "./libapat/libstki.h"
#include "./libapat/apat.h"
#include "ecoPCR.h"
#include <string.h>
#include "../obidmscolumn.h"
#include "../obiview.h"
#include "../obitypes.h"
static
void
EncodeSequence
(
SeqPtr
seq
);
static
void
UpperSequence
(
char
*
seq
);
/* -------------------------------------------- */
/* uppercase sequence */
/* -------------------------------------------- */
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
#define TO_UPPER(c) ((c) - 'a' + 'A')
void
UpperSequence
(
char
*
seq
)
{
char
*
cseq
;
for
(
cseq
=
seq
;
*
cseq
;
cseq
++
)
if
(
IS_LOWER
(
*
cseq
))
*
cseq
=
TO_UPPER
(
*
cseq
);
}
#undef IS_LOWER
#undef TO_UPPER
/* -------------------------------------------- */
/* encode sequence */
/* IS_UPPER is slightly faster than isupper */
/* -------------------------------------------- */
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
void
EncodeSequence
(
SeqPtr
seq
)
{
int
i
;
UInt8
*
data
;
char
*
cseq
;
data
=
seq
->
data
;
cseq
=
seq
->
cseq
;
while
(
*
cseq
)
{
*
data
=
(
IS_UPPER
(
*
cseq
)
?
*
cseq
-
'A'
:
0x0
);
data
++
;
cseq
++
;
}
for
(
i
=
0
,
cseq
=
seq
->
cseq
;
i
<
seq
->
circular
;
i
++
,
cseq
++
,
data
++
)
*
data
=
(
IS_UPPER
(
*
cseq
)
?
*
cseq
-
'A'
:
0x0
);
for
(
i
=
0
;
i
<
MAX_PATTERN
;
i
++
)
seq
->
hitpos
[
i
]
->
top
=
seq
->
hiterr
[
i
]
->
top
=
0
;
}
#undef IS_UPPER
SeqPtr
ecoseq2apatseq
(
char
*
sequence
,
SeqPtr
out
,
int32_t
circular
)
{
int
i
;
int32_t
seq_len
;
if
(
!
out
)
{
out
=
ECOMALLOC
(
sizeof
(
Seq
),
"Error in Allocation of a new Seq structure"
);
for
(
i
=
0
;
i
<
MAX_PATTERN
;
i
++
)
{
if
(
!
(
out
->
hitpos
[
i
]
=
NewStacki
(
kMinStackiSize
)))
ECOERROR
(
ECO_MEM_ERROR
,
"Error in hit stack Allocation"
);
if
(
!
(
out
->
hiterr
[
i
]
=
NewStacki
(
kMinStackiSize
)))
ECOERROR
(
ECO_MEM_ERROR
,
"Error in error stack Allocation"
);
}
}
seq_len
=
strlen
(
sequence
);
out
->
seqsiz
=
out
->
seqlen
=
seq_len
;
out
->
circular
=
circular
;
if
(
!
out
->
data
)
{
out
->
data
=
ECOMALLOC
((
out
->
seqlen
+
circular
)
*
sizeof
(
UInt8
),
"Error in Allocation of a new Seq data member"
);
out
->
datsiz
=
out
->
seqlen
+
circular
;
}
else
if
((
out
->
seqlen
+
circular
)
>=
out
->
datsiz
)
{
out
->
data
=
ECOREALLOC
(
out
->
data
,(
out
->
seqlen
+
circular
),
"Error during Seq data buffer realloc"
);
out
->
datsiz
=
out
->
seqlen
+
circular
;
}
UpperSequence
(
sequence
);
// ecoPCR only works on uppercase
out
->
cseq
=
sequence
;
EncodeSequence
(
out
);
return
out
;
}
int32_t
delete_apatseq
(
SeqPtr
pseq
)
{
int
i
;
if
(
pseq
)
{
if
(
pseq
->
data
)
ECOFREE
(
pseq
->
data
,
"Freeing sequence data buffer"
);
for
(
i
=
0
;
i
<
MAX_PATTERN
;
i
++
)
{
if
(
pseq
->
hitpos
[
i
])
FreeStacki
(
pseq
->
hitpos
[
i
]);
if
(
pseq
->
hiterr
[
i
])
FreeStacki
(
pseq
->
hiterr
[
i
]);