Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
eb586b2f
Commit
eb586b2f
authored
Dec 09, 2018
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
New command and C functions: obi ecotag
parent
9556130b
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
849 additions
and
0 deletions
+849
-0
python/obitools3/commands/ecotag.pyx
python/obitools3/commands/ecotag.pyx
+120
-0
python/obitools3/dms/capi/obiecotag.pxd
python/obitools3/dms/capi/obiecotag.pxd
+14
-0
python/obitools3/dms/dms.cfiles
python/obitools3/dms/dms.cfiles
+1
-0
python/obitools3/utils.cfiles
python/obitools3/utils.cfiles
+1
-0
src/obi_ecotag.c
src/obi_ecotag.c
+640
-0
src/obi_ecotag.h
src/obi_ecotag.h
+71
-0
src/obierrno.h
src/obierrno.h
+2
-0
No files found.
python/obitools3/commands/ecotag.pyx
0 → 100755
View file @
eb586b2f
#cython: language_level=3
from
obitools3.apps.progress
cimport
ProgressBar
# @UnresolvedImport
from
obitools3.dms.dms
cimport
DMS
from
obitools3.dms.view
import
RollbackException
from
obitools3.dms.capi.obiecotag
cimport
obi_ecotag
from
obitools3.apps.optiongroups
import
addMinimalInputOption
,
addTaxonomyOption
,
addMinimalOutputOption
from
obitools3.uri.decode
import
open_uri
from
obitools3.apps.config
import
logger
from
obitools3.utils
cimport
tobytes
,
str2bytes
from
obitools3.dms.view.view
cimport
View
from
obitools3.dms.view.typed_view.view_NUC_SEQS
cimport
View_NUC_SEQS
import
sys
# One-line description of this command.
# NOTE(review): presumably read by the `obi` front end when listing commands — confirm.
__title__ = "Taxonomic assignment of sequences"
def addOptions(parser):
    """Declare the command-line options accepted by ``obi ecotag``.

    The shared input, taxonomy and output option helpers are applied to
    *parser* first. An ``obi ecotag specific options`` argument group is then
    added, holding:

    * ``--ref-database`` / ``-R``: string stored under ``ecotag:ref_view``.
    * ``--minimum-identity`` / ``-m``: float stored under
      ``ecotag:threshold``, defaulting to ``0.0``.
    """
    # Options shared with the other commands, applied in the original order.
    for register_common in (addMinimalInputOption,
                            addTaxonomyOption,
                            addMinimalOutputOption):
        register_common(parser)

    ecotag_options = parser.add_argument_group('obi ecotag specific options')

    ecotag_options.add_argument(
        '--ref-database', '-R',
        action="store",
        dest="ecotag:ref_view",
        metavar='<REF_VIEW>',
        type=str,
        help="URI of the view containing the reference database as built by the build_ref_db command.",
    )

    # '%%' is a literal percent sign: argparse %-formats help strings.
    identity_help = ("Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
                     "Default: 0.00 (no threshold).")
    ecotag_options.add_argument(
        '--minimum-identity', '-m',
        action="store",
        dest="ecotag:threshold",
        metavar='<THRESHOLD>',
        default=0.0,
        type=float,
        help=identity_help,
    )
def run(config):
    """Run the ``obi ecotag`` command: taxonomic assignment of sequences.

    Opens the DMS of the query view, of the reference view, of the output and
    of the taxonomy, calls the C function ``obi_ecotag`` and prints the
    ``repr`` of the resulting view. When the input and output DMS differ,
    the result is first written to a uniquely named temporary view in the
    input DMS, imported into the output DMS under its final name, and the
    temporary view is then deleted.

    Parameters:
        config: configuration mapping. The keys read are
            ``config['obi']['inputURI']``, ``config['obi']['outputURI']``,
            ``config['obi']['taxoURI']``, ``config['ecotag']['ref_view']``
            and ``config['ecotag']['threshold']``.

    Raises:
        Exception: if the input, reference, output or taxonomy URI cannot be
            opened, or if ``obi_ecotag`` returns a negative value.
    """
    DMS.obi_atexit()

    logger("info", "obi ecotag")

    # Open the query view: only the DMS
    # (local named input_uri so the builtin input() is not shadowed)
    input_uri = open_uri(config['obi']['inputURI'], dms_only=True)
    if input_uri is None:
        raise Exception("Could not read input")
    i_dms = input_uri[0]
    i_dms_name = i_dms.name
    i_view_name = input_uri[1]

    # Open the reference view: only the DMS
    ref = open_uri(config['ecotag']['ref_view'], dms_only=True)
    if ref is None:
        raise Exception("Could not read reference view URI")
    ref_dms_name = ref[0].name
    ref_view_name = ref[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'], input=False, dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    final_o_view_name = output[1]

    # If the input and output DMS are not the same, run ecotag creating a
    # temporary view that will be exported to the right DMS and deleted in
    # the other afterwards.
    export_needed = i_dms != o_dms
    if export_needed:
        temporary_view_name = final_o_view_name
        i = 0
        while temporary_view_name in i_dms:
            # Making sure view name is unique in input DMS
            temporary_view_name = final_o_view_name + b"_" + str2bytes(str(i))
            i += 1
        o_view_name = temporary_view_name
    else:
        o_view_name = final_o_view_name

    # Read taxonomy DMS and name
    taxo = open_uri(config['obi']['taxoURI'], dms_only=True)
    if taxo is None:
        # Same explicit failure as for the other URIs, instead of a
        # "'NoneType' object is not subscriptable" error on the next line.
        raise Exception("Could not open taxonomy URI")
    taxo_dms_name = taxo[0].name
    # The taxonomy name is taken as the last path component of the URI
    taxonomy_name = config['obi']['taxoURI'].split("/")[-1]  # Robust in theory

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    # TODO(review): the original note here reads "TODO no. fix". Only the
    # query DMS/view are recorded as inputs; presumably the reference view
    # and the taxonomy should be recorded too -- confirm before changing.
    comments = View.print_config(config, "ecotag", command_line,
                                 input_dms_name=[i_dms_name],
                                 input_view_name=[i_view_name])

    status = obi_ecotag(tobytes(i_dms_name), tobytes(i_view_name),
                        tobytes(ref_dms_name), tobytes(ref_view_name),
                        tobytes(taxo_dms_name), tobytes(taxonomy_name),
                        tobytes(o_view_name), comments,
                        config['ecotag']['threshold'])
    if status < 0:
        raise Exception("Error running ecotag")

    # If the input and output DMS are not the same, export result view to output DMS
    if export_needed:
        # NOTE(review): [:-7] presumably strips a 7-character directory
        # suffix from the DMS path -- confirm against DMS.full_path.
        View.import_view(i_dms.full_path[:-7],
                         o_dms.full_path[:-7],
                         o_view_name,
                         final_o_view_name)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    print("\n")
    print(repr(o_dms[final_o_view_name]))

    # If the input and the output DMS are different, delete the temporary
    # result view in the input DMS
    if export_needed:
        View.delete_view(i_dms, o_view_name)

    o_dms.close()
    i_dms.close()
python/obitools3/dms/capi/obiecotag.pxd
0 → 100755
View file @
eb586b2f
#cython: language_level=3
# Cython declaration of the C function obi_ecotag() from obi_ecotag.h.
# The extern block is declared nogil.
cdef extern from "obi_ecotag.h" nogil:

    # Returns an int status code; callers treat a negative value as an error.
    int obi_ecotag(const char* dms_name,
                   const char* query_view_name,
                   const char* ref_dms_name,
                   const char* ref_view_name,
                   const char* taxo_dms_name,
                   const char* taxonomy_name,
                   const char* output_view_name,
                   const char* output_view_comments,
                   double ecotag_threshold)
python/obitools3/dms/dms.cfiles
View file @
eb586b2f
...
...
@@ -38,6 +38,7 @@
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.c
../../../src/obi_ecotag.c
../../../src/obierrno.c
../../../src/obilittlebigman.c
../../../src/obitypes.c
...
...
python/obitools3/utils.cfiles
View file @
eb586b2f
...
...
@@ -38,6 +38,7 @@
../../src/obidmscolumn_array.c
../../src/obidmscolumn.c
../../src/obidmscolumndir.c
../../src/obi_ecotag.c
../../src/obierrno.c
../../src/obilittlebigman.c
../../src/obitypes.c
...
...
src/obi_ecotag.c
0 → 100755
View file @
eb586b2f
This diff is collapsed.
Click to expand it.
src/obi_ecotag.h
0 → 100755
View file @
eb586b2f
/*************************************************************************************************
* Header file for functions for the taxonomic assignment of sequences *
*************************************************************************************************/
/**
* @file obi_ecotag.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date November 15th 2018
* @brief Header file for the functions for the taxonomic assignment of sequences.
*/
#ifndef OBI_ECOTAG_H_
#define OBI_ECOTAG_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define ECOTAG_TAXID_COLUMN_NAME "TAXID"
#define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
#define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH"
#define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"
/**
* @brief Taxonomic assignment of sequences.
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms_name The path to the DMS where the views are.
* @param query_view_name The name of the view containing the query sequences.
* @param ref_dms_name The name of the DMS containing the reference database.
* @param ref_view_name The name of the view corresponding to the reference database as built by build_reference_db().
* @param taxo_dms_name The name of the DMS containing the taxonomy associated with the reference database.
* @param taxonomy_name The name of the taxonomy associated with the reference database.
* @param output_view_name The name to give to the output view.
* @param output_view_comments The comments to associate to the output view.
* @param ecotag_threshold The threshold at which to assign.
*
* The algorithm works like this:
* For each query sequence:
* Align with reference database
* Keep the indices of all the best matches
* For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs
* Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since November 2018
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_ecotag(const char* dms_name,
               const char* query_view_name,
               const char* ref_dms_name,
               const char* ref_view_name,
               const char* taxo_dms_name,
               const char* taxonomy_name,
               const char* output_view_name,
               const char* output_view_comments,
               double ecotag_threshold);
#endif
/* OBI_ECOTAG_H_ */
src/obierrno.h
View file @
eb586b2f
...
...
@@ -130,6 +130,8 @@ extern int obi_errno;
*/
#define OBIVIEW_ALREADY_EXISTS_ERROR (35)
/** Tried to create a new view with a name already existing in the DMS.
*/
#define OBI_ECOTAG_ERROR (36)
/** An error occurred while running ecotag (taxonomic assignment of sequences).
*/
/**@}*/
#endif
/* OBIERRNO_H_ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment