Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
41ad3dee
Commit
41ad3dee
authored
Jan 09, 2017
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Taxonomy: information about deleted taxids is now read from the
delnodes.dmp file and added to the *.adx file
parent
d6837401
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
222 additions
and
24 deletions
+222
-24
src/obidms_taxonomy.c
src/obidms_taxonomy.c
+222
-24
No files found.
src/obidms_taxonomy.c
View file @
41ad3dee
...
...
@@ -1208,12 +1208,15 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
bool
nodes_found
=
false
;
bool
names_found
=
false
;
bool
merged_found
=
false
;
bool
delnodes_found
=
false
;
int32_t
*
delnodes
=
NULL
;
int32_t
delnodes_count
;
char
line
[
2048
];
// TODO large enough?
char
*
elt
;
char
*
file_name
;
int
buffer_size
;
int
i
,
j
;
int
n
;
int
n
,
nD
,
nT
;
char
**
rank_names
;
int
*
parent_taxids
;
int
taxid
,
old_taxid
;
...
...
@@ -1492,6 +1495,154 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
return
NULL
;
}
// Go through taxonomy files
while
((
dp
=
readdir
(
tax_dir
))
!=
NULL
)
{
if
(
strcmp
(
dp
->
d_name
,
"delnodes.dmp"
)
==
0
)
{
delnodes_found
=
true
;
buffer_size
=
10000
;
// Initializing the list of deleted nodes
delnodes
=
(
int32_t
*
)
malloc
(
sizeof
(
int32_t
)
*
buffer_size
);
if
(
delnodes
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error allocating the memory for a taxonomy structure"
);
obi_close_taxonomy
(
tax
);
free
(
parent_taxids
);
free
(
rank_names
);
closedir
(
tax_dir
);
return
NULL
;
}
// Allocating the memory for the file name
file_name
=
(
char
*
)
malloc
((
strlen
(
taxdump
)
+
12
)
*
sizeof
(
char
));
if
(
file_name
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error allocating the memory for a file name"
);
obi_close_taxonomy
(
tax
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
closedir
(
tax_dir
);
return
NULL
;
}
// Build the file path
if
(
sprintf
(
file_name
,
"%s/delnodes.dmp"
,
taxdump
)
<
0
)
{
obi_set_errno
(
OBI_TAXONOMY_ERROR
);
obidebug
(
1
,
"
\n
Error building a taxonomy file name"
);
obi_close_taxonomy
(
tax
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
file_name
);
free
(
delnodes
);
return
NULL
;
}
file
=
fopen
(
file_name
,
"r"
);
if
(
file
==
NULL
)
{
obi_set_errno
(
OBI_TAXONOMY_ERROR
);
obidebug
(
1
,
"
\n
Problem opening a taxonomy file"
);
obi_close_taxonomy
(
tax
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
file_name
);
free
(
delnodes
);
return
NULL
;
}
free
(
file_name
);
n
=
0
;
while
(
fgets
(
line
,
sizeof
(
line
),
file
))
{
// Check for terminal '\n' character (line complete)
if
(
line
[
strlen
(
line
)
-
1
]
!=
'\n'
)
{
obi_set_errno
(
OBI_TAXONOMY_ERROR
);
obidebug
(
1
,
"
\n
Error: line buffer size not large enough for line in taxonomy file"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
// Get first and only element of the line (the deprecated taxid)
elt
=
strtok
(
line
,
"|"
);
// Remove the last character (tab character)
elt
[
strlen
(
elt
)
-
1
]
=
'\0'
;
// First element: old deprecated taxid
old_taxid
=
atoi
(
elt
);
// Store the old taxid in the list of deleted taxids
// Enlarge array if needed
if
(
n
==
buffer_size
)
{
buffer_size
=
buffer_size
*
2
;
delnodes
=
(
int32_t
*
)
realloc
(
tax
->
merged_idx
,
sizeof
(
int32_t
)
*
buffer_size
);
if
(
delnodes
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error reallocating memory for a taxonomy structure"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
return
NULL
;
}
}
delnodes
[
n
]
=
old_taxid
;
n
++
;
}
// Check that fgets stopped because it reached EOF
if
(
!
feof
(
file
))
{
obi_set_errno
(
OBI_TAXONOMY_ERROR
);
obidebug
(
1
,
"
\n
Error: file reading was stopped before end of file"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
// Store count
delnodes_count
=
n
;
fclose
(
file
);
}
}
closedir
(
tax_dir
);
// Go through directory again for next file // TODO make separate functions?
tax_dir
=
opendir
(
taxdump
);
if
(
tax_dir
==
NULL
)
{
obi_set_errno
(
OBI_TAXONOMY_ERROR
);
obidebug
(
1
,
"
\n
Problem opening a taxdump directory"
);
obi_close_taxonomy
(
tax
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
// Go through taxonomy files
while
((
dp
=
readdir
(
tax_dir
))
!=
NULL
)
{
...
...
@@ -1509,6 +1660,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
obi_close_taxonomy
(
tax
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
closedir
(
tax_dir
);
return
NULL
;
}
...
...
@@ -1522,6 +1674,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
obi_close_taxonomy
(
tax
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
closedir
(
tax_dir
);
return
NULL
;
}
...
...
@@ -1536,6 +1689,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
free
(
parent_taxids
);
free
(
rank_names
);
free
(
file_name
);
free
(
delnodes
);
return
NULL
;
}
...
...
@@ -1549,13 +1703,15 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
free
(
parent_taxids
);
free
(
rank_names
);
free
(
file_name
);
free
(
delnodes
);
return
NULL
;
}
free
(
file_name
);
n
=
0
;
i
=
0
;
nT
=
0
;
// to point in current taxa list while merging
nD
=
delnodes_count
-
1
;
// to point in deleted taxids list while merging (going from count-1 to 0 because taxids are sorted in descending order)
n
=
0
;
// to point in final merged list while merging
while
(
fgets
(
line
,
sizeof
(
line
),
file
))
{
// Check for terminal '\n' character (line complete)
...
...
@@ -1568,6 +1724,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
...
...
@@ -1588,34 +1745,68 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
taxid
=
atoi
(
elt
);
// Store the old taxid in the merged_idx ordered taxid list
// First, store the taxids from the current taxonomy that come before
while
((
i
<
(
tax
->
taxa
)
->
count
)
&&
((
tax
->
taxa
)
->
taxon
[
i
].
taxid
<
old_taxid
))
// The merged list is an ordered list of the current taxids, the deprecated taxids that have current references,
// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
// of the taxon in the taxa structure, or -1 for deleted taxids.
// Creating the merged list requires merging the 3 ordered lists into one.
while
(((
nT
<
(
tax
->
taxa
)
->
count
)
&&
((
tax
->
taxa
)
->
taxon
[
nT
].
taxid
<
old_taxid
))
&&
((
nD
>=
0
)
&&
(
delnodes
[
nD
]
<
old_taxid
)))
{
// Enlarge structures if needed
if
(
n
==
buffer_size
)
{
buffer_size
=
buffer_size
*
2
;
tax
->
merged_idx
=
(
ecomergedidx_t
*
)
realloc
(
tax
->
merged_idx
,
sizeof
(
ecomergedidx_t
)
+
sizeof
(
ecomerged_t
)
*
buffer_size
);
if
(
tax
->
merged_idx
==
NULL
)
if
((
tax
->
taxa
)
->
taxon
[
nT
].
taxid
<
delnodes
[
nD
])
{
// Add element from taxa list
// Enlarge structure if needed
if
(
n
==
buffer_size
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error reallocating memory for a taxonomy structure"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
return
NULL
;
buffer_size
=
buffer_size
*
2
;
tax
->
merged_idx
=
(
ecomergedidx_t
*
)
realloc
(
tax
->
merged_idx
,
sizeof
(
ecomergedidx_t
)
+
sizeof
(
ecomerged_t
)
*
buffer_size
);
if
(
tax
->
merged_idx
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error reallocating memory for a taxonomy structure"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
}
(
tax
->
merged_idx
)
->
merged
[
n
].
taxid
=
(
tax
->
taxa
)
->
taxon
[
nT
].
taxid
;
(
tax
->
merged_idx
)
->
merged
[
n
].
idx
=
nT
;
nT
++
;
n
++
;
}
else
if
(
delnodes
[
nD
]
<
(
tax
->
taxa
)
->
taxon
[
nT
].
taxid
)
{
// Add element from deleted taxids list
// Enlarge structure if needed
if
(
n
==
buffer_size
)
{
buffer_size
=
buffer_size
*
2
;
tax
->
merged_idx
=
(
ecomergedidx_t
*
)
realloc
(
tax
->
merged_idx
,
sizeof
(
ecomergedidx_t
)
+
sizeof
(
ecomerged_t
)
*
buffer_size
);
if
(
tax
->
merged_idx
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
obidebug
(
1
,
"
\n
Error reallocating memory for a taxonomy structure"
);
obi_close_taxonomy
(
tax
);
fclose
(
file
);
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
}
(
tax
->
merged_idx
)
->
merged
[
n
].
taxid
=
(
tax
->
taxa
)
->
taxon
[
i
].
taxid
;
(
tax
->
merged_idx
)
->
merged
[
n
].
idx
=
i
;
i
++
;
n
++
;
(
tax
->
merged_idx
)
->
merged
[
n
].
taxid
=
delnodes
[
nD
];
(
tax
->
merged_idx
)
->
merged
[
n
].
idx
=
-
1
;
// The index to tag deleted taxids is -1
nD
--
;
n
++
;
}
}
// Enlarge structures if needed
// Add the deprecated taxid
// Enlarge structure if needed
if
(
n
==
buffer_size
)
{
buffer_size
=
buffer_size
*
2
;
...
...
@@ -1629,6 +1820,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
closedir
(
tax_dir
);
free
(
parent_taxids
);
free
(
rank_names
);
free
(
delnodes
);
return
NULL
;
}
}
...
...
@@ -1675,6 +1867,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
fclose
(
file
);
}
}
// Free delnodes array, not needed anymore
free
(
delnodes
);
closedir
(
tax_dir
);
...
...
@@ -2494,6 +2690,8 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
if
(
indexed_taxon
==
NULL
)
current_taxon
=
NULL
;
else
if
(
indexed_taxon
->
idx
==
-
1
)
current_taxon
=
NULL
;
// TODO discuss what to do when the taxid refers to an old, deleted taxon
else
current_taxon
=
(
taxonomy
->
taxa
->
taxon
)
+
(
indexed_taxon
->
idx
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment