Commit 802a3f59 authored by Celine Mercier

data import: entries now counted if there are multiple files

parent 7e208707
......@@ -130,8 +130,11 @@ def run(config):
output[0].record_command_line(" ".join(sys.argv[1:]))
output[0].close()
return
pb = ProgressBar(entry_count, config, seconde=5)
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
else:
pb = None
entries = input[1]
......@@ -161,7 +164,8 @@ def run(config):
else:
raise RollbackException("obi import error, rollbacking view", view)
pb(i)
if pb is not None:
pb(i)
if NUC_SEQS_view:
id_col[i] = entry.id
......@@ -271,10 +275,11 @@ def run(config):
# Fill value
dcols[tag][0][i] = value
i+=1 # TODO Not if None sequence
i+=1
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
......
......@@ -13,10 +13,13 @@ from obitools3.dms.capi.obitypes cimport is_a_DNA_seq, \
from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
OBI_ELT_IDX_ERROR
#obi_errno
#obi_errno # TODO
import re
import mmap
import os
import glob
import gzip
cpdef bytes format_separator(bytes format):
......@@ -35,16 +38,49 @@ cpdef bytes format_separator(bytes format):
cpdef int count_entries(file, bytes format):
    """
    Count the entries of an input by counting the matches of the entry
    separator associated with ``format``.

    ``file`` is either an open file object exposing ``fileno()``, or, for the
    ``b'genbank'`` and ``b'embl'`` formats, the path (bytes or str) of a
    directory; in that case the counts of every ``*.gbff`` (genbank) or
    ``*.dat`` (embl) file found directly in that directory are summed.

    Returns the total number of separator matches, or -1 when the count can
    not be computed: no separator known for ``format``, no matching file in
    the directory, or any error while opening/mapping/scanning a file.
    """
    # Only the handles opened here are tracked; a file object handed in by
    # the caller is never closed by this function.
    opened = []
    try:
        sep = format_separator(format)
        if sep is None:
            return -1
        sep = re.compile(sep)

        if isinstance(file, (bytes, str)) and format in (b'genbank', b'embl'):
            # file is actually a directory with multiple files
            pattern = b"*.dat" if format == b'embl' else b"*.gbff"
            # The glob patterns are bytes, so the directory path must be bytes too.
            for filename in glob.glob(os.path.join(os.fsencode(file), pattern)):
                # TODO gzip-compressed files are not handled here
                opened.append(open(filename, "rb"))
            files = opened
        else:
            files = [file]

        if not files:
            return -1

        total_count = 0
        for f in files:
            # Map the file read-only so the regex scans it without loading it
            # through Python-level reads; the mapping is released right after.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped_file:
                total_count += len(sep.findall(mmapped_file))
        return total_count
    except Exception:
        # Counting is best-effort: callers treat -1 as "count unknown".
        return -1
    finally:
        for f in opened:
            f.close()
# TODO RollbackException?
cdef obi_errno_to_exception(int obi_errno, index_t line_nb=-1, object elt_id=None, str error_message=None) :
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment