Commit 85ea5de9 by Eric Coissac

Add files and reads modules to deal with index checking more efficiently

parent 6fa7d4cd
from .universalopener import uopen
\ No newline at end of file
#cython: language_level=3
cdef class LineBuffer:
    # C-level attribute declarations for LineBuffer.

    # Object whose readlines() method supplies the lines.
    cdef object fileobj
    # Value passed to fileobj.readlines() on every call.
    cdef int size
#cython: language_level=3
'''
Created on 30 March 2016
@author: coissac
'''
cdef class LineBuffer:
    """Iterate over the lines of a file-like object, fetched in batches.

    The wrapped object only needs a ``readlines(hint)`` method returning a
    list of ``str``; iteration stops at the first empty batch.
    """

    def __init__(self, object fileobj, int size=100000000):
        """Store the source object and the hint given to ``readlines``.

        :param fileobj: object providing ``readlines(hint)``
        :param size: value passed to ``readlines`` on every call
        """
        self.size = size
        self.fileobj = fileobj

    def __iter__(self):
        """Yield every line of the wrapped object, one batch at a time."""
        cdef list chunk
        cdef str text

        while True:
            chunk = self.fileobj.readlines(self.size)
            if not chunk:
                # An empty batch means the source is exhausted
                break
            for text in chunk:
                yield text
#cython: language_level=3
cdef class MagicKeyFile:
    # C-level declarations for MagicKeyFile.

    # Stream handed to the constructor, kept unchanged.
    cdef object stream
    # `mode` attribute of the stream, or None when the stream has none.
    cdef str stream_mode
    # Object the bytes are actually read from (the stream or its `buffer`).
    cdef object binary
    # First bytes read from `binary` when the object is built.
    cdef bytes key
    # Length compared with `pos` to know whether `key` is fully consumed.
    cdef int keylength
    # Read offset inside `key` while it has not been fully returned.
    cdef int pos

    # `size=?`: the default value is supplied by the implementation.
    cpdef bytes read(self,int size=?)
    cpdef bytes read1(self,int size=?)
    cpdef int tell(self)
cdef class CompressedFile:
    # C-level declarations for CompressedFile.

    # Object to which attribute lookups and iteration are forwarded.
    cdef object accessor
\ No newline at end of file
#cython: language_level=3
'''
Created on 28 March 2016
@author: coissac
'''
import zipfile
import bz2
import gzip
import io
cdef class MagicKeyFile:
    """Wrap a stream, read its first bytes and replay them on later reads.

    The leading bytes (the "magic key") are consumed from the stream when
    the object is built and stored in ``key``. ``read`` then serves those
    bytes first, so a consumer still sees the stream from its beginning.
    Any attribute not defined here is looked up on the binary stream.
    """

    def __init__(self, stream, length=2):
        """Read the magic key of ``stream``.

        :param stream: object exposing either a ``mode`` attribute or a
                       ``headers`` mapping containing ``'Content-type'``
        :param length: number of leading bytes to read as the key
        :raises TypeError: when ``stream`` offers neither interface
        """
        binary = stream

        self.stream = stream
        self.stream_mode = None

        if hasattr(stream, "mode"):
            self.stream_mode = stream.mode
            # A text stream exposing a binary ``buffer`` is read through
            # that buffer, so that the key is made of raw bytes.
            if ('b' not in stream.mode and
                    hasattr(stream, "buffer") and
                    'b' in stream.buffer.mode):
                binary = stream.buffer

        if (self.stream_mode is None and
                not (hasattr(stream, 'headers') and
                     hasattr(stream.headers, "keys") and
                     'Content-type' in stream.headers)):
            raise TypeError("stream does not present the good interface")

        self.binary = binary
        self.key = binary.read(length)
        # Use the number of bytes really obtained: a stream shorter than
        # ``length`` would otherwise make tell() report offsets inside a
        # key that does not exist.
        self.keylength = len(self.key)
        self.pos = 0

    cpdef bytes read(self, int size=-1):
        """Return up to ``size`` bytes, serving the stored key first.

        :param size: maximum number of bytes; a negative value reads
                     everything that is left
        """
        cdef bytes r
        cdef int remaining = self.keylength - self.pos

        if remaining <= 0:
            # The key has been fully replayed: plain pass-through
            return self.binary.read(size)

        if 0 <= size <= remaining:
            # The request fits inside the unread part of the key
            r = self.key[self.pos:(self.pos + size)]
            self.pos += size
            return r

        if size > remaining:
            # Only the part exceeding the key is asked to the stream
            size -= remaining

        r = self.key[self.pos:] + self.binary.read(size)
        # Any value >= keylength marks the key as consumed
        self.pos = self.keylength + 1
        return r

    cpdef bytes read1(self, int size=-1):
        """Same as :meth:`read`."""
        return self.read(size)

    cpdef int tell(self):
        """Return the current offset from the beginning of the stream.

        While the key is being replayed the offset is the position in the
        key; afterwards it is the one reported by the binary stream.
        """
        if self.pos < self.keylength:
            return self.pos
        return self.binary.tell()

    def __getattr__(self, name):
        """Delegate unknown attributes to the binary stream."""
        return getattr(self.binary, name)
cdef class CompressedFile:
    """Give access to a stream, decompressing it when a magic key matches.

    The first bytes of the stream are compared with the signatures of the
    zip, bz2 and gzip formats. On a match the corresponding opener is
    applied to the stream; otherwise the stream is used as is. The result
    is wrapped in an ``io.TextIOWrapper`` when the source is textual.
    Attribute lookups and iteration are forwarded to that accessor.
    """

    def __init__(self, stream):
        """Select the accessor matching the content of ``stream``.

        :param stream: object accepted by ``MagicKeyFile``
        """
        cdef int keylength
        cdef MagicKeyFile magic
        cdef bytes k
        cdef object c
        # format name -> (magic signature, opener)
        # NOTE(review): zipfile.ZipFile builds an archive object, not a
        # file-like stream of the uncompressed data - confirm that the zip
        # branch behaves as intended.
        cdef dict compress = {'zip': (b'\x50\x4b\x03\x04', zipfile.ZipFile),
                              'bz2': (b'\x42\x5a\x68', bz2.BZ2File),
                              'gz': (b'\x1f\x8b\x08', gzip.open)
                              }

        # Read enough bytes to test the longest signature
        keylength = max([len(x[0]) for x in compress.values()])
        magic = MagicKeyFile(stream, keylength)

        self.accessor = None
        for k, c in compress.values():
            if magic.key.startswith(k):
                self.accessor = c(magic)
                # Signatures are mutually exclusive: stop at the first hit
                break

        if self.accessor is None:
            self.accessor = magic

        # stream_mode is None for streams described only by their headers;
        # testing "'b' not in None" would raise a TypeError, so the mode is
        # only inspected when it exists.
        if ((hasattr(stream, 'headers') and
             hasattr(stream.headers, "keys") and
             'Content-type' in stream.headers and
             stream.headers['Content-type'].startswith('text/')) or
                (magic.stream_mode is not None and
                 'b' not in magic.stream_mode)):
            self.accessor = io.TextIOWrapper(self.accessor)

    def __getattr__(self, name):
        """Delegate unknown attributes to the accessor."""
        return getattr(self.accessor, name)

    def __iter__(self):
        """Yield the items produced by iterating over the accessor."""
        for x in self.accessor:
            yield x
\ No newline at end of file
#cython: language_level=3
from .uncompress cimport CompressedFile
from .linebuffer cimport LineBuffer
\ No newline at end of file
#cython: language_level=3
'''
Created on 25 March 2016
@author: coissac
'''
from urllib.request import urlopen
def uopen(str name, mode='r', int buffersize=100000000):
    """Open ``name`` as a URL or as a local file and iterate over its lines.

    ``name`` is first given to ``urlopen``; when that fails it is opened
    with the builtin ``open``. The resulting stream is wrapped in a
    ``CompressedFile`` and read through a ``LineBuffer``.

    :param name: URL or path of the data to read
    :param mode: mode used when ``name`` is opened as a local file
    :param buffersize: size given to the ``LineBuffer``
    :return: an iterator over the lines
    """
    cdef CompressedFile c
    cdef LineBuffer lb

    try:
        f = urlopen(name)
    except Exception:
        # Not a usable URL: fall back to a local file. Catching Exception
        # (instead of a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        f = open(name, mode)

    c = CompressedFile(f)
    lb = LineBuffer(c, buffersize)

    return iter(lb)
#cython: language_level=3
from cpython.array cimport array
from orgasm.files.universalopener import uopen
def readFasta(filename, int quality=40, int shift=33):
    """Iterate over the records of FASTA formatted data.

    Lines found before the first header (a line starting with ``>``) are
    ignored. The identifier of a record is the text following ``>`` up to
    the first space or ``/``, whichever comes first, or the whole stripped
    header when it contains neither. Sequence lines are stripped, converted
    to upper case and concatenated.

    :param filename: a file name or URL (opened with ``uopen``) or an
                     iterable of ``str`` lines
    :param quality: quality score given to every position
    :param shift: offset added to ``quality``
    :return: yields ``(id, sequence, qualities)`` tuples where ``id`` and
             ``sequence`` are ``bytes`` and ``qualities`` is an
             ``array('B')`` holding ``quality + shift`` once per base.
             Nothing is yielded when the input holds no header line.
    """
    cdef int cut_slash
    cdef int cut_space
    cdef bytes bline
    cdef bytes seq
    cdef list seqlines = []

    # Stays None until the first header line is met
    sid = None

    if isinstance(filename, str):
        filename = uopen(filename)

    for line in filename:
        bline = bytes(line, encoding='ascii')

        # Slicing (rather than indexing) copes with an empty line
        if bline[:1] == b'>':
            if sid is not None:
                # A new header closes the record being accumulated
                seq = b''.join(seqlines)
                yield (sid,
                       seq,
                       array('B', [quality + shift] * len(seq)))
                seqlines = []

            cut_space = bline.find(b' ')
            cut_slash = bline.find(b'/')

            if cut_space >= 0 and (cut_slash < 0 or cut_space < cut_slash):
                sid = bline[1:cut_space]
            elif cut_slash >= 0:
                sid = bline[1:cut_slash]
            else:
                # Drop the leading '>' here too, as in the other branches
                sid = bline[1:].strip()
        elif sid is not None:
            seqlines.append(bline.strip().upper())

    if sid is not None:
        # Last record of the input
        seq = b''.join(seqlines)
        yield (sid,
               seq,
               array('B', [quality + shift] * len(seq)))
#cython: language_level=3
from cpython.array cimport array
from orgasm.files.universalopener import uopen
def readFastq(filename):
    """Iterate over the records of FASTQ formatted data.

    A record starts on a line beginning with ``@``; lines read while
    looking for such a header are skipped. The three following lines are
    taken as the sequence, a separator (ignored) and the quality string.
    The identifier is the text following ``@`` up to the first space or
    ``/``, whichever comes first, or the whole stripped header when it
    contains neither.

    :param filename: a file name or URL (opened with ``uopen``) or an
                     iterable of ``str`` lines
    :return: yields ``(id, sequence, qualities)`` tuples where ``id`` and
             ``sequence`` are ``bytes`` (sequence in upper case) and
             ``qualities`` is an ``array('B')`` built from the raw bytes
             of the quality line. A record truncated by the end of the
             input is dropped and the iteration ends.
    """
    cdef str line
    cdef bytes bline
    cdef bytes sid
    cdef int cut_slash
    cdef int cut_space
    cdef array quality
    cdef bytes seq

    if isinstance(filename, str):
        filename = uopen(filename)

    # A single iterator is shared by the for loop and the next() calls,
    # so any iterable of lines is accepted.
    lines = iter(filename)

    for line in lines:
        bline = bytes(line, encoding='ascii')

        # Slicing (rather than indexing) copes with an empty line
        if bline[:1] != b'@':
            continue

        cut_space = bline.find(b' ')
        cut_slash = bline.find(b'/')

        if cut_space >= 0 and (cut_slash < 0 or cut_space < cut_slash):
            sid = bline[1:cut_space]
        elif cut_slash >= 0:
            sid = bline[1:cut_slash]
        else:
            # Drop the leading '@' here too, as in the other branches
            sid = bline[1:].strip()

        try:
            seq = bytes(next(lines),
                        encoding='ascii').strip().upper()
            next(lines)  # separator line, ignored
            quality = array("B",
                            bytes(next(lines),
                                  encoding='ascii').strip())
        except StopIteration:
            # Truncated record: end cleanly instead of letting
            # StopIteration escape from the generator.
            return

        yield (sid, seq, quality)
../../../fiboheap/fibo.h
../../../src/orgasm.h
from orgasm.utils.dna cimport reverseComplement
# Declarations only: the implementations are not part of this view.
# In these prototypes `=?` marks an optional argument whose default value
# is supplied by the implementation.

# NOTE(review): presumably trims the 5' end of the sequence - confirm.
cpdef tuple cut5prime(tuple sequence, int trimfirst=?)
# cdef function: callable from Cython code only, not from Python.
cdef int firstbelow(char[:] qualities, int quality, int shift=?)
# NOTE(review): presumably trims the 3' end on a quality threshold - confirm.
cpdef tuple cut3primeQuality(tuple sequence, int quality, int shift=?)
cpdef tuple longestACGT(bytes seq)
cpdef int bestWindow(char[:] qualities, int length)
cpdef dict getStats()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment