Commit 22f2cf1b by Eric Coissac

Optimize speed

parent 56d7d645
/* Generated by Cython 0.12 on Mon May 17 08:05:47 2010 */
/* Generated by Cython 0.12 on Mon May 17 13:45:18 2010 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
......@@ -321,62 +321,6 @@ struct __pyx_opt_args_8obitools_5fasta_6_fasta_parseFastaDescription {
#define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);} } while(0)
#define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r);} } while(0)
#ifndef CYTHON_PROFILE
#define CYTHON_PROFILE 1
#endif
#ifndef CYTHON_PROFILE_REUSE_FRAME
#define CYTHON_PROFILE_REUSE_FRAME 0
#endif
#if CYTHON_PROFILE
#include "compile.h"
#include "frameobject.h"
#include "traceback.h"
#if CYTHON_PROFILE_REUSE_FRAME
#define CYTHON_FRAME_MODIFIER static
#define CYTHON_FRAME_DEL
#else
#define CYTHON_FRAME_MODIFIER
#define CYTHON_FRAME_DEL Py_DECREF(__pyx_frame)
#endif
#define __Pyx_TraceCall(funcname, srcfile, firstlineno) \
static PyCodeObject *__pyx_frame_code = NULL; \
CYTHON_FRAME_MODIFIER PyFrameObject *__pyx_frame = NULL; \
int __Pyx_use_tracing = 0; \
if (unlikely(PyThreadState_GET()->use_tracing && PyThreadState_GET()->c_profilefunc)) { \
__Pyx_use_tracing = __Pyx_TraceSetupAndCall(&__pyx_frame_code, &__pyx_frame, funcname, srcfile, firstlineno); \
}
#define __Pyx_TraceException() \
if (unlikely(__Pyx_use_tracing( && PyThreadState_GET()->use_tracing && PyThreadState_GET()->c_profilefunc) { \
PyObject *exc_info = __Pyx_GetExceptionTuple(); \
if (exc_info) { \
PyThreadState_GET()->c_profilefunc( \
PyThreadState_GET()->c_profileobj, __pyx_frame, PyTrace_EXCEPTION, exc_info); \
Py_DECREF(exc_info); \
} \
}
#define __Pyx_TraceReturn(result) \
if (unlikely(__Pyx_use_tracing) && PyThreadState_GET()->use_tracing && PyThreadState_GET()->c_profilefunc) { \
PyThreadState_GET()->c_profilefunc( \
PyThreadState_GET()->c_profileobj, __pyx_frame, PyTrace_RETURN, (PyObject*)result); \
CYTHON_FRAME_DEL; \
}
static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno); /*proto*/
static int __Pyx_TraceSetupAndCall(PyCodeObject** code, PyFrameObject** frame, const char *funcname, const char *srcfile, int firstlineno); /*proto*/
#else
#define __Pyx_TraceCall(funcname, srcfile, firstlineno)
#define __Pyx_TraceException()
#define __Pyx_TraceReturn(result)
#endif /* CYTHON_PROFILE */
static INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
static INLINE void __Pyx_RaiseTooManyValuesError(void);
......@@ -490,7 +434,6 @@ static PyObject *__pyx_f_8obitools_5fasta_6_fasta_parseFastaDescription(PyObjec
PyObject *__pyx_t_7 = NULL;
int __pyx_t_8;
__Pyx_RefNannySetupContext("parseFastaDescription");
__Pyx_TraceCall("parseFastaDescription", __pyx_f[0], 9);
if (__pyx_optional_args) {
if (__pyx_optional_args->__pyx_n > 0) {
__pyx_v_tagparser = __pyx_optional_args->tagparser;
......@@ -652,7 +595,6 @@ static PyObject *__pyx_f_8obitools_5fasta_6_fasta_parseFastaDescription(PyObjec
__Pyx_DECREF(__pyx_v_x);
__Pyx_DECREF(__pyx_v_y);
__Pyx_XGIVEREF(__pyx_r);
__Pyx_TraceReturn(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
......@@ -674,7 +616,6 @@ static PyObject *__pyx_pf_8obitools_5fasta_6_fasta_parseFastaDescription(PyObjec
struct __pyx_opt_args_8obitools_5fasta_6_fasta_parseFastaDescription __pyx_t_2;
static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__ds,&__pyx_n_s__tagparser,0};
__Pyx_RefNannySetupContext("parseFastaDescription");
__Pyx_TraceCall("parseFastaDescription", __pyx_f[0], 9);
__pyx_self = __pyx_self;
if (unlikely(__pyx_kwds)) {
Py_ssize_t kw_args = PyDict_Size(__pyx_kwds);
......@@ -736,7 +677,6 @@ static PyObject *__pyx_pf_8obitools_5fasta_6_fasta_parseFastaDescription(PyObjec
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_TraceReturn(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
......@@ -853,7 +793,7 @@ PyMODINIT_FUNC PyInit__fasta(void)
/*--- Execution code ---*/
/* "/Users/coissac/encours/OBITools/src/obitools/fasta/_fasta.pyx":3
* # cython: profile=True
* # cython: profile=False
*
* import re # <<<<<<<<<<<<<<
* from _fasta cimport *
......@@ -934,76 +874,6 @@ static void __pyx_init_filenames(void) {
__pyx_f = __pyx_filenames;
}
#if CYTHON_PROFILE
static int __Pyx_TraceSetupAndCall(PyCodeObject** code,
PyFrameObject** frame,
const char *funcname,
const char *srcfile,
int firstlineno) {
if (*frame == NULL || !CYTHON_PROFILE_REUSE_FRAME) {
if (*code == NULL) {
*code = __Pyx_createFrameCodeObject(funcname, srcfile, firstlineno);
if (*code == NULL) return 0;
}
*frame = PyFrame_New(
PyThreadState_GET(), /*PyThreadState *tstate*/
*code, /*PyCodeObject *code*/
PyModule_GetDict(__pyx_m), /*PyObject *globals*/
0 /*PyObject *locals*/
);
if (*frame == NULL) return 0;
}
else {
(*frame)->f_tstate = PyThreadState_GET();
}
return PyThreadState_GET()->c_profilefunc(PyThreadState_GET()->c_profileobj, *frame, PyTrace_CALL, NULL) == 0;
}
static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno) {
PyObject *py_srcfile = 0;
PyObject *py_funcname = 0;
PyCodeObject *py_code = 0;
#if PY_MAJOR_VERSION < 3
py_funcname = PyString_FromString(funcname);
py_srcfile = PyString_FromString(srcfile);
#else
py_funcname = PyUnicode_FromString(funcname);
py_srcfile = PyUnicode_FromString(srcfile);
#endif
if (!py_funcname | !py_srcfile) goto bad;
py_code = PyCode_New(
0, /*int argcount,*/
#if PY_MAJOR_VERSION >= 3
0, /*int kwonlyargcount,*/
#endif
0, /*int nlocals,*/
0, /*int stacksize,*/
0, /*int flags,*/
__pyx_empty_bytes, /*PyObject *code,*/
__pyx_empty_tuple, /*PyObject *consts,*/
__pyx_empty_tuple, /*PyObject *names,*/
__pyx_empty_tuple, /*PyObject *varnames,*/
__pyx_empty_tuple, /*PyObject *freevars,*/
__pyx_empty_tuple, /*PyObject *cellvars,*/
py_srcfile, /*PyObject *filename,*/
py_funcname, /*PyObject *name,*/
firstlineno, /*int firstlineno,*/
__pyx_empty_bytes /*PyObject *lnotab*/
);
bad:
Py_XDECREF(py_srcfile);
Py_XDECREF(py_funcname);
return py_code;
}
#endif /* CYTHON_PROFILE */
static INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
PyErr_Format(PyExc_ValueError,
#if PY_VERSION_HEX < 0x02050000
......
# cython: profile=True
# cython: profile=False
import re
from _fasta cimport *
......
# cython: profile=True
# cython: profile=False
'''
Created on 16 sept. 2009
......
# cython: profile=False
from obitools.tools.solexapairend import iterOnAligment
from array import array
from obitools import NucSequence
cimport array
cdef class IterOnConsensus:
cdef object _ali
cdef int __seqASingle
cdef int __seqBSingle
cdef int __seqABMatch
cdef int __seqAMismatch
cdef int __seqBMismatch
cdef int __seqAInsertion
cdef int __seqBInsertion
cdef int __seqADeletion
cdef int __seqBDeletion
cdef object __ioa
cdef bint __firstSeqB
def __cinit__(self,ali):
self._ali=ali
self.__seqASingle=0
self.__seqBSingle=0
self.__seqABMatch=0
self.__seqAMismatch=0
self.__seqBMismatch=0
self.__seqAInsertion=0
self.__seqBInsertion=0
self.__seqADeletion=0
self.__seqBDeletion=0
self.__ioa = iterOnAligment(self._ali)
self.__firstSeqB=False
def get_seqASingle(self):
return self.__seqASingle
def get_seqBSingle(self):
return self.__seqBSingle
def get_seqABMatch(self):
return self.__seqABMatch
def get_seqAMismatch(self):
return self.__seqAMismatch
def get_seqBMismatch(self):
return self.__seqBMismatch
def get_seqAInsertion(self):
return self.__seqAInsertion
def get_seqBInsertion(self):
return self.__seqBInsertion
def get_seqADeletion(self):
return self.__seqADeletion
def get_seqBDeletion(self):
return self.__seqBDeletion
def __next__(self):
cdef bytes snuc0
cdef bytes snuc1
cdef char* nuc0
cdef char* nuc1
cdef char* dash='-'
cdef double score0
cdef double score1
cdef double h0
cdef double h1
while(1):
snuc0,score0,snuc1,score1 = self.__ioa.next()
nuc0=snuc0
nuc1=snuc1
if nuc0[0]==nuc1[0]:
if nuc1[0]!=dash[0]:
self.__firstSeqB=True
self.__seqABMatch+=1
self.__seqBSingle=0
return (nuc0,score0*score1)
else:
h0 = score0 * (1-score1/3)
h1 = score1 * (1-score0/3)
if h0 < h1:
if nuc0[0]!=dash[0]:
self.__seqBSingle=0
if nuc1[0]==dash[0]:
if self.__firstSeqB:
self.__seqAInsertion+=1
else:
self.__seqASingle+=1
else:
self.__firstSeqB=True
self.__seqAMismatch+=1
return (nuc0,h0)
else:
self.__seqADeletion+=1
else:
if nuc1[0]!=dash[0]:
self.__firstSeqB=True
if nuc0[0]==dash[0]:
self.__seqBInsertion+=1
self.__seqBSingle+=1
else:
self.__seqBMismatch+=1
self.__seqBSingle=0
return (nuc1,h1)
else:
self.__seqBSingle=0
self.__seqBDeletion+=1
def __iter__(self):
return self
seqASingle = property(get_seqASingle, None, None, "direct's docstring")
seqBSingle = property(get_seqBSingle, None, None, "reverse's docstring")
seqABMatch = property(get_seqABMatch, None, None, "idem's docstring")
seqAMismatch = property(get_seqAMismatch, None, None, "mismatchdirect's docstring")
seqBMismatch = property(get_seqBMismatch, None, None, "mismatchreverse's docstring")
seqAInsertion = property(get_seqAInsertion, None, None, "insertdirect's docstring")
seqBInsertion = property(get_seqBInsertion, None, None, "insertreverse's docstring")
seqADeletion = property(get_seqADeletion, None, None, "deletedirect's docstring")
seqBDeletion = property(get_seqBDeletion, None, None, "deletereverse's docstring")
def buildConsensus(ali):
cdef array.array[double] quality = array.array('d')
cdef array.array[char] aseq = array.array('c')
cdef double* dquality
cdef char* cseq
cdef int i=0
cdef char* cnuc
cdef bytes nuc
cdef double score
cdef str sseq
array.resize(quality,len(ali[0]))
array.resize(aseq,len(ali[0]))
dquality = quality._d
cseq = aseq._c
ic=IterOnConsensus(ali)
for nuc,score in ic:
cnuc=nuc
dquality[i]=score
cseq[i]=cnuc[0]
i+=1
array.resize(quality,i)
array.resize(aseq,i)
sseq=''.join(aseq)
seq=NucSequence(ali[0].wrapped.id+'_CONS',sseq,**ali[0].wrapped.getTags())
seq.quality=quality
if hasattr(ali, "direction"):
seq['alignment']=ali.direction
seq['seqASingle']=ic.seqASingle
seq['seqBSingle']=ic.seqBSingle
seq['seqABMatch']=ic.seqABMatch
seq['seqAMismatch']=ic.seqAMismatch
seq['seqBMismatch']=ic.seqBMismatch
seq['seqAInsertion']=ic.seqAInsertion
seq['seqBInsertion']=ic.seqBInsertion-ic.seqBSingle
seq['seqADeletion']=ic.seqADeletion
seq['seqBDeletion']=ic.seqBDeletion
return seq
"""
array.pxd
Cython interface to Python's array.array module.
* 1D contiguous data view
* tools for fast array creation, maximum C-speed and handiness
* suitable as allround light weight auto-array within Cython code too
Usage:
>>> cimport array
Usage through Cython buffer interface (Py2.3+):
>>> def f(arg1, unsigned i, double dx)
array.array[double] a = arg1
a[i] += dx
Fast C-level new_array(_zeros), resize_array, copy_array, .length,
zero_array
cdef array.array[double] k = array.copy(d)
cdef array.array[double] n = array.array(d, d.length * 2 )
cdef array.array[double] m = array.zeros_like(FLOAT_TEMPLATE)
array.resize(f, 200000)
Zero overhead with naked data pointer views by union:
_f, _d, _i, _c, _u, ...
=> Original C array speed + Python dynamic memory management
cdef array.array a = inarray
if
a._d[2] += 0.66 # use as double array without extra casting
float *subview = vector._f + 10 # starting from 10th element
unsigned char *subview_buffer = vector._B + 4
Suitable as lightweight arrays intra Cython without speed penalty.
Replacement for C stack/malloc arrays; no trouble with refcounting,
mem.leaks; seamless Python compatibility, buffer() option
IMPORTANT: arrayarray.h (arrayobject, arraydescr) is not part of
the official Python C-API so far; arrayarray.h is located
next to this file copy it to PythonXX/include or local or
somewhere on your -I path
last changes: 2009-05-15 rk
: 2009-12-06 bp
"""
cimport stdlib
cdef extern from "stdlib.h" nogil:
void *memset(void *str, int c, size_t n)
char *strcat(char *str1, char *str2)
char *strncat(char *str1, char *str2, size_t n)
void *memchr(void *str, int c, size_t n)
int memcmp(void *str1, void *str2, size_t n)
void *memcpy(void *str1, void *str2, size_t n)
void *memmove(void *str1, void *str2, size_t n)
cdef extern from "arrayarray.h":
ctypedef void PyTypeObject
ctypedef short Py_UNICODE
int PyErr_BadArgument()
ctypedef class array.array [object arrayobject]
ctypedef object GETF(array a, Py_ssize_t ix)
ctypedef object SETF(array a, Py_ssize_t ix, object o)
ctypedef struct arraydescr: # [object arraydescr]:
int typecode
int itemsize
GETF getitem # PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
SETF setitem # int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
ctypedef class array.array [object arrayobject]:
cdef __cythonbufferdefaults__ = {'ndim' : 1, 'mode':'c'}
cdef:
PyTypeObject* ob_type
int ob_size # number of valid items;
unsigned length # == ob_size (by union)
char* ob_item # to first item
Py_ssize_t allocated # bytes
arraydescr* ob_descr # struct arraydescr *ob_descr;
object weakreflist # /* List of weak references */
# view's of ob_item:
float* _f # direct float pointer access to buffer
double* _d # double ...
int* _i
unsigned *_I
unsigned char *_B
signed char *_b
char *_c
unsigned long *_L
long *_l
short *_h
unsigned short *_H
Py_UNICODE *_u
void* _v
def __getbuffer__(array self, Py_buffer* info, int flags):
# This implementation of getbuffer is geared towards Cython
# requirements, and does not yet fullfill the PEP.
# In particular strided access is always provided regardless
# of flags
cdef unsigned rows, columns, itemsize
info.suboffsets = NULL
info.buf = self.ob_item
info.readonly = 0
info.ndim = 1
info.itemsize = itemsize = self.ob_descr.itemsize # e.g. sizeof(float)
info.strides = <Py_ssize_t*> \
stdlib.malloc(sizeof(Py_ssize_t) * info.ndim * 2 + 2)
info.shape = info.strides + 1
info.shape[0] = self.ob_size # number of items
info.strides[0] = info.itemsize
info.format = <char*>(info.strides + 2 * info.ndim)
info.format[0] = self.ob_descr.typecode
info.format[1] = 0
info.obj = self
def __releasebuffer__(array self, Py_buffer* info):
#if PyArray_HASFIELDS(self):
# stdlib.free(info.format)
#if sizeof(npy_intp) != sizeof(Py_ssize_t):
stdlib.free(info.strides)
array newarrayobject(PyTypeObject* type, Py_ssize_t size,
arraydescr *descr)
# fast resize/realloc
# not suitable for small increments; reallocation 'to the point'
int resize(array self, Py_ssize_t n)
# efficient for small increments (not in Py2.3-)
int resize_smart(array self, Py_ssize_t n)
# fast creation of a new array - init with zeros
# yet you need a (any) template array of the same item type (but not same size)
cdef inline array zeros_like(array sametype):
cdef array op = newarrayobject(<PyTypeObject*>sametype.ob_type, sametype.ob_size, sametype.ob_descr)
if op:
memset(op.ob_item, 0, op.ob_size * op.ob_descr.itemsize)
return op
# fast creation of a new array - no init with zeros
cdef inline array empty_like(array sametype):
return newarrayobject(<PyTypeObject*>sametype.ob_type, sametype.op.ob_size,
sametype.ob_descr)
cdef inline array copy(array self):
cdef array op = newarrayobject(<PyTypeObject*>self.ob_type, self.ob_size,
self.ob_descr)
memcpy(op.ob_item, self.ob_item, op.ob_size * op.ob_descr.itemsize)
return op
cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n):
""" efficent appending of new stuff of same type (e.g. of same array type)
n: number of elements (not number of bytes!)
"""
cdef Py_ssize_t itemsize = self.ob_descr.itemsize, orgsize = self.ob_size
if -1 == resize_smart(self, orgsize + n):
return -1
memcpy( self.ob_item + orgsize * itemsize, stuff, n * itemsize )
cdef inline int extend(array self, array other):
if self.ob_descr.typecode != self.ob_descr.typecode:
PyErr_BadArgument()
return -1
return extend_buffer(self, other.ob_item, other.ob_size)
cdef inline void zero(array op):
memset(op.ob_item, 0, op.ob_size * op.ob_descr.itemsize)
/* arrayarray.h
artificial C-API for Python's
<array.array> type.
copy this file to your -I path, e.g. .../pythonXX/include
See array.pxd next to this file
last changes: 2009-05-15 rk
*/
#ifndef _ARRAYARRAY_H
#define _ARRAYARRAY_H
#include <Python.h>
struct arrayobject; /* Forward */
/* All possible arraydescr values are defined in the vector "descriptors"
* below. That's defined later because the appropriate get and set
* functions aren't visible yet.
*/
typedef struct arraydescr {
int typecode;
int itemsize;
PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
#if PY_VERSION_HEX >= 0x03000000
char *formats;
#endif
} arraydescr;
typedef struct arrayobject {
PyObject_HEAD
union {
int ob_size;
unsigned length;
};
union {
char *ob_item;
float *_f;
double *_d;
int *_i;
unsigned *_I;
unsigned char *_B;
signed char *_b;
char *_c;
unsigned long *_L;
long *_l;
short *_h;
unsigned short *_H;
Py_UNICODE *_u;
void *_v;
};
#if PY_VERSION_HEX >= 0x02040000
Py_ssize_t allocated;
#endif
struct arraydescr *ob_descr;
#if PY_VERSION_HEX >= 0x02040000
PyObject *weakreflist; /* List of weak references */
#if PY_VERSION_HEX >= 0x03000000
int ob_exports; /* Number of exported buffers */
#endif
#endif
} arrayobject;
#ifndef NO_NEWARRAY_INLINE
/*
*
* fast creation of a new array - init with zeros
*/
static inline PyObject *
newarrayobject(PyTypeObject *type, Py_ssize_t size, struct arraydescr *descr)
{
arrayobject *op;
size_t nbytes;
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
nbytes = size * descr->itemsize;
/* Check for overflow */
if (nbytes / descr->itemsize != (size_t)size) {
return PyErr_NoMemory();
}
op = (arrayobject *) type->tp_alloc(type, 0);
if (op == NULL) {
return NULL;
}
op->ob_descr = descr;
#if !( PY_VERSION_HEX < 0x02040000 )
op->allocated = size;
op->weakreflist = NULL;
#endif
Py_SIZE(op) = size;
if (size <= 0) {
op->ob_item = NULL;
}
else {
op->ob_item = PyMem_NEW(char, nbytes);
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
}
return (PyObject *) op;
}
#else
PyObject *
newarrayobject(PyTypeObject *type, Py_ssize_t size, struct arraydescr *descr);
#endif
/* fast resize (reallocation to the point)
not designed for filing small increments (but for fast opaque array apps) */
static inline int resize(arrayobject *self, Py_ssize_t n)
{
char *item=self->ob_item;
PyMem_RESIZE(item, char, (unsigned)(n * self->ob_descr->itemsize));
if (item == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = item;
self->ob_size = n;
#if PY_VERSION_HEX >= 0x02040000
self->allocated = n;
#endif
return 0;
}
/* suitable for small increments; over allocation 50% ;
Remains non-smart in Python 2.3- ; but exists for compatibility */
static inline int resize_smart(arrayobject *self, Py_ssize_t n)
{
char *item=self->ob_item;
#if PY_VERSION_HEX >= 0x02040000
if (n < self->allocated) {
if (n*4 > self->allocated) {
self->ob_size = n;
return 0;
}
}
Py_ssize_t newsize = n * 3 / 2 + 1;
PyMem_RESIZE(item, char, (unsigned)(newsize * self->ob_descr->itemsize));
if (item == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = item;
self->ob_size = n;
self->allocated = newsize;
return 0;
#else
return resize(self, n) /* Python 2.3 has no 'allocated' */
#endif
}
#endif
/* _ARRAYARRAY_H */
'''
Created on 17 mai 2010
@author: coissac
'''
from obitools.alignment import columnIterator
def iterOnAligment(ali):
pos0=0
pos1=len(ali[1].wrapped)-1
begin0=False
end0=False
begin1=False
end1=False
for nuc0,nuc1 in columnIterator(ali):