Commit a4a854a2 by Eric Coissac

Merge branch 'master' of git.metabarcoding.org:obitools/obitools

Conflicts:
	src/obitools/align/_lcs.ext.4.c

the symbolic link is correctly restored
parents a186bf43 a6daa709
@toto
cgatcgtagctagctagctag
+
IIIIIIIIIIIIIIIIIIIII
@titi
ggtgctagctgctagctgatgc
+
AAAAAAAAAAAAAAAAAAAAAA
_upperbond.ext.1.c
\ No newline at end of file
......@@ -5,29 +5,31 @@ Created on 6 Nov. 2009
'''
#@PydevCodeAnalysisIgnore
from cpython cimport array
from obitools import BioSequence
from _lcs cimport *
from _upperbond cimport *
from _dynamic cimport *
from _dynamic cimport *
from _upperbond import *
cdef class LCS(DynamicProgramming):
def __init__(self):
DynamicProgramming.__init__(self,opengap=0,extgap=0)
property opengap:
def __get__(self):
return self._opengap
property extgap:
def __get__(self):
return self._extgap
cdef double matchScore(self,int h, int v):
return iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
cdef double doAlignment(self) except? 0:
cdef int i # vertical index
cdef int j # horizontal index
......@@ -38,24 +40,24 @@ cdef class LCS(DynamicProgramming):
cdef double scoremax
cdef int path
if self.needToCompute:
self.allocate()
self.reset()
for j in range(1,self.hSeq.length+1):
idx = self.index(j,0)
self.matrix.matrix[idx].score = 0
self.matrix.matrix[idx].path = j
for i in range(1,self.vSeq.length+1):
idx = self.index(0,i)
self.matrix.matrix[idx].score = 0
self.matrix.matrix[idx].path = -i
for i in range(1,self.vSeq.length+1):
for j in range(1,self.hSeq.length+1):
# 1 - came from diagonal
idx = self.index(j-1,i-1)
# print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
......@@ -68,46 +70,46 @@ cdef class LCS(DynamicProgramming):
# 2 - open horizontal gap
idx = self.index(j-1,i)
score = self.matrix.matrix[idx].score
if score > scoremax :
if score > scoremax :
scoremax = score
path = self.matrix.matrix[idx].path
if path >=0:
path+=1
else:
else:
path=+1
# 3 - open vertical gap
idx = self.index(j,i-1)
score = self.matrix.matrix[idx].score
if score > scoremax :
score = self.matrix.matrix[idx].score
if score > scoremax :
scoremax = score
path = self.matrix.matrix[idx].path
if path <=0:
path-=1
else:
path=-1
idx = self.index(j,i)
self.matrix.matrix[idx].score = scoremax
self.matrix.matrix[idx].path = path
self.matrix.matrix[idx].path = path
self.sequenceChanged=False
self.scoreChanged=False
idx = self.index(self.hSeq.length,self.vSeq.length)
return self.matrix.matrix[idx].score
cdef void backtrack(self):
#cdef list path=[]
cdef int i
cdef int j
cdef int j
cdef int p
self.doAlignment()
i=self.vSeq.length
j=self.hSeq.length
self.path=allocatePath(i,j,self.path)
while (i or j):
p=self.matrix.matrix[self.index(j,i)].path
self.path.path[self.path.length]=p
......@@ -120,25 +122,23 @@ cdef class LCS(DynamicProgramming):
i+=p
else:
j-=p
#path.reverse()
#reversePath(self.path)
self.path.hStart=0
self.path.vStart=0
#return 0,0,path
ALILEN=0
MAXLEN=1
MINLEN=2
def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALILEN):
cdef double lcs
cdef bytes se1=bytes(str(seq1))
cdef bytes se2=bytes(str(seq2))
cdef int l1 = len(seq1)
cdef int l2 = len(seq2)
cdef array.array[unsigned char] w1
cdef array.array[unsigned char] w2
cdef int o1
cdef int o2
cdef int wordcount
......@@ -146,11 +146,15 @@ def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALIL
cdef bint possible
cdef bint large
cdef array.array[unsigned char] w1
cdef array.array[unsigned char] w2
cdef char *s1
cdef char *s2
s1=se1
s2=se2
if min(l1,l2) < 8:
lcsali = LCS()
lcsali.seqA = seq1
......@@ -162,8 +166,8 @@ def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALIL
w1 = seq1.word4table
o1 = seq1.word4over
else:
w1 = newtable()
o1 = buildTable(s1,w1._B,&wordcount)
w1 = newtable()
o1 = buildTable(s1,w1.data.as_uchars,&wordcount)
if isinstance(seq1, BioSequence):
seq1.word4table=w1
seq1.word4over=o1
......@@ -171,14 +175,15 @@ def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALIL
w2 = seq2.word4table
o2 = seq2.word4over
else:
w2 = newtable()
o2 = buildTable(s2,w2._B,&wordcount)
w2 = newtable()
o2 = buildTable(s2,w2.data.as_uchars,&wordcount)
if isinstance(seq2, BioSequence) :
seq2.word4table=w2
seq2.word4over=o2
large = reference==ALILEN or reference==MAXLEN
possible = ispossible(l1, w1._B, o1,
l2, w2._B, o2,
possible = ispossible(l1, w1.data.as_uchars, o1,
l2, w2.data.as_uchars, o2,
minimum,normalized,large)
if possible:
lcs = fastLCSScore(s1,s2,NULL,&alilength)
......@@ -186,7 +191,7 @@ def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALIL
lcs = -1.0
else:
lcs = fastLCSScore(s1,s2,NULL,&alilength)
if lcs >= 0 and normalized:
if reference==ALILEN:
if alilength > 0:
......@@ -197,7 +202,5 @@ def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALIL
lcs /=max(l1,l2)
elif reference==MINLEN:
lcs /=min(l1,l2)
return lcs,alilength
return lcs,alilength
arrayarray.h
\ No newline at end of file
_sse.h
from array import array
cimport array
from cpython cimport array
cdef extern from *:
ctypedef char* const_char_ptr "const char*"
cdef import from "_upperbond.h":
int buildTable(const_char_ptr sequence, unsigned char *table, int *count)
int compareTable(unsigned char *t1, int over1, unsigned char* t2, int over2)
......@@ -13,5 +12,5 @@ cdef import from "_upperbond.h":
bint ispossible(int len1, unsigned char *t1, int over1,
int len2, unsigned char* t2, int over2,
double minimum, bint normalized, bint large)
cdef array.array[unsigned char] newtable()
......@@ -5,9 +5,7 @@ Created on 6 Nov. 2009
'''
#@PydevCodeAnalysisIgnore
from _dynamic cimport *
from array import array
cimport array
from _dynamic cimport *
from obitools import BioSequence
from _upperbond cimport *
......@@ -25,10 +23,10 @@ def indexSequences(seq,double threshold=0.95):
cdef int overflow
cdef int wordcount
cdef int wordmin
table = newtable()
table = newtable()
sequence=bytes(str(seq))
overflow = buildTable(sequence,table._B,&wordcount)
overflow = buildTable(sequence,table._BYTES,&wordcount)
wordmin = threshold4(wordcount,threshold)
return (table,overflow,wordmin)
......@@ -36,15 +34,15 @@ cpdef int countCommonWords(array.array table1,
int overflow1,
array.array table2,
int overflow2):
return compareTable(table1._B,overflow1,
table2._B,overflow2)
return compareTable(table1._BYTES,overflow1,
table2._BYTES,overflow2)
cpdef bint isLCSReachable(object seq1,
object seq2,
double minimum,
bint normalized=False,
bint normalized=False,
bint large=True):
cdef bytes se1
cdef bytes se2
cdef int l1 = len(seq1)
......@@ -65,29 +63,28 @@ cpdef bint isLCSReachable(object seq1,
else:
se1=bytes(str(seq1))
s1=se1
w1 = newtable()
o1 = buildTable(s1,w1._B,&wordcount)
w1 = newtable()
o1 = buildTable(s1,w1._BYTES,&wordcount)
if isinstance(seq1, BioSequence):
seq1.word4table=w1
seq1.word4over=o1
if isinstance(seq2, BioSequence) and seq2.word4table is not None:
w2 = seq2.word4table
o2 = seq2.word4over
else:
se2=bytes(str(seq2))
s2=se2
w2 = newtable()
o2 = buildTable(s2,w2._B,&wordcount)
w2 = newtable()
o2 = buildTable(s2,w2._BYTES,&wordcount)
if isinstance(seq2, BioSequence) :
seq2.word4table=w2
seq2.word4over=o2
possible = ispossible(l1, w1._B, o1,
l2, w2._B, o2,
minimum,normalized,large)
return possible
possible = ispossible(l1, w1._BYTES, o1,
l2, w2._BYTES, o2,
minimum,normalized,large)
return possible
# cython: profile=False
from cpython cimport array
from obitools.tools.solexapairend import iterOnAligment
from array import array
from obitools import NucSequence
cimport array
cdef class IterOnConsensus:
......@@ -20,7 +18,7 @@ cdef class IterOnConsensus:
cdef int __seqBInsertion
cdef int __seqADeletion
cdef int __seqBDeletion
cdef object __ioa
cdef object __ioa
cdef bint __firstSeqB
def __cinit__(self,ali):
......@@ -34,7 +32,7 @@ cdef class IterOnConsensus:
self.__seqBInsertion=0
self.__seqADeletion=0
self.__seqBDeletion=0
self.__ioa = iterOnAligment(self._ali)
self.__firstSeqB=False
......@@ -72,7 +70,7 @@ cdef class IterOnConsensus:
def get_seqBDeletion(self):
return self.__seqBDeletion
def __next__(self):
cdef bytes snuc0
cdef bytes snuc1
......@@ -83,12 +81,12 @@ cdef class IterOnConsensus:
cdef double score1
cdef double h0
cdef double h1
while(1):
snuc0,score0,snuc1,score1 = self.__ioa.next()
nuc0=snuc0
nuc1=snuc1
if nuc0[0]==nuc1[0]:
if nuc0[0]==nuc1[0]:
if nuc1[0]!=dash[0]:
self.__firstSeqB=True
self.__seqABMatch+=1
......@@ -98,7 +96,7 @@ cdef class IterOnConsensus:
h0 = score0 * (1-score1/3)
h1 = score1 * (1-score0/3)
if h0 < h1:
if nuc0[0]!=dash[0]:
self.__seqBSingle=0
if nuc1[0]==dash[0]:
......@@ -126,10 +124,10 @@ cdef class IterOnConsensus:
self.__seqBSingle=0
self.__seqBDeletion+=1
def __iter__(self):
return self
seqASingle = property(get_seqASingle, None, None, "direct's docstring")
seqBSingle = property(get_seqBSingle, None, None, "reverse's docstring")
seqABMatch = property(get_seqABMatch, None, None, "idem's docstring")
......@@ -150,7 +148,7 @@ def buildConsensus(ali):
cdef bytes nuc
cdef double score
cdef bytes sseq
if len(ali[0])>999:
raise AssertionError,"To long alignemnt"
......@@ -161,13 +159,13 @@ def buildConsensus(ali):
quality[i]=score
aseq[i]=cnuc[0]
i+=1
aseq[i]=0
sseq=aseq
seq=NucSequence(ali[0].wrapped.id+'_CONS',sseq,**ali[0].wrapped.getTags())
seq.quality=array.array('d',[quality[j] for j in range(i)])
if hasattr(ali, "direction"):
seq['direction']=ali.direction
if hasattr(ali, "counter"):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment