Commit 284096bf authored by Eric Coissac's avatar Eric Coissac

Merge branch 'master' of git.metabarcoding.org:org-asm/org-annotate

Conflicts:
	org-annotate.sh
parents abd6112a e30a53b5
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -31,7 +31,7 @@ set GenoName = `basename $GenoFile:r`
set ProtFile = $Argv[1]; Shift
set ProtDir = `dirname $ProtFile`
set ProtName = `basename $ProtFile:r`
set ProtName = `basename $ProtFile | $AwkCmd -F'.' '{print $1}'`
set ProtType = `basename $ProtDir`
NeedFile $GenoFile
......@@ -122,7 +122,7 @@ endif
if ($PASS1_SPEEDUP != 0) then
tcsh -f $PROG_DIR/do_filterbx.sh $GenoFile $ProtFile \
tcsh -f $PROG_DIR/do_filterbx.csh $GenoFile $ProtFile \
$PASS1_BLASTX_FILTER_IDMIN \
$PASS1_BLASTX_FILTER_NBMIN \
$PASS1_BLASTX_FILTER_NBMAX > D_$$
......
......@@ -55,10 +55,10 @@ endif
foreach dir ("core" "shell" "dust")
if (-d $DbRoot/$dir) then
set fams = `ls $DbRoot/$dir/*.fst`
set fams = `ls $DbRoot/$dir/*.clean.fst`
Notify "running pass1:$dir exonerate of $Genome on $DbRoot"
foreach f ($fams)
tcsh -f $PROG_DIR/do_exonerate.sh $Fasta $f $DbRoot/models $temp
tcsh -f $PROG_DIR/do_exonerate.csh $Fasta $f $DbRoot/models $temp
end
endif
end
......
......@@ -3,6 +3,8 @@
#
# -v MAX_SPAN ALLOW_STOP EXCLUDE
#
#
#
BEGIN {
PROCINFO["sorted_in"] = "@ind_num_asc"
......
......@@ -6,6 +6,9 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
CORELIB="${CDS_DATA_DIR}/chlorodb/core"
CDHIT_ID=0.7
CDHIT_DELTA=0.8
function clusterize() {
......@@ -17,12 +20,15 @@ function clusterize() {
rm -rf "${prot}"
mkdir -p "${prot}"
cd-hit -i "${fastain}" \
-o "${prot}/${cdhitout}" \
-c 0.6 -G 1 \
-g 1 -s 0.8 \
-b 150 -p 1 \
-d 0 -n 3
cd-hit -i "${fastain}" \
-o "${prot}/${cdhitout}" \
-c ${CDHIT_DELTA} \
-G 1 \
-g 1 \
-aL 0.95 \
-s ${CDHIT_ID} \
-b 350 -p 1 \
-d 0 -n 3
fasta1line "${fastain}" > "${prot}/${fasta1}"
......@@ -35,6 +41,8 @@ function clusterize() {
filename=prot".cluster."cluster".ids"; \
print $3 >> filename ; \
close(filename) }' "${cdhitout}.clstr"
rm -f "../$prot.clean.fst"
for ids in *.cluster.*.ids ; do
cluster=$(printf "%03d" $(echo "${ids}" | $AwkCmd -F'.' '{print $3}'))
......@@ -52,10 +60,10 @@ function clusterize() {
egrep -v -- '^--$' > "$alignment"
fi
if (( size >= 10 )) ; then
if (( size >= 5 )) ; then
egrep -f "$ids" -A 1 "${fasta1}" | \
egrep -v -- '^--$' | \
formatfasta >> "$prot.clean.fst"
formatfasta >> "../$prot.clean.fst"
fi
rm -f "$ids"
......@@ -73,6 +81,8 @@ function clusterize() {
pushd $CORELIB
rm -rf *.clean.fst
for prot in *.fst ; do
prot="${prot/.fst/}"
clusterize "$prot"
......
#!/usr/bin/env python
import sys
from math import lgamma
from math import log
data = open(sys.argv[1])
repeats = open(sys.argv[2])
......@@ -8,6 +10,10 @@ repeats = open(sys.argv[2])
chloro = {'LSC' : [], 'SSC' : [] }
chlorosize =0
def lpbinom(x,n,p):
lprob = log(p) * x + log(1-p) * (n-x) + lgamma(n+1) - lgamma(x+1) - lgamma(n-x+1)
return lprob
# We scan the blast matches:
# We build a vector with one position per base pair counting the matches
......@@ -102,6 +108,7 @@ for line in repeats:
o_ssc /= o
score = ((c_lsc - c_ssc) ** 2 + (o_lsc - o_ssc) ** 2) / 2.0
# pvalue=
# print >>sys.stderr,"c.lsc = %f c.ssc = %f o.lsc = %f o.ssc = %f score = %6.4f (len=%d)" % (c_lsc,c_ssc,o_lsc,o_ssc,score,len1)
if (score >= scoreMax) and ((len1 > len1Max) or (len2 > len2Max)):
......
......@@ -151,7 +151,7 @@ pushTmpDir ORG.organnot
loginfo "Done."
loginfo "Annotating the CDS..."
tcsh -f ${PROG_DIR}/detectors/cds/bin/go_cds.sh "${RESULTS}.norm.fasta" >> "${RESULTS}.annot"
tcsh -f ${PROG_DIR}/detectors/cds/bin/go_cds.csh "${RESULTS}.norm.fasta" >> "${RESULTS}.annot"
loginfo "Done."
if (( partial == 0 )) ; then
......
......@@ -129,11 +129,13 @@ function fasta1line {
}
function formatfasta {
$AwkCmd 'function printfasta(seq) { \
$AwkCmd 'function printfasta(seq) { \
seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \
} \
(seq && /^>/) { printfasta(seq); \
seq=""} \
/^>/ { print $0 } \
! /^>/ { seq=seq $0 } \
END { printfasta(seq)}' "${1}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment