Commit 639cbbdc by Alain Viari

added test for chlorodb

parent 55fd2882
......@@ -35,6 +35,7 @@ cd $DB_BASE/info
#
if (! -e $DB_BASE/parameters.sh) then
Notify "no $DB_BASE/parameters.sh found : creating one for you"
@ n = `find $DB_BASE/download -depth 1 -type f -print | wc -l`
@ cor_cutoff = $n / 2
@ atg_cutoff = $n / 10
......@@ -70,7 +71,9 @@ if (! -e $DB_BASE/parameters.sh) then
echo "set DUST_PMAX = 1e-6" >> $DB_BASE/parameters.sh
echo "set DUST_IDMIN = 30" >> $DB_BASE/parameters.sh
echo "set DUST_SIZMIN = 10" >> $DB_BASE/parameters.sh
Cat $DB_BASE/parameters.sh
else
Notify "DB parameters : $DB_BASE/parameters.sh"
endif
source $DB_BASE/parameters.sh
......@@ -198,12 +201,34 @@ if (-e db.core.pat.txt) then
Notify "Making core DB (take some time... please wait)"
$PROG_DIR/subdb/go_subdb.sh db.prot.fst db.core.pat.txt \
$CORE_DELTA $CORE_COVMIN $CORE_PMAX $CORE_IDMIN $CORE_SIZMIN
# add discarded entries into shell :
# the lines of db.core.pat.txt are joined (on their first field) with the
# lines of db.core.pat.db/Annot.lst ; output lines made of exactly 3 fields
# are appended to db.shell.pat.txt
# NOTE(review): presumably the 3-field lines are the core patterns that got
# no counterpart in Annot.lst (i.e. discarded by go_subdb.sh) - to be
# confirmed against the actual format of both files
if (-e db.core.pat.db/Annot.lst) then
# join needs both inputs sorted on the join field
sort db.core.pat.txt > A_$$
sort db.core.pat.db/Annot.lst > B_$$
# -a1 : also output the lines of A_$$ that are unpairable with B_$$
join -a1 A_$$ B_$$ | awk '(NF==3) {print $0}' > C_$$
set n = `cat C_$$ | wc -l`
Notify "transfering $n discarded entries to shell"
cat C_$$ >> db.shell.pat.txt
# cleanup : removes every file named <one character>_<pid of this shell>
\rm -f ?_$$
endif
endif
#
# shell DB
#
# runs go_subdb.sh on db.prot.fst with the patterns listed in
# db.shell.pat.txt and the SHEL_* parameters, then appends the entries
# selected below to db.dust.pat.txt
#
if (-e db.shell.pat.txt) then
  Notify "Making shell DB (take some time... please wait)"
  $PROG_DIR/subdb/go_subdb.sh db.prot.fst db.shell.pat.txt \
      $SHEL_DELTA $SHEL_COVMIN $SHEL_PMAX $SHEL_IDMIN $SHEL_SIZMIN
  # add discarded entries into dust :
  # the lines of db.shell.pat.txt are joined (on their first field) with
  # the lines of db.shell.pat.db/Annot.lst ; output lines made of exactly
  # 3 fields are transferred
  # NOTE(review): presumably these are the shell patterns that got no
  # counterpart in Annot.lst - to be confirmed against both file formats
  if (-e db.shell.pat.db/Annot.lst) then
    # join needs both inputs sorted on the join field
    sort db.shell.pat.txt > A_$$
    sort db.shell.pat.db/Annot.lst > B_$$
    # -a1 : also output the lines of A_$$ that are unpairable with B_$$
    # C_$$ is (re)created with '>' : appending with '>>' would transfer
    # again the content of any leftover C_$$ file
    join -a1 A_$$ B_$$ | awk '(NF==3) {print $0}' > C_$$
    set n = `cat C_$$ | wc -l`
    Notify "transfering $n discarded entries to dust"
    cat C_$$ >> db.dust.pat.txt
    # cleanup : removes every file named <one character>_<pid of this shell>
    \rm -f ?_$$
  endif
endif
if (-e db.dust.pat.txt) then
......
......@@ -27,7 +27,7 @@ set Delta = 0.5
set Covmin = 30
set Pmax = 1e-6
set Idmin = 30
set Sizmin = 5
set Sizmin = 10
if ($#Argv > 0) then
set Delta = $Argv[1]; Shift
......@@ -91,14 +91,17 @@ set noms = `awk '{print $1}' $PatFile`
foreach nom ($noms)
set pat = `egrep "^$nom " $PatFile | awk '{print $2}'`
$AwkCmd -f $LIB_DIR/db.filter.pat.awk -v PAT="$pat" P_$$ > D_$$/$nom.fst
Report D_$$/$nom.fst "pattern_filter"
set n = `egrep '^>' D_$$/$nom.fst | wc -l`
Notify " pattern : $nom : $n"
Report D_$$/$nom.fst "pattern_filter"
if ($n <= $Sizmin) \rm -f D_$$/$nom.fst
if ($n < $Sizmin) \rm -f D_$$/$nom.fst
end
set ok = `ls D_$$ | wc -l`
if ($ok == 0) goto fin
if ($ok == 0) then
Warning "no entries found after pattern selection (increase Sizmin = $Sizmin)"
goto fin
endif
#
# select by length
......@@ -115,12 +118,14 @@ foreach f (D_$$/*.fst)
Report E_$$/$nom.fst "length_filter"
set n = `egrep '^>' E_$$/$nom.fst | wc -l`
Notify " length filter : $nom : $n"
if ($n <= $Sizmin) \rm -f E_$$/$nom.fst
if ($n < $Sizmin) \rm -f E_$$/$nom.fst
end
set ok = `ls E_$$ | wc -l`
if ($ok == 0) goto fin
if ($ok == 0) then
Warning "no entries found after length selection (increase Sizmin = $Sizmin)"
goto fin
endif
#
# select by similarity
......@@ -152,12 +157,15 @@ foreach f (E_$$/*.fst)
set n = `egrep '^>' F_$$/$nom.fst | wc -l`
Notify " blast filter : $nom : $n"
if ($n <= $Sizmin) \rm -f F_$$/$nom.fst
if ($n < $Sizmin) \rm -f F_$$/$nom.fst
end
set ok = `ls D_$$ | wc -l`
if ($ok == 0) goto fin
set ok = `ls F_$$ | wc -l`
if ($ok == 0) then
Warning "no entries found after similarity selection (increase Sizmin = $Sizmin)"
goto fin
endif
#
# annotations
......@@ -187,7 +195,14 @@ Notify "copy $n files to $OutDir"
#
fin:
Notify "output directory : $OutDir"
set n = `find $OutDir -name \*.fst -print | wc -l`
if ($n == 0) then
Warning "no entries found : removing $OutDir"
\rm -r $OutDir
else
Notify "output directory : $OutDir : $n entries"
endif
\rm -r ?_$$
......
#!/bin/csh -f
#
# test of go_chlorodb.sh
#
# copies the test.db directory to a scratch directory (TMP), runs
# go_chlorodb.sh on it, then compares the sorted list of *.fst files
# found under TMP (test.bak) with the expected list (test.ref)
#
# exit status : status of the diff command (0 when both lists are identical)
#

setenv ORG_HOME `dirname $0`/../../../../..
source $ORG_HOME/scripts/csh_init.sh

echo "+ testing go_chlorodb.sh"

# TMP is kept when the test fails (see below) : remove any leftover of a
# previous run first, otherwise 'cp -r' would copy test.db *inside* the
# existing TMP directory and the test would keep failing
\rm -rf TMP test.bak

\cp -r test.db TMP

`dirname $0`/../go_chlorodb.sh TMP

find TMP -name \*.fst -print | sort > test.bak

# only the exit status is of interest : output is discarded
diff -q test.bak test.ref >& /dev/null

set stat = $status

if ($stat == 0) then
  echo "+ $VTC[3]test Ok$VTC[1]"
  \rm -r TMP test.bak
else
  # TMP and test.bak are left in place on failure
  echo "* $VTC[2]test Failure$VTC[1]"
endif

exit $stat
# sourced file
#
# warning : these parameters are just for speeding up test
#           (SIZMIN is way too high)
#

# NOTE(review): the *_CUTOFF values below are not used in the visible part
# of the scripts ; presumably they drive the selection of the core / shell
# patterns in go_chlorodb.sh - to be confirmed
set CORE_NCDS_CUTOFF = 10
set CORE_START_ATG_CUTOFF = 5
set CORE_START_DFT_CUTOFF = 5
set CORE_START_OTH_CUTOFF = 5
set CORE_STOP_CUTOFF = 5
set CORE_SPLICE_CUTOFF = 5
set SHEL_NCDS_CUTOFF = 9

# core DB : passed by go_chlorodb.sh to subdb/go_subdb.sh, in this order
set CORE_DELTA = Inf
set CORE_COVMIN = 30
set CORE_PMAX = 1e-6
set CORE_IDMIN = 30
set CORE_SIZMIN = 11

# shell DB : passed by go_chlorodb.sh to subdb/go_subdb.sh, in this order
set SHEL_DELTA = 0.5
set SHEL_COVMIN = 30
set SHEL_PMAX = 1e-6
set SHEL_IDMIN = 30
set SHEL_SIZMIN = 10

# dust DB
# NOTE(review): presumably passed to subdb/go_subdb.sh the same way as the
# core and shell values - to be confirmed
set DUST_DELTA = 0.5
set DUST_COVMIN = 30
set DUST_PMAX = 1e-6
set DUST_IDMIN = 30
set DUST_SIZMIN = 9
TMP/core/rpl2.fst
TMP/core/rpl23.fst
TMP/core/rps12.fst
TMP/core/rps19.fst
TMP/core/rps7.fst
TMP/dust/atpa.fst
TMP/dust/atpb.fst
TMP/dust/atpe.fst
TMP/dust/atpf.fst
TMP/dust/atph.fst
TMP/dust/atpi.fst
TMP/dust/ccsa.fst
TMP/dust/petb.fst
TMP/dust/petg.fst
TMP/dust/psaa.fst
TMP/dust/psab.fst
TMP/dust/psac.fst
TMP/dust/psba.fst
TMP/dust/psbb.fst
TMP/dust/psbc.fst
TMP/dust/psbd.fst
TMP/dust/psbe.fst
TMP/dust/psbf.fst
TMP/dust/psbh.fst
TMP/dust/psbi.fst
TMP/dust/psbj.fst
TMP/dust/psbk.fst
TMP/dust/psbl.fst
TMP/dust/psbn.fst
TMP/dust/psbt.fst
TMP/dust/rpl14.fst
TMP/dust/rpl32.fst
TMP/dust/rpob.fst
TMP/dust/rpoc1.fst
TMP/dust/rpoc2.fst
TMP/dust/ycf4.fst
TMP/info/db.prot.fst
TMP/shell/rbcl.fst
TMP/shell/rpl16.fst
TMP/shell/rpl20.fst
TMP/shell/rpl36.fst
TMP/shell/rps11.fst
TMP/shell/rps14.fst
TMP/shell/rps2.fst
TMP/shell/rps3.fst
TMP/shell/rps4.fst
TMP/shell/rps8.fst
......@@ -98,6 +98,8 @@ alias Cat 'awk -v D="`date '"'"'+%Y-%m-%d %H:%M:%S'"'"'`" '"'"'{print D " [OA FI
# Notify <message...>
# calls Cout with the "INFO " tag followed by all the arguments
# (Cout is defined outside the lines shown here)
alias Notify 'Cout "INFO " \!:*'
# Warning <message...>
# calls Cout with the "WARN " tag followed by all the arguments
alias Warning 'Cout "WARN " \!:*'
# Error <status> <message...>
# calls Cout with the ERROR tag followed by arguments 2 to last,
# then calls Exit with the first argument
alias Error 'Cout ERROR \!:2-*; Exit \!:1'
# Exit <status>
# stores <status> into the Stat variable, calls Debug with the script
# name ($0) and the status, then exits with <status>
# (Debug is defined outside the lines shown here)
alias Exit 'set Stat = \!:1; Debug "<--- $0 [$Stat]"; exit \!:1'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment