go_compare.sh 2.5 KB
Newer Older
alain viari's avatar
alain viari committed
1 2 3 4 5
#!/bin/csh -f
#
# compare CDS annotation in reference file to predicted file
# annotation files are in GenBank/EMBL format
#
6
# usage: go_compare.sh reference predicted
alain viari's avatar
alain viari committed
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#
# output on stdout
#
# Bootstrap: clear ORG_SOURCED, point ORG_HOME four directory levels above
# the directory holding this script, then source the shared csh init file.
# NOTE(review): presumably ORG_SOURCED is a guard read by csh_init.sh so
# that the init code runs again here -- confirm in csh_init.sh.
unsetenv ORG_SOURCED

setenv ORG_HOME `dirname $0`/../../../..
source $ORG_HOME/scripts/csh_init.sh

# NeedArg, NeedFile and Argv are not defined in this file; presumably they
# are provided by csh_init.sh (argument count check, file existence check,
# copy of the command line arguments) -- confirm there.
NeedArg 2

# First argument: reference annotation; second argument: predicted annotation.
set RefFile = $Argv[1]
set PrdFile = $Argv[2]

NeedFile $RefFile
NeedFile $PrdFile

# The :e modifier keeps only the filename extension. The extension is used
# further down to pick the matching <ext>.*.awk scripts in LIB_DIR.
set RefType = $RefFile:e
set PrdType = $PrdFile:e

26 27 28 29
# Both file extensions must have a matching <ext>.oneliner.awk script in
# LIB_DIR; report an error unless the two scripts are both present.
if (! ((-e $LIB_DIR/$RefType.oneliner.awk) && (-e $LIB_DIR/$PrdType.oneliner.awk))) then
  Error 1 "file extension should be 'gbk' or 'embl'"
endif

alain viari's avatar
alain viari committed
30 31 32 33 34 35 36
#
# parse ref and prediction
#

# Each annotation file goes through two chained awk passes:
#   1. <ext>.oneliner.awk reads the annotation file given on the command line
#   2. <ext>.cds_short.awk (loaded together with libutil.awk) reads the
#      output of pass 1 on its standard input
# <ext> is the extension of the respective file (RefType / PrdType).
# Output goes to scratch files in the current directory, suffixed with the
# pid of this shell ($$): R_$$ for the reference, P_$$ for the prediction.
#
# The two passes of each pipeline must stay joined by the trailing
# backslash; nothing may be inserted between the two lines.

Notify "get genome info from $RefFile"

$AwkCmd -f $LIB_DIR/$RefType.oneliner.awk $RefFile |\
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$RefType.cds_short.awk > R_$$

Notify "get prediction info from $PrdFile"

$AwkCmd -f $LIB_DIR/$PrdType.oneliner.awk $PrdFile |\
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$PrdType.cds_short.awk > P_$$
alain viari's avatar
alain viari committed
43 44 45 46 47 48 49

#
# compare
#

# Run compare.cds.awk (with libnws.awk loaded first) on the reference
# summary R_$$ and the prediction summary P_$$; the report goes to S_$$.

Notify "compare bank to predictions"

$AwkCmd -f $LIB_DIR/libnws.awk       \
        -f $LIB_DIR/compare.cds.awk  \
        R_$$ P_$$ > S_$$

# D_$$: sorted, de-duplicated second field of every report line that starts
# with MATCH and also contains MISSED. The list is joined against the
# database name lists in the statistics sections below, so it has to be
# sorted. A single awk filter replaces the former egrep/grep/awk chain
# (egrep is obsolescent) and sort -u replaces sort piped into uniq.
awk '/^MATCH/ && /MISSED/ {print $2}' S_$$ | sort -u > D_$$

alain viari's avatar
alain viari committed
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
# base statistics
#
# Take the MATCH lines of the report S_$$, turn every dot into a blank,
# keep the resulting fifth field, and count how often each value occurs
# (most frequent first). The counts become the first lines of U_$$, each
# one prefixed with a hash sign. U_$$ is created (truncated) here; all the
# following sections append to it.
# Plain grep is enough for the anchored literal pattern; egrep is
# obsolescent and recent GNU grep prints a warning when it is used.

grep "^MATCH" S_$$ | tr '.' ' ' | awk '{print $5}' |\
sort | uniq -c | sort -nr | awk '{print "#",$0}' > U_$$

# add chlorodb/core statistics
#
# Skipped when the core directory does not exist. Otherwise:
#   C_$$  sorted base names (no directory, no .fst suffix) of the .fst files
#         found directly in the core directory
#   E_$$  names present both in D_$$ (missed entries) and in C_$$
#   mt    number of missed entries, mc those found in core, mn the rest
#   LC    the names listed in E_$$, as words
# The file list comes from find rather than from an ls glob: csh reports an
# error for a glob that matches nothing, whereas find just prints nothing
# (this is also how the all-ChloroDB section builds its list).
# The dot of the .fst suffix is escaped so that only a literal dot matches.

if (-d $DATA_DIR/cds/chlorodb/core) then

  find $DATA_DIR/cds/chlorodb/core -maxdepth 1 -name \*.fst -print |\
  sed -e 's@^.*/@@' -e 's/\.fst$//' |\
  sort > C_$$

  join D_$$ C_$$ > E_$$
  @ mt = `wc -l < D_$$`
  @ mc = `wc -l < E_$$`
  @ mn = $mt - $mc
  set LC = `cat E_$$`

  echo "#"                                   >> U_$$
  echo "# $mc MISSED in Core ($LC)"          >> U_$$
  echo "# $mn MISSED not in Core"            >> U_$$
  echo "#"                                   >> U_$$
endif

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104

# add all chloro statistics
#
# Same computation as for the core subset, over the .fst files found
# directly in the chlorodb directory:
#   C_$$  sorted base names (no directory, no .fst suffix), without the
#         names that contain the string info
#   E_$$  names present both in D_$$ (missed entries) and in C_$$
#   mt    number of missed entries, mc those found in ChloroDB, mn the rest
#   LC    the names listed in E_$$, as words
# The info filter runs after the directory part has been stripped, so a
# DATA_DIR path that happens to contain the string info no longer empties
# the whole list. NOTE(review): presumably the filter is meant to drop
# info-type companion files -- confirm against the chlorodb layout.
# The dot of the .fst suffix is escaped so that only a literal dot matches.

find $DATA_DIR/cds/chlorodb -maxdepth 1 -name \*.fst -print |\
  sed -e 's@^.*/@@' -e 's/\.fst$//' |\
  grep -v info |\
  sort > C_$$

join D_$$ C_$$ > E_$$
@ mt = `wc -l < D_$$`
@ mc = `wc -l < E_$$`
@ mn = $mt - $mc
set LC = `cat E_$$`

echo "#"                                   >> U_$$
echo "# $mc MISSED in ChloroDB ($LC)"      >> U_$$
echo "# $mn MISSED not in ChloroDB"        >> U_$$
echo "#"                                   >> U_$$

# add detailed results
#
# One empty separator line, then the complete comparison report (S_$$),
# appended after the statistics already collected in U_$$.

echo ""                                    >> U_$$
cat S_$$ >> U_$$

107 108 109
# print results
#
# U_$$ holds the statistics header followed by the detailed report; it is
# the only thing this script writes to stdout.

cat U_$$

#
# end
#
# Remove the scratch files by explicit name. A one-character wildcard in
# front of the pid suffix would also delete unrelated files of the same
# shape in the current directory, and csh reports an error for a wildcard
# that matches nothing. With -f, rm stays silent about names that were
# never created (C_$$ and E_$$ are the only conditional ones upstream).
# NOTE(review): Exit is not defined in this file; presumably it comes from
# csh_init.sh -- confirm there.

\rm -f R_$$ P_$$ S_$$ D_$$ U_$$ C_$$ E_$$

Exit 0