extract_ref16S.sh 3.12 KB
Newer Older
1 2 3 4
#!/bin/bash
#


5
  gawk 'function printfasta(seq) {                                            \
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
             seqlen=length(seq);                                             \
             for (i=1; i <= seqlen; i+=60)                                   \
                 print substr(seq,i,60);                                    \
             }                                                               \
       function comp(seq) {                   						                   \
              "echo "seq" | tr acgtACGT tgcaTGCA " | getline res; 	         \
              close("echo "seq" | tr acgtACGT tgcaTGCA ");                   \
              return res;                        						                 \
             }                                      						             \
       function rev(seq) {                    						                   \
              "echo "seq" | rev " | getline res; 						                 \
              close("echo "seq" | rev ");                                    \
              return res;                        						                 \
             }                                      						             \
       function revcomp(seq) {                						                   \
              res=rev(comp(seq));                						                 \
              return res;                        						                 \
             }                                      						             \
                                                                             \
       /^LOCUS / {AC=$2; sequence=""; seqon=0; FROM="";TO=""}                \
       /^     rRNA  / {LOCUS=$2; STRAND=1}                             \
       /^     rRNA  / && /complement/ {STRAND=0;                       \
                                             sub("complement\\(","",LOCUS);  \
                                             sub("\\)","",LOCUS);    \
                                            }                      \
       /16S/ {split(LOCUS,POS,".");         \
                                     FROM=POS[1];                  \
                                     TO=POS[3];                    \
                                     LENGTH=TO-FROM+1              \
                                    }                              \
       /^ORIGIN/ {seqon=1}                                         \
       /^ *[1-9][0-9]* [a-z ]+$/ && seqon {seq=$2 $3 $4 $5 $6 $7;  \
                                           gsub("[^acgt]","n",seq);\
                                           sequence=sequence seq   \
                                          }                        \
       /^\/\// && FROM    \
42
                        {print ">RRNA16S_"AC" Strand="STRAND";",       \
43 44 45 46 47 48 49 50
                               "cut="FROM".."TO";",                \
                               "seq_length="LENGTH";";             \
                         SS=substr(sequence,FROM,LENGTH);          \
                         if (! STRAND)                             \
                           SS=revcomp(SS);                         \
                         printfasta(SS);                           \
       }             \
      ' $*