Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
ORG.Annotate
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
ORG.Asm
ORG.Annotate
Commits
f2a75cf9
Commit
f2a75cf9
authored
Oct 11, 2015
by
Eric Coissac
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First version of the tRNA detector and of the global organnot.sh script
parent
443a0446
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
175 additions
and
133 deletions
+175
-133
annotate_plastid.sh
annotate_plastid.sh
+0
-96
detectors/ir/bin/go_ir.sh
detectors/ir/bin/go_ir.sh
+1
-1
detectors/normalize/bin/go_normalize.sh
detectors/normalize/bin/go_normalize.sh
+2
-0
detectors/normalize/lib/lookforIR.lib.sh
detectors/normalize/lib/lookforIR.lib.sh
+5
-3
detectors/normalize/tools/buildSCDB.sh
detectors/normalize/tools/buildSCDB.sh
+4
-4
detectors/trna/bin/go_trna.sh
detectors/trna/bin/go_trna.sh
+42
-0
detectors/trna/lib/aragorn_wrapper.awk
detectors/trna/lib/aragorn_wrapper.awk
+49
-21
organnote.sh
organnote.sh
+47
-0
scripts/bash_init.sh
scripts/bash_init.sh
+25
-8
No files found.
annotate_plastid.sh
deleted
100755 → 0
View file @
443a0446
#!/bin/bash
#
#
export
ORGANNOT_HOME
=
`
dirname
$0
`
REPSEEK
=
${
ORGANNOT_HOME
}
/repseek
SUMATRA
=
${
ORGANNOT_HOME
}
/sumatra
ARAGORN
=
${
ORGANNOT_HOME
}
/aragorn
WRAPARAGORN
=
${
ORGANNOT_HOME
}
/aragorn_wrapper.awk
ECOFIND
=
${
ORGANNOT_HOME
}
/ecofind
function
annotateCAU
{
QUERY
=
"
$$
.query.fasta"
echo
$1
|
sed
's/&/ /'
|
tr
'@'
'\n'
>
${
QUERY
}
${
SUMATRA
}
-d
-n
${
QUERY
}
$2
2> /dev/null |
\
awk
' {n[$2]+=1;d[$2]+=$3} \
END {for (i in n) \
print i, n[i],d[i], d[i]/n[i]\
}'
|
\
sort
-rnk4
|
\
egrep
'^trn(I|M|fM)'
|
\
tail
-1
|
\
awk
'{print $1,$NF}'
rm
-rf
${
QUERY
}
}
function
gffTRNA
{
${
ARAGORN
}
-w
-io
-seq
$3
|
awk
-v
gid
=
${
1
}
-f
${
WRAPARAGORN
}
}
# s'alimente avec un fichier.fasta
# $3 : nb de caractere du fichier, t : nb de caractere du titre,
# $1+1 : nb de retour chariot du fichier
function
seqlength
{
cat
$1
|
\
wc
|
\
awk
-v
t
=
"
`
head
-1
$1
|
wc
-c
`
"
'{print $3 - t - $1 + 1}'
}
# recupere les informations issues du programme repseek avec l'origine des deux
# IR et leur taille
function
lookforIR
{
${
REPSEEK
}
-c
-p
0.001
$1
|
\
grep
'Distant.inv'
|
\
sort
-n
-k4
|
\
tail
-1
|
\
awk
'{print $7}'
|
\
sed
's/-/ /g'
}
# recupere le nom de la sequence analyse
function
seqName
{
head
-n1
$1
|
\
awk
'{print $1}'
|
\
sed
's/^>//'
|
\
sed
-E
's/.*\|([^|]+)\|/\1/'
}
# cree un resume du fichier analyse au format gff
# ex : GFF (NC_*** Repseek IR1 start end . + . )
function
gffIR
{
lseq
=
$2
nom
=
$1
lookforIR
$3
|
\
awk
-v
nom
=
"
$nom
"
-v
lseq
=
"
$lseq
"
\
'BEGIN {OFS="\t"} \
{ startIR1=$1; \
startIR2=$2; \
endIR1=startIR1 + $3 -1; \
endIR2=startIR2 + $3 -1; \
startSSC=1; \
endSSC=startIR1-1; \
startLSC=endIR1+1; \
endLSC=startIR2-1; \
\
print nom,"RepSeek","misc_feature",startSSC,endSSC,"\.","+","\.","ID=SSC;note=small single copy region";\
print nom,"RepSeek","repeat_region",startIR1,endIR1,"\.","+","\.","ID=IRA;note=inverted repeat A";\
print nom,"RepSeek","misc_feature",startLSC,endLSC,"\.","+","\.","ID=LSC;note=large single copy region";\
print nom,"RepSeek","repeat_region",startIR2,endIR2,"\.","-","\.","ID=IRB;note=inverted repeat B";\
}'
}
echo
"##gff-version 3"
genome
=
$1
genome_name
=
`
seqName
$1
`
genome_length
=
`
seqlength
$1
`
gffIR
${
genome_name
}
${
genome_length
}
${
genome
}
|
grep
-v
'^ *$'
gffTRNA
${
genome_name
}
${
genome_length
}
${
genome
}
|
grep
-v
'^ *$'
detectors/ir/bin/go_ir.sh
View file @
f2a75cf9
...
...
@@ -61,4 +61,4 @@ pushTmpDir ORG.ir
popTmpDir
exit
0
exit
0
\ No newline at end of file
detectors/normalize/bin/go_normalize.sh
View file @
f2a75cf9
...
...
@@ -27,12 +27,14 @@
SCRIPT_DIR
=
"
$(
dirname
${
BASH_SOURCE
[0]
}
)
"
source
${
SCRIPT_DIR
}
/../lib/lookforIR.lib.sh
ORG_DEBUG
=
1
pushTmpDir ORG.normalize
tmpfasta1
=
"tmp_
$$
_1.fasta"
tmpfasta2
=
"tmp_
$$
_2.fasta"
logdebug
"Running on :
$QUERY
"
loginfo
"Computing the genome size..."
genome_length
=
$(
seqlength
$QUERY
)
...
...
detectors/normalize/lib/lookforIR.lib.sh
View file @
f2a75cf9
...
...
@@ -47,7 +47,9 @@ function lookForIR {
}
SCDB
=
"
${
IR_DATA_DIR
}
/SC_RefDB"
QUERY
=
"
${
CALL_DIR
}
/
$1
"
openLogFile
"
${
QUERY
/.*/
}
.log"
if
[[
!
"
$1
"
=
~ ^/
]]
;
then
QUERY
=
"
${
CALL_DIR
}
/
$1
"
else
QUERY
=
"
$1
"
fi
detectors/normalize/tools/buildSCDB.sh
View file @
f2a75cf9
...
...
@@ -31,7 +31,7 @@ pushTmpDir ORG.buildSCDB
loginfo
"Building LSC coorientation graph..."
${
PROG_DIR
}
/coorienteSC.sh LSC.fasta 20000
${
LOGFILE
}
>
LSC.tgf
${
PROG_DIR
}
/coorienteSC.sh LSC.fasta 20000
${
ORG_
LOGFILE
}
>
LSC.tgf
${
PROG_DIR
}
/cc.py LSC.tgf
>
LSC.cc
loginfo
" -->
$(
awk
'{print $1}'
LSC.cc |
uniq
|
wc
-l
)
connected componants"
loginfo
"Done"
...
...
@@ -73,7 +73,7 @@ pushTmpDir ORG.buildSCDB
loginfo
"Checking LCS homogeneity..."
${
PROG_DIR
}
/coorienteSC.sh LSC.direct.fasta 20000
${
LOGFILE
}
>
LSC_RefDB.tgf
${
PROG_DIR
}
/coorienteSC.sh LSC.direct.fasta 20000
${
ORG_
LOGFILE
}
>
LSC_RefDB.tgf
${
PROG_DIR
}
/cc.py LSC_RefDB.tgf
>
LSC_RefDB.cc
NCC
=
$(
awk
'{print $1}'
LSC_RefDB.cc |
uniq
|
wc
-l
)
if
((
$NCC
==
1
))
;
then
...
...
@@ -103,7 +103,7 @@ pushTmpDir ORG.buildSCDB
loginfo
"Building SSC coorientation graph..."
${
PROG_DIR
}
/coorienteSC.sh SSC.fasta 5000
${
LOGFILE
}
>
SSC.tgf
${
PROG_DIR
}
/coorienteSC.sh SSC.fasta 5000
${
ORG_
LOGFILE
}
>
SSC.tgf
${
PROG_DIR
}
/cc.py SSC.tgf
>
SSC.cc
loginfo
" -->
$(
awk
'{print $1}'
SSC.cc |
uniq
|
wc
-l
)
connected componants"
loginfo
"Done"
...
...
@@ -146,7 +146,7 @@ pushTmpDir ORG.buildSCDB
loginfo
"Checking SSC homogeneity..."
${
PROG_DIR
}
/coorienteSC.sh SSC.direct.fasta 5000
${
LOGFILE
}
>
SSC_RefDB.tgf
${
PROG_DIR
}
/coorienteSC.sh SSC.direct.fasta 5000
${
ORG_
LOGFILE
}
>
SSC_RefDB.tgf
${
PROG_DIR
}
/cc.py SSC_RefDB.tgf
>
SSC_RefDB.cc
NCC
=
$(
awk
'{print $1}'
SSC_RefDB.cc |
uniq
|
wc
-l
)
if
((
$NCC
==
1
))
;
then
...
...
detectors/trna/bin/go_trna.sh
0 → 100755
View file @
f2a75cf9
#!/bin/bash
#
# Annotate tRNA
#
#========================================================================================
#
# Annotate tRNA based on the Aragorn software predictions.
# go_trna.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the genome to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
SCRIPT_DIR
=
"
$(
dirname
${
BASH_SOURCE
[0]
}
)
"
source
"
${
SCRIPT_DIR
}
/../../../scripts/bash_init.sh"
pushTmpDir ORG.trna
CAUTRNADB
=
"
${
TRNA_DATA_DIR
}
/CAU_tRNA_DB.fasta"
export
CAUTRNADB
if
[[
!
"
$1
"
=
~ ^/
]]
;
then
QUERY
=
"
${
CALL_DIR
}
/
$1
"
else
QUERY
=
"
$1
"
fi
TRNA
=
$(
basename
${
QUERY
}
)
aragorn
-i
-w
-seq
${
QUERY
}
|
\
${
PROG_DIR
}
/../lib/aragorn_wrapper.awk
popTmpDir
exit
0
detectors/trna/aragorn_wrapper.awk
→
detectors/trna/
lib/
aragorn_wrapper.awk
View file @
f2a75cf9
...
...
@@ -7,17 +7,9 @@ function genomeid() {
return
gid
;
}
function
home
()
{
"echo $ORGANNOT_HOME"
|
getline
homedir
;
return
homedir
;
}
function
prog
(
program
)
{
return
home
()
"/"
program
;
}
function
trnalib
(
prognam
)
{
return
home
()
"/lib/trnaCAU.ref.fasta"
;
function
trnalib
()
{
"echo $CAUTRNADB"
|
getline
ref
;
return
ref
}
function
awkPID
()
{
...
...
@@ -65,19 +57,20 @@ function patchtRNA(anticodon,trna,seq) {
if
(
anticodon
==
"cat"
)
{
file
=
printfasta
(
trna
"_"
anticodon
,
seq
,
""
);
command
=
prog
(
"sumatra"
)
" -d -n "
file
" "
trnalib
()
;
command
=
"sumatra -t 0.9 -x -n "
file
" "
trnalib
()
" 2>> /dev/null"
;
while
((
command
|
getline
output
)
>
0
)
{
split
(
output
,
field
,
" "
);
n
[
field
[
2
]]
++
;
d
[
field
[
2
]]
=
field
[
3
];
match
(
field
[
2
],
"trn.M?"
);
trna
=
substr
(
field
[
2
],
RSTART
,
RLENGTH
);
n
[
trna
]
+=
field
[
5
];
}
close
(
command
)
dmin
=
1
;
nmax
=
0
;
for
(
i
in
n
)
{
dist
=
d
[
i
]
/
n
[
i
];
if
(
dist
<
dmin
)
{
dmin
=
dist
;
dist
=
n
[
i
];
if
(
n
[
i
]
>
nmax
)
{
nmax
=
n
[
i
]
;
trna
=
i
;
}
}
...
...
@@ -94,6 +87,42 @@ function gene2product(gene) {
return
"tRNA-"
AA3
[
substr
(
gene
,
4
,
1
)];
}
function
emblTRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
)
{
if
(
loc
~
/^c/
)
{
sub
(
"c\\["
,
"complement("
,
loc
);
sub
(
"\\]"
,
")"
,
loc
);
sub
(
","
,
".."
,
loc
)}
else
{
sub
(
"\\["
,
""
,
loc
);
sub
(
"\\]"
,
""
,
loc
);
sub
(
","
,
".."
,
loc
)}
anti
=
toupper
(
anti
);
gsub
(
"T"
,
"U"
,
anti
);
product
=
gene2product
(
trna
);
if
(
intron
!=
""
)
{
l
=
length
(
intron
);
intron
=
substr
(
intron
,
2
,
l
-
2
);
split
(
intron
,
intronpos
,
","
);
ib
=
intronpos
[
0
];
ie
=
intronpos
[
1
];
match
(
loc
,
"[0-9][0-9]*"
);
gb
=
substr
(
loc
,
RSTART
,
RLENGTH
);
sub
(
"\\.\\."
,
".."
(
gb
+
ib
-
2
)
","
(
gb
+
ie
)
".."
,
loc
);
\
sub
(
"complement"
,
"complement(join"
,
loc
);
\
if
(
substr
(
loc
,
1
,
1
)
~
/
[
0-9
]
/
)
{
loc
=
"join("
loc
}
loc
=
loc
")"
;
}
print
"FT tRNA "
loc
;
print
"FT /gene=\""
trna
"\""
;
print
"FT /anticodon=\""
anti
"\""
;
print
"FT /product=\""
product
"("
anti
")\""
;
}
function
gffTRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
)
{
if
(
loc
~
/^c/
)
{
complement
=
"-"
;
...
...
@@ -139,7 +168,6 @@ function gffTRNA(geneid,trna,loc,anti,intron,seq) {
}
BEGIN
{
print
ARGV
[
1
];
AA1
[
"Ala"
]
=
"A"
;
AA1
[
"Cys"
]
=
"C"
;
AA1
[
"Asp"
]
=
"D"
;
...
...
@@ -201,7 +229,7 @@ BEGIN {
{
seq
=
epissage
(
intron
,
seq
);
trna
=
patchtRNA
(
anti
,
trna
,
seq
);
# print geneid,trna,loc,anti,"'"intron"'",seq;
gff
TRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
);
embl
TRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
);
seq
=
""
}
...
...
@@ -225,5 +253,5 @@ BEGIN {
END
{
seq
=
epissage
(
intron
,
seq
);
trna
=
patchtRNA
(
anti
,
trna
,
seq
);
# print geneid,trna,loc,anti,"'"intron"'",seq;
gff
TRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
);
embl
TRNA
(
geneid
,
trna
,
loc
,
anti
,
intron
,
seq
);
}
organnote.sh
0 → 100755
View file @
f2a75cf9
#!/bin/bash
#
#
#
# Annotate tRNA
#
#========================================================================================
#
# Annotate tRNA based on the Aragorn software predictions.
# go_trna.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the genome to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
SCRIPT_DIR
=
"
$(
dirname
${
BASH_SOURCE
[0]
}
)
"
source
"
${
SCRIPT_DIR
}
/scripts/bash_init.sh"
pushTmpDir ORG.organnot
if
[[
!
"
$1
"
=
~ ^/
]]
;
then
QUERY
=
"
${
CALL_DIR
}
/
$1
"
else
QUERY
=
"
$1
"
fi
RESULTS
=
$(
basename
${
QUERY
/.*/
}
)
LOG
=
"
${
CALL_DIR
}
/
${
RESULTS
}
.log"
rm
-f
${
LOG
}
openLogFile
${
LOG
}
${
PROG_DIR
}
/detectors/normalize/bin/go_normalize.sh
${
QUERY
}
>
"
${
RESULTS
}
.norm.fasta"
${
PROG_DIR
}
/detectors/ir/bin/go_ir.sh
${
QUERY
}
>
"
${
RESULTS
}
.annot"
${
PROG_DIR
}
/detectors/trna/bin/go_trna.sh
${
QUERY
}
>>
"
${
RESULTS
}
.annot"
cat
"
${
RESULTS
}
.annot"
popTmpDir
scripts/bash_init.sh
View file @
f2a75cf9
...
...
@@ -19,11 +19,18 @@ function getAbsolutePath {
function
pushTmpDir
{
TMP_DIR
=
$(
mktemp
-d
-t
"
$1_proc_$$
_"
)
pushd
$TMP_DIR
>
& /dev/null
TMP_DIR_STACK
=
"
$TMP_DIR
$TMP_DIR_STACK
"
logdebug
"Pushing temp directory
$TMP_DIR
"
logdebug
"Stack :
${
TMP_DIR_STACK
}
"
}
function
popTmpDir
{
TMP_DIR
=
$(
echo
$TMP_DIR_STACK
|
awk
'{print $1}'
)
TMP_DIR_STACK
=
$(
echo
$TMP_DIR_STACK
|
awk
'{$1="";print $0}'
)
popd
>
& /dev/null
rm
-rf
$TMP_DIR
>
& /dev/null
logdebug
"Poping temp directory
$TMP_DIR
"
logdebug
"Stack :
${
TMP_DIR_STACK
}
"
}
# Logging functions
...
...
@@ -34,32 +41,42 @@ function errcho {
}
function
openLogFile
{
LOGFILE
=
$1
touch
${
LOGFILE
}
ORG_LOGFILE
=
$1
export
ORG_LOGFILE
touch
${
ORG_LOGFILE
}
}
function
loginfo
{
errcho
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA INFO ]
$$
--
$1
"
if
[[
!
-z
${
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA INFO ]
$$
--
$1
"
>>
${
LOGFILE
}
if
[[
!
-z
${
ORG_
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA INFO ]
$$
--
$1
"
>>
${
ORG_
LOGFILE
}
fi
}
function
logerror
{
errcho
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA ERROR ]
$$
--
$1
"
if
[[
!
-z
${
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA ERROR ]
$$
--
$1
"
>>
${
LOGFILE
}
if
[[
!
-z
${
ORG_
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA ERROR ]
$$
--
$1
"
>>
${
ORG_
LOGFILE
}
fi
}
function
logwarning
{
errcho
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA WARNING]
$$
--
$1
"
if
[[
!
-z
${
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA WARNING]
$$
--
$1
"
>>
${
LOGFILE
}
if
[[
!
-z
${
ORG_
LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA WARNING]
$$
--
$1
"
>>
${
ORG_
LOGFILE
}
fi
}
function
logdebug
{
if
[[
!
-z
${
ORG_DEBUG
}
]]
;
then
errcho
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA DEBUG ]
$$
--
$1
"
if
[[
!
-z
${
ORG_LOGFILE
}
]]
;
then
echo
`
date
+
'%Y-%m-%d %H:%M:%S'
`
"[OA DEBUG ]
$$
--
$1
"
>>
${
ORG_LOGFILE
}
fi
fi
}
# Sequence related functions
# Counts how many sequences are stored in a fasta file
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment