Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
28
Issues
28
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools
Commits
46b446f0
Commit
46b446f0
authored
Jan 12, 2016
by
Eric Coissac
2
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add capacity to filter on merged sequences
parent
6208cbee
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
96 additions
and
73 deletions
+96
-73
src/obiselect.py
src/obiselect.py
+96
-73
No files found.
src/obiselect.py
View file @
46b446f0
...
...
@@ -23,8 +23,11 @@ def minimum(seqs):
return
min
(
s
[
'select'
]
for
s
in
seqs
)
def
maximum
(
seqs
):
return
max
(
s
[
'select'
]
for
s
in
seqs
)
try
:
return
max
(
s
[
'select'
]
for
s
in
seqs
)
except
TypeError
,
e
:
print
>>
sys
.
stderr
,
seqs
raise
e
def
mean
(
seqs
):
ss
=
reduce
(
lambda
x
,
y
:
x
+
y
,(
s
[
'select'
]
for
s
in
seqs
),
0
)
return
float
(
ss
)
/
len
(
seqs
)
...
...
@@ -101,6 +104,13 @@ def addSelectOptions(optionManager):
default
=
[],
help
=
"attributes to merge within each group"
)
group
.
add_option
(
'-s'
,
'--sample'
,
action
=
"store"
,
dest
=
"sample"
,
metavar
=
"<TAGNAME>"
,
type
=
"str"
,
default
=
None
,
help
=
"Tag containing sample descriptions, the default value is set to *merged_sample*"
)
group
.
add_option
(
'--merge-ids'
,
action
=
"store_true"
,
dest
=
"mergeids"
,
default
=
False
,
...
...
@@ -129,16 +139,22 @@ if __name__ == '__main__':
classes
=
{}
print
>>
sys
.
stderr
,
"
\n
Loading sequences...
\n
"
with_taxonomy
=
hasattr
(
options
,
'taxonomy'
)
and
options
.
taxonomy
is
not
None
nbseq
=
0
for
s
in
entries
:
nbseq
+=
1
category
=
[]
if
with_taxonomy
:
environ
=
{
'taxonomy'
:
options
.
taxonomy
,
'sequence'
:
s
,
'random'
:
random
()}
else
:
environ
=
{
'sequence'
:
s
,
'random'
:
random
()}
for
c
in
options
.
categories
:
try
:
if
hasattr
(
options
,
'taxonomy'
)
and
options
.
taxonomy
is
not
None
:
environ
=
{
'taxonomy'
:
options
.
taxonomy
,
'sequence'
:
s
,
'random'
:
random
()}
else
:
environ
=
{
'sequence'
:
s
,
'random'
:
random
()}
v
=
eval
(
c
,
environ
,
s
)
category
.
append
(
v
)
except
:
...
...
@@ -149,11 +165,6 @@ if __name__ == '__main__':
group
.
append
(
s
)
classes
[
category
]
=
group
if
hasattr
(
options
,
'taxonomy'
)
and
options
.
taxonomy
is
not
None
:
environ
=
{
'taxonomy'
:
options
.
taxonomy
,
'sequence'
:
s
,
'random'
:
random
()}
else
:
environ
=
{
'sequence'
:
s
,
'random'
:
random
()}
try
:
select
=
eval
(
options
.
function
,
environ
,
s
)
s
[
'select'
]
=
select
...
...
@@ -181,75 +192,87 @@ if __name__ == '__main__':
i
+=
1
progressBar
(
i
,
lclasses
,
False
,
"%15s"
%
(
"/"
.
join
(
map
(
str
,
c
)),))
seqs
=
classes
[
c
]
sortclass
(
seqs
,
options
)
if
len
(
c
)
==
1
:
c
=
c
[
0
]
if
options
.
sample
is
not
None
:
subsets
=
{}
for
s
in
seqs
:
for
sid
in
s
[
options
.
sample
]:
ss
=
subsets
.
get
(
sid
,[])
ss
.
append
(
s
)
subsets
[
sid
]
=
ss
else
:
subsets
=
{
"all"
:
seqs
}
if
options
.
number
==
1
:
s
=
seqs
[
0
]
for
seqs
in
subsets
.
values
()
:
s
ortclass
(
seqs
,
options
)
for
key
in
mergedKey
:
if
key
==
'taxid'
and
mergeIds
:
if
'taxid_dist'
not
in
s
:
s
[
"taxid_dist"
]
=
{}
if
'taxid'
in
s
:
s
[
"taxid_dist"
][
s
.
id
]
=
s
[
'taxid'
]
mkey
=
"merged_%s"
%
key
if
mkey
not
in
s
:
if
key
in
s
:
s
[
mkey
]
=
{
s
[
key
]:
1
}
else
:
s
[
mkey
]
=
{}
if
'count'
not
in
s
:
s
[
'count'
]
=
1
if
mergeIds
:
s
[
'merged'
]
=
[
s
.
id
]
for
seq
in
seqs
[
1
:]:
if
len
(
c
)
==
1
:
c
=
c
[
0
]
s
[
'count'
]
+=
seq
[
'count'
]
if
options
.
number
==
1
and
options
.
sample
is
None
:
s
=
seqs
[
0
]
for
key
in
mergedKey
:
if
key
==
'taxid'
and
mergeIds
:
if
'taxid_dist'
in
seq
:
s
[
"taxid_dist"
].
update
(
seq
[
"taxid_dist"
])
if
'taxid'
in
seq
:
s
[
"taxid_dist"
][
seq
.
id
]
=
seq
[
'taxid'
]
if
'taxid_dist'
not
in
s
:
s
[
"taxid_dist"
]
=
{}
if
'taxid'
in
s
:
s
[
"taxid_dist"
][
s
.
id
]
=
s
[
'taxid'
]
mkey
=
"merged_%s"
%
key
if
mkey
in
seq
:
m
=
seq
[
mkey
]
else
:
if
key
in
seq
:
m
=
{
seq
[
key
]:
1
}
allmkey
=
set
(
m
.
keys
())
|
set
(
s
[
mkey
].
keys
())
s
[
mkey
]
=
dict
((
k
,
m
.
get
(
k
,
0
)
+
s
[
mkey
].
get
(
k
,
0
))
for
k
in
allmkey
)
# if mkey in seq:
# for skey in seq[mkey]:
# if skey in s:
# s[mkey][skey]=s[mkey].get(seq[skey],0)+seq[mkey][skey]
# else:
# s[mkey][skey]=seq[mkey][skey]
#for key in seq.iterkeys():
# # Merger proprement l'attribut merged s'il exist
# if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged' and key!='select':
# del(s[key])
if
mkey
not
in
s
:
if
key
in
s
:
s
[
mkey
]
=
{
s
[
key
]:
1
}
else
:
s
[
mkey
]
=
{}
if
'count'
not
in
s
:
s
[
'count'
]
=
1
if
mergeIds
:
s
[
'merged'
].
append
(
seq
.
id
)
if
taxonomy
is
not
None
:
mergeTaxonomyClassification
(
seqs
,
taxonomy
)
s
[
'merged'
]
=
[
s
.
id
]
for
seq
in
seqs
[
1
:]:
for
s
in
seqs
[
0
:
options
.
number
]:
s
[
'class'
]
=
c
del
s
[
'select'
]
writer
(
s
)
s
[
'count'
]
+=
seq
[
'count'
]
for
key
in
mergedKey
:
if
key
==
'taxid'
and
mergeIds
:
if
'taxid_dist'
in
seq
:
s
[
"taxid_dist"
].
update
(
seq
[
"taxid_dist"
])
if
'taxid'
in
seq
:
s
[
"taxid_dist"
][
seq
.
id
]
=
seq
[
'taxid'
]
mkey
=
"merged_%s"
%
key
if
mkey
in
seq
:
m
=
seq
[
mkey
]
else
:
if
key
in
seq
:
m
=
{
seq
[
key
]:
1
}
allmkey
=
set
(
m
.
keys
())
|
set
(
s
[
mkey
].
keys
())
s
[
mkey
]
=
dict
((
k
,
m
.
get
(
k
,
0
)
+
s
[
mkey
].
get
(
k
,
0
))
for
k
in
allmkey
)
if
mergeIds
:
s
[
'merged'
].
append
(
seq
.
id
)
if
taxonomy
is
not
None
:
mergeTaxonomyClassification
(
seqs
,
taxonomy
)
for
s
in
seqs
[
0
:
options
.
number
]:
s
[
'class'
]
=
c
s
[
'__@TOWRITE@__'
]
=
True
print
>>
sys
.
stderr
,
"\Writing sequences...
\n
"
progressBar
(
1
,
nbseq
,
True
,
'Writing'
)
i
=
0
for
c
in
classes
:
seqs
=
classes
[
c
]
for
s
in
seqs
:
i
+=
1
progressBar
(
i
,
nbseq
,
False
,
"Writing"
)
if
'__@TOWRITE@__'
in
s
:
del
s
[
'__@TOWRITE@__'
]
del
s
[
'select'
]
writer
(
s
)
print
>>
sys
.
stderr
Aurélie Bonin
@abonin
mentioned in issue
#23 (closed)
·
Feb 17, 2016
mentioned in issue
#23 (closed)
mentioned in issue #23
Toggle commit list
Aurélie Bonin
@abonin
mentioned in issue
#23 (closed)
·
Feb 17, 2016
mentioned in issue
#23 (closed)
mentioned in issue #23
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment