Commit 49e3eb3c by Eric Coissac

Switch to the version 1.1 of obitools

parent c0eb996d
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<projectDescription> <projectDescription>
<name>OBITools</name> <name>OBITools-1.1</name>
<comment></comment> <comment></comment>
<projects> <projects>
</projects> </projects>
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project> <?eclipse-pydev version="1.0"?><pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/OBITools-1.0/src</path> <path>/OBITools-1.1/src</path>
<path>/${PROJECT_DIR_NAME}/distutils.ext</path> <path>/${PROJECT_DIR_NAME}/distutils.ext</path>
</pydev_pathproperty> </pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
include include
recursive-include distutils.ext *.py *.c recursive-include distutils.ext *.py *.c *.pem
recursive-include src *.pyx *.pxd *.c *.h *.cfiles recursive-include src *.pyx *.pxd *.c *.h *.cfiles
recursive-include doc/sphinx/source *.txt *.rst *.py recursive-include doc/sphinx/source *.txt *.rst *.py
include doc/sphinx/make.bat include doc/sphinx/make.bat
...@@ -4,6 +4,7 @@ Sequence sampling and filtering ...@@ -4,6 +4,7 @@ Sequence sampling and filtering
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
scripts/obigrep scripts/obigrep
scripts/obihead scripts/obihead
scripts/obisample scripts/obisample
.. automodule:: obiextract
:py:mod:`obiextract` specific options
.. cmdoption:: -s <KEY>, --sample=<KEY>
Attribute containing sample descriptions. By default the attribute
name used for describing samples is set to ``merged_sample``.
.. cmdoption:: -e <SAMPLE_NAME>, --extract=<SAMPLE_NAME>
Indicates which sample (<SAMPLE_NAME>) has to be extracted.
Several ``-e`` options can be added for specifying several samples.
If you want to extract a large number of samples, please refer to the ``-E``
option described below
.. TIP:: The ``<KEY>`` can be simply the key of an attribute, or a *Python* expression
similarly to the ``-p`` option of :py:mod:`obigrep`.
.. code-block:: bash
> obiextract -e sampleA -e sampleB allseqs.fasta > samplesAB.fasta
This command extracts from the ``allseqs.fasta`` file data related to samples ``A`` and ``B``.
.. cmdoption:: -E <FILENAME>, --extract-list=<FILENAME>
Specifies the name of a file where a list of samples is stored. The file must be a simple
text file with one sample name per line.
.. code-block:: bash
> obiextract -E subset.txt allseqs.fasta > subset_samples.fasta
This command extracts from the ``allseqs.fasta`` file data related to samples listed in the ``subset.txt`` file.
.. include:: ../optionsSet/inputformat.txt
.. include:: ../optionsSet/outputformat.txt
.. include:: ../optionsSet/defaultoptions.txt
:py:mod:`obiextract` modified sequence attributes
- :doc:`count <../attributes/count>`
:py:mod:`obiextract` used sequence attribute
- :doc:`count <../attributes/count>`
\ No newline at end of file
...@@ -19,7 +19,7 @@ from os import path ...@@ -19,7 +19,7 @@ from os import path
VERSION = "1.0.012" VERSION = "1.1.000"
AUTHOR = 'Eric Coissac' AUTHOR = 'Eric Coissac'
EMAIL = '' EMAIL = ''
URL = '' URL = ''
...@@ -122,8 +122,8 @@ if __name__ == '__main__': ...@@ -122,8 +122,8 @@ if __name__ == '__main__':
if (options.onlyhead): if (options.onlyhead):
options.clustermode=True options.clustermode=True
globalIndex = Indexer() # I keep correspondances for all graphs between globalIndex = Indexer() # I keep correspondances for all graphs between
# node id and sequence # node id and sequence
db = [] # sequences are stored in a list. The indexes in the list db = [] # sequences are stored in a list. The indexes in the list
# are corresponding to the node index in graphs # are corresponding to the node index in graphs
:py:mod:`obiextract`: extract samples from a dataset
.. codeauthor:: Eric Coissac <>
The :py:mod:`obiextract` command extracts a subset of samples from a complete
Extracted sample names can be specified either by indicating their names using option
on the command line or by indicating a file name containing a sample name per line
The count attribute of the sequence and the slot describing distribution of the sample
occurrences among samples are modified according to the selected samples.
A sequence not present in at least one of the selected samples is not conserved in the
output of :py:mod:`obiextract`.
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from obitools.options import getOptionManager
def addExtractOptions(optionManager):
action="store", dest="sample",
help="Tag containing sample descriptions")
help="which <SAMPLE_NAME> have to be extracted")
action="store", dest="sample_file",
help="File name where a list of sample is stored")
def selectSamples(entry,key,samples):
newsamples = {}
oldsamples = entry.get(key,{})
for k in samples:
if k in oldsamples:
s = sum(newsamples.values())
if s > 0:
if len(newsamples)==1 and key[0:7]=='merged_':
return entry
if __name__ == '__main__':
optionParser = getOptionManager([addExtractOptions,addInOutputOption],progdoc=__doc__)
(options, entries) = optionParser()
if options.sample_file is not None:
s = [x.strip() for x in open(options.sample_file)]
writer = sequenceWriterGenerator(options)
print options.sample_list
for seq in entries:
seq = selectSamples(seq,options.sample,options.sample_list)
if seq is not None:
...@@ -698,7 +698,7 @@ cdef class DNAComplementSequence(WrappedBioSequence): ...@@ -698,7 +698,7 @@ cdef class DNAComplementSequence(WrappedBioSequence):
def __iter__(self): def __iter__(self):
return (self.getSymbolAt(x) for x in xrange(len(self))) return (self.getSymbolAt(x) for x in xrange(len(self)))
cpdef int _posInWrapped(self, int position): cpdef int _posInWrapped(self, int position) except *:
return -(position+1) return -(position+1)
cpdef getSymbolAt(self, int position): cpdef getSymbolAt(self, int position):
major = 1 major = 1
minor = 0 minor = 1
serial= '012' serial= '000'
version = "%2d.%02d %s" % (major,minor,serial) version = "%2d.%02d %s" % (major,minor,serial)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment