Commit 29c524e6 by Eric Coissac

--no commit message

parent 6e61a3be
......@@ -15,7 +15,11 @@ class build(ori_build):
def has_executables(self):
    """Tell whether the distribution has executables to build.

    Serves as the predicate attached to the ``build_cexe`` sub-command;
    the question is forwarded unchanged to the distribution object.
    """
    distribution = self.distribution
    return distribution.has_executables()
def has_ext_modules(self):
    """Tell whether the distribution has extension modules to build.

    Serves as the predicate attached to the ``build_ext`` sub-command;
    the question is forwarded unchanged to the distribution object.
    """
    distribution = self.distribution
    return distribution.has_ext_modules()
sub_commands = [('build_ctools', has_ctools),
('build_cexe', has_executables)] \
('build_cexe', has_executables),
('build_ext', has_ext_modules)] \
+ ori_build.sub_commands
......@@ -192,7 +192,7 @@ class build_cexe(Command):
macros = build_info.get('macros')
include_dirs = build_info.get('include_dirs')
objects = self.compiler.compile(sources,
Created on 13 fevr. 2014
@author: coissac
from Cython.Distutils import build_ext as ori_build_ext
except ImportError:
from distutils.command.build_ext import build_ext as ori_build_ext
class build_ext(ori_build_ext):
def modifyDocScripts(self):
    """Record the build library directory for the Sphinx documentation.

    Writes ``self.build_lib`` followed by a newline to
    ``doc/sphinx/build_dir.txt`` (path relative to the current working
    directory), replacing any previous content.

    :raises IOError: if the file cannot be opened for writing.
    """
    # A context manager closes (and therefore flushes) the file
    # deterministically; the inline ``open()`` previously handed to
    # ``print >>`` was never closed explicitly.
    with open("doc/sphinx/build_dir.txt", "w") as build_dir_file:
        build_dir_file.write("%s\n" % (self.build_lib,))
def run(self):
\ No newline at end of file
......@@ -13,7 +13,8 @@ from distutils.extension import Extension
from import build
from obidistutils.command import build_cexe
from obidistutils.command.build_cexe import build_cexe
from obidistutils.command.build_ext import build_ext
from obidistutils.command.build_ctools import build_ctools
from distutils.command.build_scripts import build_scripts
from obidistutils.command.install_scripts import install_scripts
......@@ -21,11 +22,9 @@ from obidistutils.dist import Distribution
import sys
from Cython.Distutils import build_ext
from Cython.Compiler import Main
except ImportError:
from distutils.command.build_ext import build_ext
def findPackage(root,base=None):
# Makefile for Sphinx documentation
# You can set these variables from the command line.
SPHINXBUILD = sphinx-build
BUILDDIR = build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# the i18n builder cannot share the environment and doctrees with the others
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
rm -rf $(BUILDDIR)/*
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
@echo "Build finished; now you can process the pickle files."
@echo "Build finished; now you can process the JSON files."
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OrganelleAssembler.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OrganelleAssembler.qhc"
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/OrganelleAssembler"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/OrganelleAssembler"
@echo "# devhelp"
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
@echo "Build finished. The text files are in $(BUILDDIR)/text."
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
@echo "The overview file is in $(BUILDDIR)/changes."
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
.. Organelle Assembler documentation master file, created by
sphinx-quickstart on Thu Feb 13 16:10:03 2014.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Organelle Assembler's documentation!
.. toctree::
:maxdepth: 2
Assembling a mitochondrion genome <mitochondrion>
User API <userapi>
The organelle assembler indexer <orgasmi>
Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
Assembling a mitochondrion genome from IPython
.. _mitoindex:
Step 1 : indexing the reads
To assemble a genome from sequence reads, you first need to index them. This step allows efficient access
to the reads during the assembling process. The organelle assembler only works with paired-end reads of equal length.
Considering two fastq files ``forward.fastq`` and ``reverse.fastq`` containing respectively the forward and the
reverse reads of the paired reads, to build the index named ``readindex`` from a UNIX terminal you have to run the
:ref:`orgasmi <orgasmi>` command :
.. code-block:: bash
orgasmi -o readindex forward.fastq reverse.fastq
If the ``forward.fastq`` and ``reverse.fastq`` files are compressed using ``gzip`` (``forward.fastq.gz`` and ``reverse.fastq.gz``)
you can index them without uncompressing both files on your disk. To achieve that you have to run the following commands :
.. code-block:: bash
$ mkfifo forward
$ mkfifo reverse
$ gzip -dc forward.fastq.gz > forward &
$ gzip -dc reverse.fastq.gz > reverse &
$ orgasmi -o readindex -M 30 -l 100 forward reverse
$ rm forward reverse
This will index the first 30 million reads stored in the ``forward.fastq.gz`` and ``reverse.fastq.gz`` files over their first 100bp.
Step 2 : Running the organelle assembler from IPython
Launching Ipython
The IPython environment has to be launched from a UNIX terminal using the :command:`ipython` command.
.. code-block:: bash
$ ipython
Python 2.7 (r27:82500, Jul 6 2010, 10:43:34)
Type "copyright", "credits" or "license" for more information.
IPython 0.13.1 -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]:
Loading the main functions and classes need from the assembling
We first need to import a set of classes and functions that we will use during the assembling process.
.. code-block:: ipython
In [1]: from orgasm.indexer._orgasm import Index
In [2]: from orgasm.multialign import *
In [3]: from orgasm.tango import *
In [4]: from orgasm.assembler import Assembler
In [5]: from orgasm.backtranslate.fasta import fasta
Loading the indexed reads
We assume that, following the method described :ref:`previously <mitoindex>`, we have
indexed our reads in a library called ``readindex``. This library
now has to be loaded into the computer memory.
.. code-block:: ipython
In [6]: r = Index('readindex')
Loading global data...
Reading indexed sequence reads...
30000000 sequences read
Reading indexed pair data...
Loading reverse index...
Indexing reverse complement sequences ...
Fast indexing forward reads...
Fast indexing reverse reads...
Looking for the reads to initiate the assembling process
To target the assembling on the mitochondrion genome we need to select a set
of reads belonging to it. This is done by looking for reads encoding a
well-conserved mitochondrial gene.
We first load the protein sequence of the :abbr:`COXI (cytochrome oxidase I)` gene
from a file named ``COX1.fasta``.
.. code-block:: ipython
In [7]: p = fasta('COX1.fasta')
The set of reads matching the loaded protein sequences is selected using the
.. code-block:: ipython
In [8]: m = r.lookForSeeds(p)
99.8 % |#################################################/ ] remain : 00:00:00
Running the assembler
We have to create an instance of the :py:class:`Assembler` class
.. code-block:: ipython
In [9]: asm = Assembler(r)
Then the selected set of reads has to be converted into a set of seeds usable by the assembler.
This is done by the :py:func:`orgasm.tango.matchtoseed` function.
.. code-block:: ipython
In [10]: s = matchtoseed(m,r)
The assembling process can then be initiated using the :py:func:`~orgasm.tango.tango` function.
.. code-block:: ipython
In [11]: a = tango(asm,s,mincov=1,minread=3,minoverlap=30)
Cycle : 220 (438 nodes / 46.5% fake) Waiting points : 8 / 6.97 Gene: None
JumpGap on read 14233691
Cycle : 1230 (2454 nodes / 61.0% fake) Waiting points : 11 / 8.31 Gene: None
JumpGap on read 25800833
Cycle : 10798 (21586 nodes / 51.2% fake) Waiting points : 7 / 25.46 Gene: None
JumpGap on read 22888545
Cycle : 16390 (32766 nodes / 52.5% fake) Waiting points : 5 / 23.90 Gene: None
JumpGap on read 22812874
Cleaning the assembling
The assembling process creates an assembling graph representing the relationship between the reads.
The assembled sequence can be determined by following a path of this graph.
Besides the main path corresponding to the true sequence, many short paths exist. They correspond
to aborted extensions created notably by sequencing errors.
The :py:meth:`~orgasm.assembler.Assembler.cleanDeadBranches` method of the :py:class:`~orgasm.assembler.Assembler`
class removes from the assembling graph all short paths corresponding to those spurious extensions.
.. code-block:: ipython
In [12]: asm.cleanDeadBranches()
Remaining edges : 32498 node : 32500
Out[12]: 134
Compacting the assembling graph
We can now post-process the assembling graph to produce a compact
graph where each edge corresponds to an unambiguous path in the
original assembling graph.
.. code-block:: ipython
In [13]: cg = asm.compactAssembling()
Compacting graph :
Stem 1 : 16249 bp (total : 16249) coverage : 71.43
Stem -1 : 16249 bp (total : 32498) coverage : 71.43
Minimum stem coverage = 71
The resulting compact graph can be stored in a file to be analyzed using a standard
graph editor like :program:`yed`.
.. code-block:: ipython
In [17]: print >>open('UGBT-B8-0101.clean.gml','w'),cg.gml()
.. figure:: mito-clean-1.png
:scale: 50 %
:alt: Assembling graph layout.
The assembling graph displays two edges corresponding to two sequences of 16249bp.
They correspond to the same sequence in the complemented/reversed orientation.
This can be easily recognized with the ``stemid`` which starts the edge labels :
``-1`` and ``1``. Two opposite ``stemid`` indicate two reverse-complemented
The assembled sequence is linear because of a polyG sequence blocking the assembling
.. code-block:: ipython
In [18]: ex = getPairedRead(asm,cg,1,300)
In [19]: exr = getPairedRead(asm,cg,-1,300)
In [20]: exr = set(-i for i in exr)
In [21]: ali = multiAlignReads(ex|exr,r)
In [22]: len(ali)
Out[22]: 1
.. code-block:: ipython
In [23]: s = insertFragment(asm,c)
In [24]: asm.cleanDeadBranches()
Remaining edges : 32676 node : 32676
Out[25]: 0
In [26]: cg = asm.compactAssembling()
Compacting graph :
Circle 1 : 16337 bp (total : 16337) coverage : 71.05
Circle -1 : 16337 bp (total : 32674) coverage : 71.05
Minimum stem coverage = 71
.. code-block:: ipython
In [27]: print assembling2fasta(cg)
>stem@1 : GATTA->(16337)->TTCGA [71]
.. code-block:: ipython
In [28]: print >>open('UGBT-B8-0101.fasta','w'),assembling2fasta(cg)
.. _orgasmi:
The organelle assembler indexer
The :program:`orgasmi` program lexicographically indexes a set of paired Illumina
reads stored in fastq format. All the indexed reads must have the same length.
Two options of the indexer allow indexing only a subset of the reads and/or
only the beginning of the reads over a specified length.
.. code-block:: bash
$ orgasmi -o <index> <forward_fastq_file> <reverse_fastq_file>
.. program:: orgasmi
.. cmdoption:: -h
Shows the help message and exits.
.. cmdoption:: -o <index> : The name of the index output files
orgasmi creates four files :
- <index>.ogx : contains information concerning the index
- <index>.ofx : contains the sequences themselves and the forward index
- <index>.orx : contains reverse index
- <index>.opx : contains read pairing data
The assembler will need all these files to run the assembling process.
.. cmdoption:: -M ###
If specified, ### represents the number of reads to index, in millions
.. cmdoption:: -l ###
If specified ### represents the read length to consider. Only reads
with a length greater or equal to ### will be indexed. Reads longer
than the specified length are truncated at the specified length.
If the :option:`-l <orgasmi -l>` option is not used, the length is estimated from
the length of the first read of the forward file.
Main functions usable by end users during the assembling process
.. automodule:: orgasm.assembler
.. autoclass:: orgasm.assembler.Assembler
.. automodule:: orgasm.tango
\ No newline at end of file
import re, inspect, textwrap, pydoc
from docscrape import NumpyDocString, FunctionDoc, ClassDoc
class SphinxDocString(NumpyDocString):
# string conversion routines
def _str_header(self, name, symbol='`'):
return ['.. rubric:: ' + name, '']
def _str_field_list(self, name):
return [':' + name + ':']
def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' '*indent + line]
return out
def _str_signature(self):
return ['']
if self['Signature']:
return ['``%s``' % self['Signature']] + ['']
return ['']
def _str_summary(self):
return self['Summary'] + ['']
def _str_extended_summary(self):
return self['Extended Summary'] + ['']
def _str_param_list(self, name):
out = []
if self[name]:
out += self._str_field_list(name)
out += ['']
for param,param_type,desc in self[name]:
out += self._str_indent(['**%s** : %s' % (param.strip(),
out += ['']
out += self._str_indent(desc,8)
out += ['']
return out
def _str_section(self, name):
out = []
if self[name]:
out += self._str_header(name)
out += ['']
content = textwrap.dedent("\n".join(self[name])).split("\n")
out += content
out += ['']
return out
def _str_see_also(self, func_role):
    """Render the ``See Also`` section as a ``.. seealso::`` directive.

    Returns an empty list when the section is empty.  Otherwise the
    parent class rendering is reused: its first two lines are dropped
    and the remainder is indented under the directive.
    """
    if not self['See Also']:
        return []
    parent_lines = super(SphinxDocString, self)._str_see_also(func_role)
    directive = ['.. seealso::', '']
    directive.extend(self._str_indent(parent_lines[2:]))
    return directive
def _str_warnings(self):
out = []
if self['Warnings']:
out = ['.. warning::', '']
out += self._str_indent(self['Warnings'])
return out
def _str_index(self):
idx = self['index']
out = []
if len(idx) == 0:
return out
out += ['.. index:: %s' % idx.get('default','')]
for section, references in idx.iteritems():
if section == 'default':
elif section == 'refguide':
out += [' single: %s' % (', '.join(references))]
out += [' %s: %s' % (section, ','.join(references))]
return out
def _str_references(self):
out = []
if self['References']:
out += self._str_header('References')
if isinstance(self['References'], str):
self['References'] = [self['References']]
out += ['']
return out
def __str__(self, indent=0, func_role="obj"):
out = []
out += self._str_signature()
out += self._str_index() + ['']
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Attributes', 'Methods',
out += self._str_param_list(param_list)
out += self._str_warnings()
out += self._str_see_also(func_role)
out += self._str_section('Notes')
out += self._str_references()
out += self._str_section('Examples')
out = self._str_indent(out,indent)
return '\n'.join(out)
class SphinxFunctionDoc(SphinxDocString, FunctionDoc):
class SphinxClassDoc(SphinxDocString, ClassDoc):