distance.R 6.05 KB
Newer Older
Eric Coissac committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
#' @include taxonomy.R
NULL

#' @export
setGeneric("longest.path", function(taxonomy,taxid) {
  return(standardGeneric("longest.path"))
})

#' Returns the longuest path from a taxon.
#' 
#' The method \code{longest.path} returns the length of the
#' path linking a taxid to the farest leaf belonging this taxid.
#' 
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' 
#' @param taxid an \code{integer} vector containing the list of taxids.
#' 
#' @return an \code{integer} vector containing the list length.
#' 
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#' 
#' # returns the longest path in the taxonomy (from the root node)
#' longest.path(taxo,1)
#' 
#' 
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#' 
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname longest.path-method
#' @aliases longest.path,obitools.taxonomy
#' 
setMethod("longest.path", "obitools.taxonomy",
          function(taxonomy,taxid) {
            getp = function(t)   {	
              if (is.na(t))
                return(NA)
              else
                return(.Call('R_longest_path',
                             taxonomy,
                             t,
                             PACKAGE="ROBITaxonomy"))
            }
            
            taxid = as.integer(taxid)
            sapply(taxid,getp)
          })


#' @export
setGeneric("distance.taxonomy", function(taxonomy,taxid1,taxid2=NULL,name=F) {
  return(standardGeneric("distance.taxonomy"))
})


#' Computes a distance matrix between taxids
#' 
#' The method \code{taxonomy.distance} computes a distance matrix between a
#' set of taxids. The distance between two taxa is based on the topology of
#' the taxonomomy tree.
#' 
#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
#'                                  { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
#' 
#' 
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' 
#' @param taxid1 an \code{integer} vector containing a list of taxids.
#' 
#' @param taxid2 an \code{integer} vector containing a list of taxids.
#'               If \code{taxid2} is set to \code{NULL} (it's default value)
#'               then the \code{taxid2} list is considered as equal to 
#'               \code{taxid1} list. 
#' @param name  A logical value \code{TRUE} or \code{FALSE} indicating 
#'          if the method return distance matrix annotated by taxids or 
#'          by scientific names.
#'          
#' @return the distance matrix between taxids specified in the \code{taxid1}
#'         set and the \code{taxid2} set.
#'         
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#' 
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#' 
#' # computes the distance matrix between taxids
#' distance.taxonomy(taxo,sp.taxid)
#' 
#' # Same thing but the matrix is annotated by scientific names
#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
#' 
#' @seealso \code{\link{longest.path}}
#' 
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname distance.taxonomy-method
#' @aliases taxonomy.distance,obitools.taxonomy
#' 
setMethod("distance.taxonomy", "obitools.taxonomy",
          function(taxonomy,taxid1,taxid2=NULL,name=F) {
            taxdist = function(r)
            {	
              t1=r[1]
              t2=r[2]
              if (is.na(t1) | is.na(t2))
                return(NA)
              
              p1 = path(taxonomy,t1)
              p2 = path(taxonomy,t2)
              
              minp = min(length(p1),length(p2))
              common = sum(p1[1:minp] == p2[1:minp])
              lca = p1[common]
              lp = longest.path(taxonomy,lca)
              return(lp/(lp+common))
            }
            
            multitaxdist=function(t1,t2) {
              apply(data.frame(t1,t2),1,taxdist)
            }
            
            taxid1 = taxid1[! is.na(validate(taxonomy,taxid1))]
            t1 = path(taxonomy,taxid1)
            
            same = is.null(taxid2)
            
            if (same)
            {
              ntaxon = length(taxid1) 
              t2 = t1[unlist(sapply(2:ntaxon,
                                    function(x) x:ntaxon))]
              t1 = t1[rep(1:(ntaxon-1),(ntaxon-1):1)]
            }
            else
            {
              taxid2 = taxid2[! is.na(validate(taxonomy,taxid2))]
              t2 = path(taxonomy,taxid2)
              nt1 = length(taxid1)
              nt2 = length(taxid2)
              t1 = t1[rep(1:nt1,nt2)]
              t2 = t2[rep(1:nt2,rep(nt1,nt2))]
            }
            
            lmin = mapply(function(a,b) min(length(a),length(b)),
                          t1,
                          t2)
            
            llca = mapply(function(x,y,l) sum(x[1:l]==y[1:l]),
                          t1,
                          t2,
                          lmin)
            
            lb   = longest.path(taxonomy,mapply(function(x,y) x[y],t1,llca))
            d    = as.double(lb / (lb + llca))
            
            if (same) {
              attr(d, "Size") <- ntaxon
              if (name)
                attr(d, "Labels") <- scientificname(taxonomy,taxid1)
              else
                attr(d, "Labels") <- as.character(taxid1)
              attr(d, "Diag") <- FALSE
              attr(d, "Upper") <- FALSE
              attr(d, "method") <- NULL
              attr(d, "call") <- match.call()
              class(d) <- "dist"
            }
            else {
              if (name)
                d = matrix(d,nt1,nt2,
                           dimnames=list(scientificname(taxonomy,taxid1),
                                         scientificname(taxonomy,taxid2)))
              else
                d = matrix(d,nt1,nt2,
                           dimnames=list(as.character(taxid1),
                                         as.character(taxid2)))
              
            }
            
            return(d)
          })