################################################################################
################################################################################
#' Read a biom-format file, returning a \code{biom-class}.
#'
#' Import the data from a biom-format file into R, represented as an instance
#' of the \code{\link{biom-class}}; essentially a \code{\link{list}} with
#' special constraints that map to \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#'
#' The BIOM file format (canonically pronounced biome) is designed to be a general-use format for representing biological sample by observation contingency tables. BIOM is a recognized standard for the \href{http://www.earthmicrobiome.org/}{Earth Microbiome Project} and is a \href{http://gensc.org/}{Genomics Standards Consortium} candidate project. Please see \href{http://biom-format.org/}{the biom-format home page} for more details.
#'
#' It is tempting to include an argument identifying the
#' biom-format version number of the data file being imported.
#' However, the biom-format version number is a required
#' field in the biom-format definition.
#' Rather than duplicate this formal specification
#' and allow the possibility of a conflict, the version
#' number of the biom format will be referred to only by
#' the "format" field in the biom formatted data,
#' or its representation in R.
#'
#' @usage read_biom(biom_file)
#'
#' @param biom_file (Required). A character string indicating the
#' file location of the biom formatted file. This is a HDF5 or JSON formatted file
#' specific to biological datasets.
#' The format is formally defined at \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}
#' and depends on the versioning.
#'
#' @return An instance of the \code{biom-class}.
#'
#' @seealso
#'
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#'
#' Definition of the
#' \code{\link{biom-class}}.
#'
#' Function to write a biom format file from a biom object,
#' \code{\link{write_biom}}
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @importFrom jsonlite fromJSON
#' @export
#' @examples
#' # # # import with default parameters, specify a file
#' biom_file <- system.file("extdata", "rich_sparse_otu_table.biom", package = "biomformat")
#' biom_file
#' read_biom(biom_file)
#' biom_file <- system.file("extdata", "min_sparse_otu_table.biom", package = "biomformat")
#' biom_file
#' read_biom(biom_file)
#' ## The previous examples use system.file() because of constraints in specifying a fixed
#' ## path within a reproducible example in a package.
#' ## In practice, however, you can simply provide a hard-coded
#' ## character string path to your file:
#' # mybiomfile <- "path/to/my/biomfile.biom"
#' # read_biom(mybiomfile)
read_biom <- function(biom_file){
  # Read a biom file into a biom-class object.
  #
  # biom_file: passed unchanged to the JSON parser and, if that fails, to the
  #            HDF5 reader.
  # Returns:   the result of biom(), which builds the biom-class instance and
  #            performs its validity checks.
  # Errors:    a single error naming the input and the reason each reader
  #            rejected it, when neither reader succeeds.

  # First attempt: JSON (BIOM-v1), parsed by jsonlite with simplification to
  # data.frames and matrices switched off. On failure the condition object is
  # kept (instead of being discarded) so its message can be reported later.
  x <- tryCatch(
    fromJSON(biom_file, simplifyDataFrame = FALSE, simplifyMatrix = FALSE),
    error = function(e) e
  )
  if (inherits(x, "error")) {
    # JSON interpretation failed: remember why, then try HDF5 (BIOM-v2).
    json_error <- x
    x <- tryCatch(read_hdf5_biom(biom_file), error = function(e) e)
  }
  if (inherits(x, "error")) {
    # Both readers failed: say so explicitly and include both causes, so a
    # wrong path can be told apart from a malformed file.
    stop("Both attempts to read input file:\n",
         biom_file, "\n",
         "either as JSON (BIOM-v1) or HDF5 (BIOM-v2) failed.\n",
         "JSON error: ", conditionMessage(json_error), "\n",
         "HDF5 error: ", conditionMessage(x), "\n",
         "Check file path, file name, file itself, then try again.")
  }
  # Use the biom() constructor function to
  # instantiate a biom-class, perform validity checks. Return.
  biom(x)
}
################################################################################
#' Write a biom-format v1 (JSON) file from a \code{biom-class} object.
#'
#' @param x (Required). A biom object that is going to be written to file
#' as a proper biom formatted file, adhering to
#' \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#'
#' @param biom_file (Required). A character string indicating the
#' file location of the biom formatted file. This is a JSON formatted file
#' specific to biological datasets.
#' The format is formally defined at
#' \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}
#'
#' @return Nothing. The first argument, \code{x}, is written to a file.
#'
#' @seealso
#'
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#'
#' Definition of the
#' \code{\link{biom-class}}.
#'
#' The \code{\link{read_biom}} import function.
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @export
#' @importFrom jsonlite toJSON
#' @examples
#' biom_file <- system.file("extdata", "rich_sparse_otu_table.biom", package = "biomformat")
#' x = read_biom(biom_file)
#' outfile = tempfile()
#' write_biom(x, outfile)
#' y = read_biom(outfile)
#' identical(x, y)
write_biom <- function(x, biom_file){
  # Serialize the biom object to JSON text. `always_decimal` and `auto_unbox`
  # are jsonlite options: whole-number doubles keep a decimal part, and
  # length-one atomic vectors are written as scalars instead of arrays.
  json_text <- toJSON(x, always_decimal = TRUE, auto_unbox = TRUE)
  # Write the JSON text to the location given by `biom_file`.
  cat(json_text, file = biom_file)
}
################################################################################
#' Read in a biom-format version 2 (HDF5) file, returning a \code{list}.
#'
#' This function is meant only to be used if the user knows the file is
#' a particular version / hdf5 format. Otherwise, the \code{\link{read_biom}}
#' function should be used.
#'
#' @param biom_file (Required). A character string indicating the
#' file location of the HDF5 (biom-format version 2) file to read, adhering to
#' \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#'
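#' @return A \code{list} holding the contents of the file, which can be passed
#' to \code{\link{biom}} to create an instance of the \code{biom-class}
#' (see the examples below).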
#'
#' @seealso
#'
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#'
#' Definition of the
#' \code{\link{biom-class}}.
#'
#' The general \code{\link{read_biom}} import function.
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @export
#' @importFrom rhdf5 h5read
#' @examples
#' biom_file <- system.file("extdata", "rich_sparse_otu_table_hdf5.biom", package = "biomformat")
#' x = read_hdf5_biom(biom_file)
#' x = biom(x)
#' outfile = tempfile()
#' write_biom(x, outfile)
#' y = read_biom(outfile)
#' identical(observation_metadata(x),observation_metadata(y))
#' identical(sample_metadata(x),sample_metadata(y))
#' identical(biom_data(x), biom_data(y))
read_hdf5_biom <- function(biom_file){
  # Read everything below the root group of the HDF5 file, attributes included.
  h5 <- h5read(biom_file, "/", read.attributes = TRUE)

  # Build the table contents and the per-observation / per-sample entries
  # with the package helpers.
  data <- generate_matrix(h5)
  rows <- generate_metadata(h5$observation)
  columns <- generate_metadata(h5$sample)
  # Shape is taken from the generated data: number of elements, then the
  # length of the first element.
  shape <- c(length(data), length(data[[1]]))

  # Header fields read from the attributes attached to the h5read() result.
  id <- attr(h5, "id")
  version <- attr(h5, "format-version")
  format <- sprintf("Biological Observation Matrix %s.%s", version[1], version[2])
  format_url <- attr(h5, "format-url")
  generated_by <- attr(h5, "generated-by")
  date <- attr(h5, "creation-date")

  # Header fields that are fixed rather than read from the file.
  type <- "OTU table"
  matrix_type <- "dense"
  matrix_element_type <- "int"

  # NOTE(review): namedList() is defined elsewhere and presumably names each
  # element after the variable passed in, so the local names above are kept
  # exactly as they were -- confirm before renaming any of them.
  namedList(id, format, format_url, type, generated_by, date, matrix_type, matrix_element_type,
            rows, columns, shape, data)
}
################################################################################