File: write.big.matrix.Rd

package info (click to toggle)
r-cran-bigmemory 4.6.4-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 648 kB
  • sloc: cpp: 4,930; ansic: 131; sh: 13; makefile: 2
file content (181 lines) | stat: -rw-r--r-- 5,597 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bigmemory.R
\name{write.big.matrix}
\alias{write.big.matrix}
\alias{write.big.matrix,big.matrix,character-method}
\alias{read.big.matrix}
\alias{read.big.matrix,character-method}
\title{File interface for a ``big.matrix''}
\usage{
write.big.matrix(x, filename, row.names = FALSE, col.names = FALSE, sep = ",")

\S4method{write.big.matrix}{big.matrix,character}(x, filename, row.names = FALSE, col.names = FALSE, sep = ",")

read.big.matrix(
  filename,
  sep = ",",
  header = FALSE,
  col.names = NULL,
  row.names = NULL,
  has.row.names = FALSE,
  ignore.row.names = FALSE,
  type = NA,
  skip = 0,
  separated = FALSE,
  backingfile = NULL,
  backingpath = NULL,
  descriptorfile = NULL,
  binarydescriptor = FALSE,
  extraCols = NULL,
  shared = options()$bigmemory.default.shared
)

\S4method{read.big.matrix}{character}(
  filename,
  sep = ",",
  header = FALSE,
  col.names = NULL,
  row.names = NULL,
  has.row.names = FALSE,
  ignore.row.names = FALSE,
  type = NA,
  skip = 0,
  separated = FALSE,
  backingfile = NULL,
  backingpath = NULL,
  descriptorfile = NULL,
  binarydescriptor = FALSE,
  extraCols = NULL,
  shared = options()$bigmemory.default.shared
)
}
\arguments{
\item{x}{a \code{\link{big.matrix}}.}

\item{filename}{the name of an input/output file.}

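\item{row.names}{when reading, a vector of names; these are used even if row
names appear to exist in the file.  When writing, a logical flag indicating
whether the row names of \code{x} are written to the file.}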

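\item{col.names}{when reading, a vector of names; these are used even if
column names exist in the file.  When writing, a logical flag indicating
whether the column names of \code{x} are written to the file.}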

\item{sep}{a field delimiter.}

\item{header}{if \code{TRUE}, the first line (after a possible skip)
should contain column names.}

\item{has.row.names}{if \code{TRUE}, then the first column contains row
names.}

\item{ignore.row.names}{if \code{TRUE} when \code{has.row.names==TRUE},
the row names will be ignored.}

\item{type}{preferably specified, \code{"integer"} for example.}

\item{skip}{number of lines to skip at the head of the file.}

\item{separated}{use separated column organization of the data instead of
column-major organization.}

\item{backingfile}{the root name for the file(s) for the cache of \code{x}.}

\item{backingpath}{the path to the directory containing the file backing
cache.}

\item{descriptorfile}{the file to be used for the description of the
filebacked matrix.}

\item{binarydescriptor}{the flag to specify if the binary RDS format should
be used for the backingfile description, for subsequent use with
\code{\link{attach.big.matrix}}; if \code{NULL} or \code{FALSE}, the
\code{dput()} file format is used.}

\item{extraCols}{the optional number of extra columns to be appended to the
matrix for future use.}

\item{shared}{if \code{TRUE}, the resulting \code{big.matrix} can be shared
across processes.}
}
\value{
a \code{\link{big.matrix}} object is returned by \code{read.big.matrix},
while \code{write.big.matrix} creates an output file (a path could be part
of \code{filename}).
}
\description{
Create a \code{\link{big.matrix}} by reading from a
suitably-formatted ASCII file, or
write the contents of a \code{\link{big.matrix}} to a file.
}
\details{
Files must contain only one atomic type
(all \code{integer}, for example).  You, the user, should know whether
your file has row and/or column names, and various combinations of options
should be helpful in obtaining the desired behavior.

When reading from a file, if \code{type} is not specified we try to
make a reasonable guess for you without
making any guarantees at this point.
Unless you have really large integer values, we recommend
you consider \code{"short"}.  If you have something that is essentially
categorical, you might even be able to use \code{"char"}, with huge memory
savings for large data sets.

Any non-numeric entry will be ignored and replaced with \code{NA},
so reading something that traditionally would be a \code{data.frame}
won't cause an error.  A warning is issued.

Wishlist: we'd like to provide an option to ignore specified columns while
doing reads.
Or perhaps to specify columns targeted for factor or character conversion
to numeric values.  Would you use such features?  Email us and let us know!
}
\examples{
# Without specifying the type, this big.matrix x will hold integers.

x <- as.big.matrix(matrix(1:10, 5, 2))
x[2,2] <- NA
x[,]
temp_dir = tempdir()
if (!dir.exists(temp_dir)) dir.create(temp_dir)
write.big.matrix(x, file.path(temp_dir, "foo.txt"))

# Just for fun, I'll read it back in as character (1-byte integers):
y <- read.big.matrix(file.path(temp_dir, "foo.txt"), type="char")
y[,]

# Other examples:
w <- as.big.matrix(matrix(1:10, 5, 2), type='double')
w[1,2] <- NA
w[2,2] <- -Inf
w[3,2] <- Inf
w[4,2] <- NaN
w[,]
write.big.matrix(w, file.path(temp_dir, "bar.txt"))
w <- read.big.matrix(file.path(temp_dir, "bar.txt"), type="double")
w[,]
w <- read.big.matrix(file.path(temp_dir, "bar.txt"), type="short")
w[,]

# Another example using row names (which we don't like).
x <- as.big.matrix(as.matrix(iris), type='double')
rownames(x) <- as.character(1:nrow(x))
head(x)
write.big.matrix(x, file.path(temp_dir, 'IrisData.txt'), col.names=TRUE, 
                 row.names=TRUE)
y <- read.big.matrix(file.path(temp_dir, "IrisData.txt"), header=TRUE, 
                     has.row.names=TRUE)
head(y)

# The following would fail with a dimension mismatch:
if (FALSE) y <- read.big.matrix(file.path(temp_dir, "IrisData.txt"), 
                                header=TRUE)
}
\seealso{
\code{\link{big.matrix}}
}
\author{
John W. Emerson and Michael J. Kane
\email{bigmemoryauthors@gmail.com}
}
\keyword{methods}