1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
|
## -----------------------------------------------------------------------
##
## IGraph R package
## Copyright (C) 2015 Gabor Csardi <csardi.gabor@gmail.com>
## 334 Harvard street, Cambridge, MA 02139 USA
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
## 02110-1301 USA
##
## -----------------------------------------------------------------------
#' Spectral Embedding of Adjacency Matrices
#'
#' Spectral decomposition of the adjacency matrices of graphs.
#'
#' This function computes a `no`-dimensional Euclidean representation of
#' the graph based on its adjacency matrix, \eqn{A}. This representation is
#' computed via the singular value decomposition of the adjacency matrix,
#' \eqn{A=UDV^T}.In the case, where the graph is a random dot product graph
#' generated using latent position vectors in \eqn{R^{no}} for each vertex, the
#' embedding will provide an estimate of these latent vectors.
#'
#' For undirected graphs the latent positions are calculated as
#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])}, where \eqn{U^{no}}{U[no]} equals
#' to the first `no` columns of \eqn{U}, and \eqn{D^{1/2}}{sqrt(D[no])} is
#' a diagonal matrix containing the top `no` singular values on the
#' diagonal.
#'
#' For directed graphs the embedding is defined as the pair
#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])} and \eqn{Y=V^{no}D^{1/2}}{V[no]
#' sqrt(D[no])}. (For undirected graphs \eqn{U=V}, so it is enough to keep one
#' of them.)
#'
#' @param graph The input graph, directed or undirected.
#' @param no An integer scalar. This value is the embedding dimension of the
#' spectral embedding. Should be smaller than the number of vertices. The
#' largest `no`-dimensional non-zero singular values are used for the
#' spectral embedding.
#' @param weights Optional positive weight vector for calculating a weighted
#' embedding. If the graph has a `weight` edge attribute, then this is
#' used by default. In a weighted embedding, the edge weights are used instead
#' of the binary adjacencny matrix.
#' @param which Which eigenvalues (or singular values, for directed graphs) to
#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is
#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest
#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs
#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values
#' are used for the ordering.
#' @param scaled Logical scalar, if `FALSE`, then \eqn{U} and \eqn{V} are
#' returned instead of \eqn{X} and \eqn{Y}.
#' @param cvec A numeric vector, its length is the number vertices in the
#' graph. This vector is added to the diagonal of the adjacency matrix.
#' @param options A named list containing the parameters for the SVD
#' computation algorithm in ARPACK. By default, the list of values is assigned
#' the values given by [arpack_defaults()].
#' @return A list containing with entries: \item{X}{Estimated latent positions,
#' an `n` times `no` matrix, `n` is the number of vertices.}
#' \item{Y}{`NULL` for undirected graphs, the second half of the latent
#' positions for directed graphs, an `n` times `no` matrix, `n`
#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs)
#' or the singular values (for directed graphs) calculated by the algorithm.}
#' \item{options}{A named list, information about the underlying ARPACK
#' computation. See [arpack()] for the details.}
#' @seealso [sample_dot_product()]
#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. A
#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,
#' *Journal of the American Statistical Association*, Vol. 107(499), 2012
#' @keywords graphs
#' @examples
#'
#' ## A small graph
#' lpvs <- matrix(rnorm(200), 20, 10)
#' lpvs <- apply(lpvs, 2, function(x) {
#' return(abs(x) / sqrt(sum(x^2)))
#' })
#' RDP <- sample_dot_product(lpvs)
#' embed <- embed_adjacency_matrix(RDP, 5)
#' @family embedding
#' @export
#' @cdocs igraph_adjacency_spectral_embedding
embed_adjacency_matrix <- adjacency_spectral_embedding_impl
#' Dimensionality selection for singular values using profile likelihood.
#'
#' Select the number of significant singular values, by finding the
#' \sQuote{elbow} of the scree plot, in a principled way.
#'
#' The input of the function is a numeric vector which contains the measure of
#' \sQuote{importance} for each dimension.
#'
#' For spectral embedding, these are the singular values of the adjacency
#' matrix. The singular values are assumed to be generated from a Gaussian
#' mixture distribution with two components that have different means and same
#' variance. The dimensionality \eqn{d} is chosen to maximize the likelihood
#' when the \eqn{d} largest singular values are assigned to one component of
#' the mixture and the rest of the singular values assigned to the other
#' component.
#'
#' This function can also be used for the general separation problem, where we
#' assume that the left and the right of the vector are coming from two Normal
#' distributions, with different means, and we want to know their border. See
#' examples below.
#'
#' @param sv A numeric vector, the ordered singular values.
#' @return A numeric scalar, the estimate of \eqn{d}.
#' @author Gabor Csardi \email{csardi.gabor@@gmail.com}
#' @seealso [embed_adjacency_matrix()]
#' @references M. Zhu, and A. Ghodsi (2006). Automatic dimensionality selection
#' from the scree plot via the use of profile likelihood. *Computational
#' Statistics and Data Analysis*, Vol. 51, 918--930.
#' @keywords graphs
#' @examples
#'
#' # Generate the two groups of singular values with
#' # Gaussian mixture of two components that have different means
#' sing.vals <- c(rnorm(10, mean = 1, sd = 1), rnorm(10, mean = 3, sd = 1))
#' dim.chosen <- dim_select(sing.vals)
#' dim.chosen
#'
#' # Sample random vectors with multivariate normal distribution
#' # and normalize to unit length
#' lpvs <- matrix(rnorm(200), 10, 20)
#' lpvs <- apply(lpvs, 2, function(x) {
#' (abs(x) / sqrt(sum(x^2)))
#' })
#' RDP.graph <- sample_dot_product(lpvs)
#' dim_select(embed_adjacency_matrix(RDP.graph, 10)$D)
#'
#' # Sample random vectors with the Dirichlet distribution
#' lpvs.dir <- sample_dirichlet(n = 20, rep(1, 10))
#' RDP.graph.2 <- sample_dot_product(lpvs.dir)
#' dim_select(embed_adjacency_matrix(RDP.graph.2, 10)$D)
#'
#' # Sample random vectors from hypersphere with radius 1.
#' lpvs.sph <- sample_sphere_surface(dim = 10, n = 20, radius = 1)
#' RDP.graph.3 <- sample_dot_product(lpvs.sph)
#' dim_select(embed_adjacency_matrix(RDP.graph.3, 10)$D)
#'
#' @family embedding
#' @export
#' @cdocs igraph_dim_select
dim_select <- dim_select_impl
#' Spectral Embedding of the Laplacian of a Graph
#'
#' Spectral decomposition of Laplacian matrices of graphs.
#'
#' This function computes a `no`-dimensional Euclidean representation of
#' the graph based on its Laplacian matrix, \eqn{L}. This representation is
#' computed via the singular value decomposition of the Laplacian matrix.
#'
#' They are essentially doing the same as [embed_adjacency_matrix()],
#' but work on the Laplacian matrix, instead of the adjacency matrix.
#'
#' @param graph The input graph, directed or undirected.
#' @param no An integer scalar. This value is the embedding dimension of the
#' spectral embedding. Should be smaller than the number of vertices. The
#' largest `no`-dimensional non-zero singular values are used for the
#' spectral embedding.
#' @param weights Optional positive weight vector for calculating a weighted
#' embedding. If the graph has a `weight` edge attribute, then this is
#' used by default. For weighted embedding, edge weights are used instead
#' of the binary adjacency matrix, and vertex strength (see
#' [strength()]) is used instead of the degrees.
#' @param which Which eigenvalues (or singular values, for directed graphs) to
#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is
#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest
#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs
#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values
#' are used for the ordering.
#' @param type The type of the Laplacian to use. Various definitions exist for
#' the Laplacian of a graph, and one can choose between them with this
#' argument.
#'
#' Possible values: `D-A` means \eqn{D-A} where \eqn{D} is the degree
#' matrix and \eqn{A} is the adjacency matrix; `DAD` means
#' \eqn{D^{1/2}}{D^1/2} times \eqn{A} times \eqn{D^{1/2}{D^1/2}},
#' \eqn{D^{1/2}}{D^1/2} is the inverse of the square root of the degree matrix;
#' `I-DAD` means \eqn{I-D^{1/2}}{I-D^1/2}, where \eqn{I} is the identity
#' matrix. `OAP` is \eqn{O^{1/2}AP^{1/2}}{O^1/2 A P^1/2}, where
#' \eqn{O^{1/2}}{O^1/2} is the inverse of the square root of the out-degree
#' matrix and \eqn{P^{1/2}}{P^1/2} is the same for the in-degree matrix.
#'
#' `OAP` is not defined for undirected graphs, and is the only defined type
#' for directed graphs.
#'
#' The default (i.e. type `default`) is to use `D-A` for undirected
#' graphs and `OAP` for directed graphs.
#' @param scaled Logical scalar, if `FALSE`, then \eqn{U} and \eqn{V} are
#' returned instead of \eqn{X} and \eqn{Y}.
#' @param options A named list containing the parameters for the SVD
#' computation algorithm in ARPACK. By default, the list of values is assigned
#' the values given by [arpack_defaults()].
#' @return A list containing with entries: \item{X}{Estimated latent positions,
#' an `n` times `no` matrix, `n` is the number of vertices.}
#' \item{Y}{`NULL` for undirected graphs, the second half of the latent
#' positions for directed graphs, an `n` times `no` matrix, `n`
#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs)
#' or the singular values (for directed graphs) calculated by the algorithm.}
#' \item{options}{A named list, information about the underlying ARPACK
#' computation. See [arpack()] for the details.}
#' @author Gabor Csardi \email{csardi.gabor@@gmail.com}
#' @seealso [embed_adjacency_matrix()],
#' [sample_dot_product()]
#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. A
#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,
#' *Journal of the American Statistical Association*, Vol. 107(499), 2012
#' @keywords graphs
#' @export
#' @examples
#'
#' ## A small graph
#' lpvs <- matrix(rnorm(200), 20, 10)
#' lpvs <- apply(lpvs, 2, function(x) {
#' return(abs(x) / sqrt(sum(x^2)))
#' })
#' RDP <- sample_dot_product(lpvs)
#' embed <- embed_laplacian_matrix(RDP, 5)
#' @family embedding
#' @cdocs igraph_laplacian_spectral_embedding
embed_laplacian_matrix <- laplacian_spectral_embedding_impl
#' Sample vectors uniformly from the surface of a sphere
#'
#' Sample finite-dimensional vectors to use as latent position vectors in
#' random dot product graphs
#'
#' `sample_sphere_surface()` generates uniform samples from \eqn{S^{dim-1}}
#' (the `(dim-1)`-sphere) with radius `radius`, i.e. the Euclidean
#' norm of the samples equal `radius`.
#'
#' @param dim Integer scalar, the dimension of the random vectors.
#' @param n Integer scalar, the sample size.
#' @param radius Numeric scalar, the radius of the sphere to sample.
#' @param positive Logical scalar, whether to sample from the positive orthant
#' of the sphere.
#' @return A `dim` (length of the `alpha` vector for
#' `sample_dirichlet()`) times `n` matrix, whose columns are the sample
#' vectors.
#'
#' @family latent position vector samplers
#'
#' @export
#' @examples
#' lpvs.sph <- sample_sphere_surface(dim = 10, n = 20, radius = 1)
#' RDP.graph.3 <- sample_dot_product(lpvs.sph)
#' vec.norm <- apply(lpvs.sph, 2, function(x) {
#' sum(x^2)
#' })
#' vec.norm
sample_sphere_surface <- function(dim, n = 1, radius = 1, positive = TRUE) {
# Argument checks
dim <- as.numeric(dim)
n <- as.numeric(n)
radius <- as.numeric(radius)
positive <- as.logical(positive)
on.exit(.Call(R_igraph_finalizer))
# Function call
res <- .Call(R_igraph_sample_sphere_surface, dim, n, radius, positive)
res
}
#' Sample vectors uniformly from the volume of a sphere
#'
#' Sample finite-dimensional vectors to use as latent position vectors in
#' random dot product graphs
#'
#' `sample_sphere_volume()` generates uniform samples from \eqn{S^{dim-1}}
#' (the `(dim-1)`-sphere) i.e. the Euclidean norm of the samples is
#' smaller or equal to `radius`.
#'
#' @param dim Integer scalar, the dimension of the random vectors.
#' @param n Integer scalar, the sample size.
#' @param radius Numeric scalar, the radius of the sphere to sample.
#' @param positive Logical scalar, whether to sample from the positive orthant
#' of the sphere.
#' @return A `dim` (length of the `alpha` vector for
#' `sample_dirichlet()`) times `n` matrix, whose columns are the sample
#' vectors.
#'
#' @family latent position vector samplers
#'
#' @export
#' @examples
#' lpvs.sph.vol <- sample_sphere_volume(dim = 10, n = 20, radius = 1)
#' RDP.graph.4 <- sample_dot_product(lpvs.sph.vol)
#' vec.norm <- apply(lpvs.sph.vol, 2, function(x) {
#' sum(x^2)
#' })
#' vec.norm
sample_sphere_volume <- function(dim, n = 1, radius = 1, positive = TRUE) {
# Argument checks
dim <- as.numeric(dim)
n <- as.numeric(n)
radius <- as.numeric(radius)
positive <- as.logical(positive)
on.exit(.Call(R_igraph_finalizer))
# Function call
res <- .Call(R_igraph_sample_sphere_volume, dim, n, radius, positive)
res
}
#' Sample from a Dirichlet distribution
#'
#' Sample finite-dimensional vectors to use as latent position vectors in
#' random dot product graphs
#'
#' `sample_dirichlet()` generates samples from the Dirichlet distribution
#' with given \eqn{\alpha}{alpha} parameter. The sample is drawn from
#' `length(alpha)-1`-simplex.
#'
#' @param n Integer scalar, the sample size.
#' @param alpha Numeric vector, the vector of \eqn{\alpha}{alpha} parameter for
#' the Dirichlet distribution.
#' @return A `dim` (length of the `alpha` vector for
#' `sample_dirichlet()`) times `n` matrix, whose columns are the sample
#' vectors.
#'
#' @family latent position vector samplers
#'
#' @export
#' @examples
#' lpvs.dir <- sample_dirichlet(n = 20, alpha = rep(1, 10))
#' RDP.graph.2 <- sample_dot_product(lpvs.dir)
#' colSums(lpvs.dir)
sample_dirichlet <- function(n, alpha) {
# Argument checks
n <- as.numeric(n)
alpha <- as.numeric(alpha)
on.exit(.Call(R_igraph_finalizer))
# Function call
res <- .Call(R_igraph_sample_dirichlet, n, alpha)
res
}
|