File: hommc.R

package info (click to toggle)
r-cran-markovchain 0.8.5-4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,060 kB
  • sloc: cpp: 2,854; sh: 13; makefile: 2
file content (379 lines) | stat: -rw-r--r-- 11,110 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#' An S4 class for representing High Order Multivariate Markovchain (HOMMC)
#' 
#' @slot order an integer equal to order of Multivariate Markovchain
#' @slot states a vector of states present in the HOMMC model
#' @slot P array of transition matrices
#' @slot Lambda a vector which stores the weightage of each transition matrices in P
#' @slot byrow if FALSE each column sum of transition matrix is 1 else row sum = 1
#' @slot name a name given to hommc
#' 
#' @author Giorgio Spedicato, Deepak Yadav
#' 
#' @examples 
#' statesName <- c("a", "b")
#' 
#' P <- array(0, dim = c(2, 2, 4), dimnames = list(statesName, statesName))
#' P[,,1] <- matrix(c(0, 1, 1/3, 2/3), byrow = FALSE, nrow = 2)
#' P[,,2] <- matrix(c(1/4, 3/4, 0, 1), byrow = FALSE, nrow = 2)
#' P[,,3] <- matrix(c(1, 0, 1/3, 2/3), byrow = FALSE, nrow = 2)
#' P[,,4] <- matrix(c(3/4, 1/4, 0, 1), byrow = FALSE, nrow = 2)
#' 
#' Lambda <- c(0.8, 0.2, 0.3, 0.7)
#' 
#' ob <- new("hommc", order = 1, states = statesName, P = P, 
#'           Lambda = Lambda, byrow = FALSE, name = "FOMMC")
#'           
#'@export
hommc <- setClass("hommc",
                      slots = list(order = "numeric", states  =  "character",
                      P = "array", Lambda = "numeric", byrow = "logical",
                      name = "character")
)

# internal method to show hommc object in informative way
.showHommc <- function(object) {
  
  # whether data in transition matrices are stored in column-wise or row-wise fashion
  if(object@byrow == TRUE) {
    direction <- "(by rows)" 
  } else {
    direction <- "(by cols)" 
  }
  
  # display order and unique states
  cat("Order of multivariate markov chain =", object@order, "\n")
  cat("states =", object@states, "\n")
  
  cat("\n")
  cat("List of Lambda's and the corresponding transition matrix", direction,":\n")
  
  # display transition matrices and the corresponding lambdas
  n <- object@order
  s <- sqrt((dim(object@P))[3]/n)
  
  for(i in 1:s) {
    for(j in 1:s) {
      # t is the index of transition matrix for transition from i sequence to j sequence
      # order of transition matrices in P is P1{1,1},P2{1,1}..Pn{1,1},P1{1,2}....Pn{s,s}
      t <- n * s * (i-1) + (j-1) * n 
      for(k in 1:n) {
        cat("Lambda", k, "(", i, ",", j, ") : ", object@Lambda[t+k],"\n", sep = "")
        cat("P", k, "(", i, ",", j, ") : \n", sep = "")
        print(object@P[, , t+k])
        cat("\n")
      }  
    }
  }
}

#' @title Function to display the details of hommc object
#' @description This is a convenience function to display the slots of hommc object
#'              in proper format
#' 
#' @param object An object of class hommc
#' 
#' @rdname hommc-show
#' @export                             
setMethod("show", "hommc",
          function(object){
            .showHommc(object)
          }
)

# all transition matrices
# n*s*s n = order s = number of categorical sequences
# verified using two examples from research paper
.allTransMat <- function(data, order = 2) {
  n <- order # order
  uelement <- sort(unique(as.character(data))) # unique element
  m <- length(uelement) # dim of trans-matrix
  s <- nrow(data) # number of categorical sequence
  lseq <- ncol(data) # length of each categorical sequence
  
  # store all transition matrices
  allTmat <- array(dim = c(length(uelement), length(uelement), n*s*s), dimnames = list(uelement, uelement))
  
  t <- 1 # help
  
  for(i in 1:s) {
    for(j in 1:s) {
      x <- data[j, ] # jth sequence
      y <- data[i, ] # ith sequence
      
      # jumps
      for(h in 1:n) {
        # column wise
        allTmat[ , , t] <- t(createSequenceMatrix(matrix(c(x[1:(lseq-h)], y[-(1:h)]),
                            ncol = 2, byrow = FALSE), toRowProbs = TRUE, 
                            possibleStates = uelement, sanitize = TRUE))
        t <- t + 1
      }
    }
  }
  return(allTmat)
}

# distribution of each categorical sequence based on the frequency
# verified using two examples from research paper
.allFreqProbMat <- function(data) {
  
  uelement <- sort(unique(as.character(data))) # unique element
  m <- length(uelement) # dim of trans-matrix
  s <- nrow(data) # number of categorical sequence
  
  # frequency based probability for all sequences
  freqMat <- array(0, dim = c(m, 1, s), dimnames = list(uelement)) 
  
  for(i in 1:s) {
    idata <- data[i, ] # ith categorical sequence
    
    # populate frequency matrix
    for(j in idata) {
      freqMat[j, 1, i] <- freqMat[j, 1, i] + 1
    }
    
    # normalization
    freqMat[, , i] <- freqMat[, , i] / sum(freqMat[, , i])
  }
  
  return(freqMat)
}

# objective function to pass to solnp
.fn3 <- function(params, ...) {
  
  hdata <- list(...)
  
  # calculate error
  error <- 0
  
  # number of categorical sequence
  s <- hdata$s
  
  # order
  n <- hdata$n
  
  # number of uniq states || dimension of t-matrix
  m <- hdata$m
  
  # array of transition matrices
  allTmat <- hdata$allTmat
  
  # all frequency matrix
  freqMat <- hdata$freqMat
  
  # norm
  Norm <- hdata$Norm
  
  for(i in 1:s) {
    helper <- matrix(0, nrow = m*n, ncol = 1)
    for(j in 1:s) {
      helper2 <- matrix(0, nrow = m, ncol = 1)
      y <- n * (j - 1 + s * (i - 1))
      
      for(k in 1:n) {
        helper2 <- helper2 + params[y + k] * (allTmat[ , , y + k] %*% matrix(freqMat[ , , j]))
      }
      
      helper[1:m, ] <- helper[1:m, ] + helper2
      
      if(i == j && n>= 2) {
        for(k in 2:n) {
          p <- (k - 1) * m
          helper[(p + 1):(p + m)] <- freqMat[ , , j]
        }
      }
    }
    error <- error + sum(abs((helper - freqMat[ , , i]) ^ Norm))
  }
  
  return(error ^ (1 / Norm))
}

# equality constraint function to pass to solnp
.eqn3 <- function(params, ...) {
  
  hdata <- list(...)
  
  # number of categorical sequence
  s <- hdata$s
  
  # order
  n <- hdata$n
  
  toReturn <- numeric()
  
  for(i in 1:s) {
    toReturn[i] <- sum(params[((i - 1) * n * s + 1):(i * n * s)])
  }
  return(toReturn)
}

#' Function to fit Higher Order Multivariate Markov chain
#'
#' @description Given a matrix of categorical sequences it fits 
#'              Higher Order Multivariate Markov chain.
#'
#' @param seqMat a matrix or a data frame where each column 
#'               is a categorical sequence
#' @param order Multivariate Markov chain order. Default is 2.
#' @param Norm Norm to be used. Default is 2.
#' 
#' @return an hommc object
#' 
#' @examples 
#' data <- matrix(c('2', '1', '3', '3', '4', '3', '2', '1', '3', '3', '2', '1', 
#'                c('2', '4', '4', '4', '4', '2', '3', '3', '1', '4', '3', '3')), 
#'                ncol = 2, byrow = FALSE)
#'                
#' fitHighOrderMultivarMC(data, order = 2, Norm = 2)                
#' 
#' @references W.-K. Ching et al. / Linear Algebra and its Applications
#' 
#' @author Giorgio Spedicato, Deepak Yadav
#' 
#' @export
fitHighOrderMultivarMC <- function(seqMat, order = 2, Norm = 2) {
  
  message("This function is experimental")
  
  if(is.data.frame(seqMat) == TRUE) {
    seqMat <- as.matrix(seqMat)
  }
  
  seqMat <- t(seqMat)
  
  # array of transition matrices
  allTmat <- .allTransMat(seqMat, order = order)
  
  # array of freq probability
  freqMat <- .allFreqProbMat(seqMat)
  
  n <- order # order
  uelement <- sort(unique(as.character(seqMat))) # unique element
  m <- length(uelement) # dim of trans-matrix
  s <- nrow(seqMat) # number of categorical sequence
  
  lmbda <- rep(1 / (n * s), n * s * s)
  
  fit <- Rsolnp::solnp(pars = lmbda, fun =  .fn3, eqfun = .eqn3, eqB = rep(1, s),
                       LB = rep(0, n * s * s), control = list(trace = 0), 
                       allTmat = allTmat, freqMat = freqMat, n = n, m = m, s = s, Norm = Norm)
  
  
  return(new("hommc", order = order, Lambda = fit$pars, P = allTmat, states = uelement, byrow = FALSE))
}


#' Simulate a higher order multivariate markovchain
#' 
#' @description 
#' This function provides a prediction of states for a higher order 
#' multivariate markovchain object
#' 
#' @usage predictHommc(hommc,t,init)
#' 
#' @param hommc a hommc-class object
#' @param t no of iterations to predict
#' @param init matrix of previous states size of which depends on hommc
#' 
#' @details 
#' The user is required to provide a matrix of giving n previous coressponding
#' every categorical sequence. Dimensions of the init are s X n, where s is 
#' number of categorical sequences and n is order of the homc.
#' 
#' @return 
#' The function returns a matrix of size s X t displaying t predicted states 
#' in each row coressponding to every categorical sequence. 
#' 
#' @author Vandit Jain
#' 
#' 
#' @export
predictHommc <- function(hommc, t, init) {
  ## order of markovchain 
  n <- hommc@order
  
  ## number of categorical sequences
  s <- sqrt((dim(hommc@P))[3]/n)
  
  ## list of states
  states <- hommc@states
  
  ## size of set of all possible states
  m <- length(states)
  
  ## if initial states not provided take statndard example
  if(missing(init)) {
    init <- matrix(rep(states[1],s*n),nrow = s,byrow = TRUE)
  }
  
  if(!all(dim(init) == c(s,n))){
    stop("Please provide sufficient number of previous states")
  }
  
  if(class(hommc)!= "hommc") {
    stop("Please provide a valid hommc-class object")
  }
  
  if(t <=0)
    stop("T should be a positive integer")
  
  for(i in 1:s)
  {
    for(j in 1:n)
    {
      if(!(init[i,j] %in% states))
        stop("invalid states in provided state matrix init")
    }
  }
  
  ## initialize result matrix
  result <- matrix(NA,nrow = s,ncol = t)
  
  ## runs loop according to hommc class structure
  for(i in 1:t)
  {
    for(j in 1:s)
    {
      ## initialises probability according
      rowProbs <- rep(0,m)
      
      ## probability for current sequence depends all sequence
      for(k in 1:s)
      {
        ## gets index of coressponding in the 3-D array P 
        # index is the index of transition matrix for transition from i sequence to j sequence
        # order of transition matrices in P is P1{1,1},P2{1,1}..Pn{1,1},P1{1,2}....Pn{s,s}
        index <- n * s * (j-1) + n * (k-1)
        
        ## iterates for all order 1 to n
        for(h in 1:n)
        {
          prev <- init[j,n-h+1]
          label <- which(prev == states)
          rowProbs <- rowProbs + hommc@Lambda[h + index] * hommc@P[label, ,h + index]
        }
      }
      
      ## uses sample function from base package
      curr <- sample(size = 1, x = states, prob = rowProbs)
      
      ## changes init for next t iteration
      for(temp in 2:n)
      {
        if(temp <= n)
        init[j,temp-1] = init[j,temp]
      }
      init[j,n] = curr;
      result[j,i] = curr;
    }
  }
  
  ## returns result
  return(result)
}