File: test_cd.R

package info (click to toggle)
r-cran-plm 2.6-2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,032 kB
  • sloc: sh: 13; makefile: 4
file content (607 lines) | stat: -rw-r--r-- 24,736 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607

############## Pesaran's CD test and Breusch/Pagan LM Test (also scaled) ###############

  ## Pesaran's CD test for cross-sectional dependence in panel data models
  ## (and Breusch and Pagan's LM and scaled LM)
  ## ref. Pesaran, General diagnostic tests..., CESifo WP 1229, 2004

  ## In case K+1>T the group-specific model is not estimable;
  ## as in Greene 11.7.2, formula (11.23) we use the group-specific residuals
  ## of a consistent estimator. This may be pooled OLS, RE, FE. Here the
  ## default is set to FE.

  ## Note that the test can be performed on the results of plm objects with
  ## any kind of effects: having "time" effects means checking for
  ## xs-dependence *after* introducing time dummies.

  ## In principle, the test can be performed on the results of *any*
  ## panelmodel object. Some issues remain regarding standardization of
  ## model output: some missing pieces are, e.g., the 'model$indexes'
  ## in ggls. ''fd'' models are also not compatible because of indexes
  ## keeping the original timespan, while data lose the first period.

## production version, generic and based on plm

## version 11: added test = "bcsclm"
##
## version 10:
## substantial optimization for speed, now fast (few seconds) on N=3000
## all methods pass on a pseries to pcdres()

## make toy example
#dati <- data.frame(ind=rep(1:7, 4), time=rep(1:4, each=7), x=rnorm(28),
#                   group=rep(c(1,1,2,2,2,3,3), 4))
#pdati <- pdata.frame(dati)

#' Tests of cross-section dependence for panel models
#' 
#' Pesaran's CD or Breusch--Pagan's LM (local or global) tests for cross
#' sectional dependence in panel models
#' 
#' These tests are originally meant to use the residuals of separate
#' estimation of one time--series regression for each cross-sectional
#' unit in order to check for cross--sectional dependence (`model = NULL`).
#' If a different model specification (`model = "within"`, `"random"`, 
#' \ldots{}) is assumed consistent, one can resort to its residuals for
#' testing (which is common, e.g., when the time dimension's length is
#' insufficient for estimating the heterogeneous model).
#' 
#' If the time
#' dimension is insufficient and `model = NULL`, the function defaults
#' to estimation of a `within` model and issues a warning. The main
#' argument of this function may be either a model of class
#' `panelmodel` or a `formula` and `data frame`; in the second case,
#' unless `model` is set to `NULL`, all usual parameters relative to
#' the estimation of a `plm` model may be passed on. The test is
#' compatible with any consistent `panelmodel` for the data at hand,
#' with any specification of `effect` (except for `test = "bcsclm"` which
#' requires a within model with either individual or two-ways effect).
#' E.g., specifying  `effect = "time"` or `effect = "twoways"` allows
#' to test for residual cross-sectional dependence after the introduction
#' of time fixed effects to account for common shocks.
#' 
#' A **local** version of either test can be computed by supplying a
#' proximity matrix (elements coercible to `logical`) with argument
#' `w` which provides information on whether any pair of individuals
#' are neighbours or not. If `w` is supplied, only neighbouring pairs
#' will be used in computing the test; else, `w` will default to
#' `NULL` and all observations will be used. The matrix need not be
#' binary, so commonly used "row--standardized" matrices can be
#' employed as well. `nb` objects from \CRANpkg{spdep} must instead be
#' transformed into matrices by \CRANpkg{spdep}'s function `nb2mat`
#' before using.
#' 
#' The methods implemented are suitable also for unbalanced panels.
#' 
#' Pesaran's CD test (`test="cd"`), Breusch and Pagan's LM test
#' (`test="lm"`), and its scaled version (`test="sclm"`) are all
#' described in \insertCite{PESA:04;textual}{plm} (and complemented by
#' Pesaran (2005)). The bias-corrected scaled test (`test="bcsclm"`)
#' is due to \insertCite{BALT:FENG:KAO:12}{plm} and only valid for
#' within models including the individual effect (it's unbalanced
#' version uses max(Tij) for T) in the bias-correction term).
#' \insertCite{BREU:PAGA:80;textual}{plm} is the original source for
#' the LM test.
#' 
#' The test on a `pseries` is the same as a test on a pooled
#' regression model of that variable on a constant, i.e.,
#' `pcdtest(some_pseries)` is equivalent to `pcdtest(plm(some_var ~ 1,
#' data = some_pdata.frame, model = "pooling")` and also equivalent to
#' `pcdtest(some_var ~ 1, data = some_data)`, where `some_var` is
#' the variable name in the data which corresponds to `some_pseries`.
#' 
#' @aliases pcdtest
#' @param x an object of class `formula`, `panelmodel`, or `pseries`
#'     (depending on the respective interface) describing the model to
#'     be tested,
#' @param data a `data.frame`,
#' @param index an optional numerical index, if `NULL`, the first two
#'     columns of the data.frame provided in argument `data` are
#'     assumed to be the index variables; for further details see
#'     [pdata.frame()],
#' @param model an optional character string indicating which type of
#'     model to estimate; if left to `NULL`, the original
#'     heterogeneous specification of Pesaran is used,
#' @param test the type of test statistic to be returned. One of
#'     \itemize{ \item `"cd"` for Pesaran's CD statistic, \item `"lm"`
#'     for Breusch and Pagan's original LM statistic, \item `"sclm"`
#'     for the scaled version of Breusch and Pagan's LM statistic,
#'     \item `"bcsclm"` for the bias-corrected scaled version of
#'     Breusch and Pagan's LM statistic, \item `"rho"` for the average
#'     correlation coefficient, \item `"absrho"` for the average
#'     absolute correlation coefficient,}
#' @param w either `NULL` (default) for the global tests or -- for the
#'     local versions of the statistics -- a `n x n` `matrix`
#'     describing proximity between individuals, with \eqn{w_ij = a}
#'     where \eqn{a} is any number such that `as.logical(a)==TRUE`, if
#'     \eqn{i,j} are neighbours, \eqn{0} or any number \eqn{b} such
#'     that `as.logical(b)==FALSE` elsewhere. Only the lower
#'     triangular part (without diagonal) of `w` after coercing by
#'     `as.logical()` is evaluated for neighbouring information (but
#'     `w` can be symmetric). See also **Details** and
#'     **Examples**,
#' @param \dots further arguments to be passed on for model estimation to `plm`,
#'    such as `effect` or `random.method`.
#' @return An object of class `"htest"`.
#' @export
#' @references
#'
#' \insertRef{BALT:FENG:KAO:12}{plm}
#' 
#' \insertRef{BREU:PAGA:80}{plm}
#' 
#' \insertRef{PESA:04}{plm}
#'
#' \insertRef{PESA:15}{plm}
#' 
#' @keywords htest
#' @examples
#' 
#' data("Grunfeld", package = "plm")
#' ## test on heterogeneous model (separate time series regressions)
#' pcdtest(inv ~ value + capital, data = Grunfeld,
#'         index = c("firm", "year"))
#' 
#' ## test on two-way fixed effects homogeneous model
#' pcdtest(inv ~ value + capital, data = Grunfeld, model = "within",
#'         effect = "twoways", index = c("firm", "year"))
#' 
#' ## test on panelmodel object
#' g <- plm(inv ~ value + capital, data = Grunfeld, index = c("firm", "year"))
#' pcdtest(g)
#' 
#' ## scaled LM test
#' pcdtest(g, test = "sclm")
#' 
#' ## test on pseries
#' pGrunfeld <- pdata.frame(Grunfeld)
#' pcdtest(pGrunfeld$value)
#' 
#' ## local test
#' ## define neighbours for individual 2: 1, 3, 4, 5 in lower triangular matrix
#' w <- matrix(0, ncol= 10, nrow=10)
#' w[2,1] <- w[3,2] <- w[4,2] <- w[5,2] <- 1
#' pcdtest(g, w = w)
#' 
pcdtest <- function(x, ...)
{
    UseMethod("pcdtest")
}

## this formula method here only for adding "rho" and "absrho"
## arguments

#' @rdname pcdtest
#' @export
pcdtest.formula <- function(x, data, index = NULL, model = NULL, 
                            test = c("cd", "sclm", "bcsclm", "lm", "rho", "absrho"),
                            w = NULL, ...) {
    #data <- pdata.frame(data, index = index)
    test <- match.arg(test)
    if(test == "bcsclm" && (is.null(model) || model != "within"))
      stop("for test = 'bcsclm', set argument model = 'within'")

    # evaluate formula in parent frame
    cl <- match.call(expand.dots = TRUE)
    cl$model  <- if(test != "bcsclm") "pooling" else "within"
      if(test == "bcsclm") {
        # check args model and effect for test = "bcsclm"
        if(is.null(cl$effect)) cl$effect <- "individual" # make default within model is individual within
        eff <- isTRUE(cl$effect == "individual" || cl$effect == "twoways")
        if(model != "within" || !eff) stop("for test = 'bcsclm', requirement is model = \"within\" and effect = \"individual\" or \"twoways\"")
      }
    names(cl)[2L] <- "formula"
    m <- match(plm.arg, names(cl), 0L)
    cl <- cl[c(1L, m)]
    cl[[1L]] <- as.name("plm")
    mymod <- eval(cl, parent.frame()) # mymod is either "pooling" or "within" (the latter iff for test = "bcsclm")
    
    hetero.spec <- if(is.null(model)) TRUE else FALSE
    
    if(hetero.spec && min(pdim(mymod)$Tint$Ti) < length(mymod$coefficients)+1) {
      warning("Insufficient number of observations in time to estimate heterogeneous model: using within residuals",
            call. = FALSE)
      hetero.spec <- FALSE
      model <- "within"
    }

    ind0 <- unclass(attr(model.frame(mymod), "index")) # unclass for speed
    tind <- as.numeric(ind0[[2L]])
    ind  <- as.numeric(ind0[[1L]])

    unind <- unique(ind)
    n <- length(unind)
    
    if(hetero.spec) {
        ## estimate individual normal regressions one by one
        ## (original heterogeneous specification of Pesaran)
        X <- model.matrix(mymod)
        y <- model.response(model.frame(mymod))
 
        # split X, y per individual
        X.ncol <- NCOL(X)
        tX <- split(X, ind)
        tX <- lapply(tX, function(m) matrix(m, ncol = X.ncol))
        ty <- split(y, ind)

        # calc. residuals
        res.i <- mapply(function(X, y) lm.fit(X, y)$residuals, tX, ty, SIMPLIFY = FALSE)
        
        # construct indexes
        ind.i <- rep(seq_len(n), lengths(res.i))
        tind.i <- split(tind, ind)
        tind.i <- unlist(tind.i, use.names = FALSE)
        
        ## make pseries of (all) residuals
        resdata <- data.frame(ee   = unlist(res.i,  use.names = FALSE),
                              ind  = ind.i,
                              tind = tind.i)
        
        pee <- pdata.frame(resdata, index = c("ind", "tind"))
        tres <- pee$ee
    } else {
      # else case is one of:
      # a) insufficient number of observations for heterogen. spec. or
      # b) model specified when function was called (incl. case test = "bcsclm")
      if(test != "bcsclm") {
        # Estimate the model specified originally in function call or due to
        # forced model switch to within model by insufficient number of
        # observations for heterogen. spec.
        # (for test = "bcsclm" it is ensured that a within model was already
        # estimated -> no need to estimate again a within model)
        cl$model <- model
        mymod <- eval(cl, parent.frame())
      }
      
      tres <- resid(mymod)
    }

    return(pcdres(tres = tres, n = n, w = w,
                  form = paste(deparse(x)),
                  test = test))
}


## panelmodel method: just fetch resid (as a pseries) and hand over to pcdres
 
#' @rdname pcdtest
#' @export
pcdtest.panelmodel <- function(x, test = c("cd", "sclm", "bcsclm", "lm", "rho", "absrho"),
                               w = NULL, ...) {
    
    test <- match.arg(test)
    model <- describe(x, "model")
    effect <- describe(x, "effect")
    eff <- (effect == "individual" || effect == "twoways")
    if (test == "bcsclm" && (model != "within" || !eff))
      stop("for test = 'bcsclm', model x must be a within individual or twoways model")
    
    tres <- resid(x)
    index <- unclass(attr(model.frame(x), "index")) # unclass for speed
    #tind <- as.numeric(index[[2L]])
    ind <- as.numeric(index[[1L]])
    unind <- unique(ind)
    n <- length(unind)
    #t <- pdim(x)$Tint$Ti
    #nT <- length(ind)
    #k <- length(x$coefficients)
    return(pcdres(tres = tres, n = n, w = w,
                  form = paste(deparse(x$formula)),
                  test = test))
}

#' @rdname pcdtest
#' @export
pcdtest.pseries <- function(x, test = c("cd", "sclm", "bcsclm", "lm", "rho", "absrho"),
                             w = NULL, ...) {
  
    ## calculates local or global CD test on a pseries 'x' just as it
    ## would on model residuals
    ## important difference here: a pseries _can_ have NAs
  
    # input check
    if (!inherits(x, "pseries")) stop("input 'x' needs to be of class \"pseries\"")
    form <- paste(deparse(substitute(x)))
  
    pos.na <- is.na(x)
    if (any(pos.na)) {
      x <- subset_pseries(x, !pos.na) # TODO: use [.pseries (pseries subsetting) once implemented
      warning("NA values encountered in input and removed")
      if (length(x) == 0L) stop("input is empty after removal of NA values")
    }
  
    ## get indices
    ix <- unclass(attr(x, "index")) # unclass for speed
    tind <- as.numeric(ix[[2L]])
    ind  <- as.numeric(ix[[1L]])

    ## det. number of groups and df
    unind <- unique(ind)
    n <- length(unind)

    return(pcdres(tres = x, n = n, w = w,
                  form = form,
                  test = match.arg(test)))
}

pcdres <- function(tres, n, w, form, test) {
  # 'form' is a character describing the formula (not a formula object!)
  # and goes into htest_object$data.name

  ## Take model residuals as pseries, and calc. test
  ## (from here on, what's needed for rho_ij is ok)
    
  ## this function is the modulus calculating the test,
  ## to be called from pcdtest.formula,
  ## pcdtest.panelmodel or pcdtest.pseries

  ## now (since v10) tres is the pseries of model residuals
    
    ## calc matrix of all possible pairwise corr.
    ## coeffs. (200x speedup from using cor())
    wideres <- t(preshape(tres, na.rm = FALSE))
    rho <- cor(wideres, use = "pairwise.complete.obs")
    
    ## find length of intersecting pairs
    ## fast method, times down 200x
    ix <- unclass(attr(tres, "index")) # unclass for speed
    ## tabulate which obs in time for each ind are !na
    presence.tab <- collapse::qtable(ix[[2L]], ix[[1L]])
    ## calculate t.ij
    t.ij <- crossprod(presence.tab)
    
  # input check
  if (!is.null(w)) {
    dims.w <- dim(w)
    if(dims.w[1L] != n || dims.w[2L] != n)
      stop(paste0("matrix 'w' describing proximity of individuals has wrong dimensions: ",
           "should be ", n, " x ", n, " (no. of individuals) but is ", dims.w[1L], " x ", dims.w[2L]))
  }
  

  ## begin features for local test ####################
  ## higher orders omitted for now, use wlag() explicitly

  ## if global test, set all elements in w to 1
  if(is.null(w)) {
    w <- matrix(1, ncol = n, nrow = n)
    dep <- ""
  } else { dep <- "local" }

  ## make (binary) selector matrix based on the contiguity matrix w
  ## and extracting elements corresponding to ones in the lower triangle
  ## excluding the diagonal

  ## transform in logicals (0=FALSE, else=TRUE: no need to worry
  ## about row-std. matrices)
  selector.mat <- matrix(as.logical(w), ncol = n)
  
  ## some sanity checks for 'w' (not perfect sanity, but helps)
  if (sum(selector.mat[lower.tri(selector.mat, diag = FALSE)]) == 0) {
    stop(paste0("no neighbouring individuals defined in proximity matrix 'w'; ",
                "only lower triangular part of 'w' (w/o diagonal) is evaluated"))
  } else {
    if (sum(selector.mat[upper.tri(selector.mat, diag = FALSE)]) != 0) {
      if (!isSymmetric((unname(selector.mat)))) { # unname needed to ignore rownames and colnames
        stop(paste0("proximity matrix 'w' is ambiguous: upper and lower triangular part ",
                    "define different neighbours (it is sufficient to provide information ",
                    "about neighbours only in the lower triangluar part of 'w'"))
      }
    }
  }
  
  ## if no intersection or only 1 shared period of e_it and e_jt
  ## => exclude from calculation and issue a warning.
  ## In general, length(m.ij) gives the number of shared periods by individuals i, j
  ## Thus, non intersecting pairs are indicated by length(m.ij) == 0 (t.ij[i,j] == 0)
  no.one.intersect <- (t.ij <= 1)
  if (any(no.one.intersect, na.rm = TRUE)) {
    # t.ij is a lower triangular matrix: do not divide by 2 to get the number of non-intersecting pairs!
    number.of.non.one.intersecting.pairs <- sum(no.one.intersect, na.rm = TRUE)
    number.of.total.pairs <- (n*(n-1))/2
    share.on.one.intersect.pairs <- number.of.non.one.intersecting.pairs / number.of.total.pairs * 100
    warning(paste("Some pairs of individuals (",
                  signif(share.on.one.intersect.pairs, digits = 2),
                  " percent) do not have any or just one time period in common and have been omitted from calculation", sep=""))
    selector.mat[no.one.intersect] <- FALSE
  }

  ## set upper tri and diagonal to FALSE
  selector.mat[upper.tri(selector.mat, diag = TRUE)] <- FALSE

  ## number of elements in selector.mat
  ## elem.num = 2*(N*(N-1)) in Pesaran (2004), formulae (6), (7), (31), ...
  elem.num <- sum(selector.mat)

  ## end features for local test ######################

  ## Breusch-Pagan or Pesaran statistic for cross-sectional dependence,
  ## robust vs. unbalanced panels:

  switch(test,
   lm = {
    CDstat        <- sum((t.ij*rho^2)[selector.mat])
    pCD           <- pchisq(CDstat, df = elem.num, lower.tail = FALSE)
    names(CDstat) <- "chisq"
    parm          <- elem.num
    names(parm)   <- "df"
    testname      <- "Breusch-Pagan LM test"
   },
   sclm = {
    CDstat        <- sqrt(1/(2*elem.num))*sum((t.ij*rho^2-1)[selector.mat])
    pCD           <- 2*pnorm(abs(CDstat), lower.tail = FALSE)
    names(CDstat) <- "z"
    parm          <- NULL
    testname      <- "Scaled LM test"
   },
   bcsclm = {
     # Baltagi/Feng/Kao (2012), formula (11)
     # (unbalanced case as sclm + bias correction as EViews: max(T_ij) instead of T)
      CDstat        <- sqrt(1/(2*elem.num))*sum((t.ij*rho^2-1)[selector.mat]) - (n/(2*(max(t.ij)-1)))
      pCD           <- 2*pnorm(abs(CDstat), lower.tail = FALSE)
      names(CDstat) <- "z"
      parm          <- NULL
      testname      <- "Bias-corrected Scaled LM test"
   },
   cd = {
     # (Pesaran (2004), formula (31))
    CDstat        <- sqrt(1/elem.num)*sum((sqrt(t.ij)*rho)[selector.mat]) 
    pCD           <- 2*pnorm(abs(CDstat), lower.tail = FALSE)
    names(CDstat) <- "z"
    parm          <- NULL
    testname      <- "Pesaran CD test"
   },
   rho = {
    CDstat        <- sum(rho[selector.mat])/elem.num
    pCD           <- NULL
    names(CDstat) <- "rho"
    parm          <- NULL
    testname      <- "Average correlation coefficient"
   },
   absrho = {
    CDstat        <- sum(abs(rho)[selector.mat])/elem.num
    pCD           <- NULL
    names(CDstat) <- "|rho|"
    parm          <- NULL
    testname      <- "Average absolute correlation coefficient"
   })

  ##(insert usual htest features)
  RVAL <- list(statistic = CDstat,
               parameter = parm,
               method    = paste(testname, "for", dep,
                            "cross-sectional dependence in panels"),
               alternative = "cross-sectional dependence",
               p.value     = pCD,
               data.name   = form)
  class(RVAL) <- "htest"
  return(RVAL)
}

preshape <- function(x, na.rm = TRUE, ...) {
    ## reshapes pseries,
    ## e.g., of residuals from a panelmodel,
    ## in wide form
    inames <- names(attr(x, "index"))
    mres <- reshape(cbind(as.vector(x),
                          attr(x, "index")),
                    direction = "wide",
                    timevar   = inames[2L],
                    idvar     = inames[1L])
    ## drop ind in first column
    mres <- mres[ , -1L, drop = FALSE]
    ## reorder columns (may be scrambled depending on first
    ## available obs in unbalanced panels)
    mres <- mres[ , order(dimnames(mres)[[2L]])]
    ## if requested, drop columns (time periods) with NAs
    if(na.rm) {
        na.cols <- vapply(mres, FUN = anyNA, FUN.VALUE = TRUE, USE.NAMES = FALSE)
        if(sum(na.cols) > 0L) mres <- mres[ , !na.cols, drop = FALSE]
    }
    return(mres)
}




#' Cross--sectional correlation matrix
#' 
#' Computes the cross--sectional correlation matrix
#' 
#' 
#' @param x an object of class `pseries`
#' @param grouping grouping variable,
#' @param groupnames a character vector of group names,
#' @param value to complete,
#' @param \dots further arguments.
#' @return A matrix with average correlation coefficients within a group
#' (diagonal) and between groups (off-diagonal).
#' @export
#' @keywords htest
#' @examples
#' 
#' data("Grunfeld", package = "plm")
#' pGrunfeld <- pdata.frame(Grunfeld)
#' grp <- c(rep(1, 100), rep(2, 50), rep(3, 50)) # make 3 groups
#' cortab(pGrunfeld$value, grouping = grp, groupnames = c("A", "B", "C"))
#' 
cortab <- function(x, grouping, groupnames = NULL,
                   value = "statistic", ...) {
    ## makes matrix containing within (diagonal) and between (off-diagonal)
    ## correlation
    ## needs a pseries and a groupings vector of **same length**

    ## would use a better naming, and also passing a char or factor as
    ## grouping index

    ## x must be a pseries
    if(!inherits(x, "pseries")) stop("First argument must be a pseries")
    if(length(x) != length(grouping)) stop("arguments 'x' and 'grouping' must have same length")

    fullind <- as.numeric(attr(x, "index")[ , 1L])
    ids <- unique(fullind)
    n <- length(ids)
    regs <- seq_along(unique(grouping))

    if(!(is.numeric(grouping))) grouping <- as.numeric(as.factor(grouping))
    
    idnames <- as.character(ids)
    if(is.null(groupnames)) {
        groupnames <- as.character(unique(grouping))
    }

    ## make matrices of between-regions correlations
    ## (includes within correlation on diagonal)
    ## for each pair of regions (nb: no duplicates, e.g., 3.1 but not 1.3)

    ## make w<1.n>:
    for(h in seq_along(regs)) {
      for(k in seq_len(h)) {
        statew <- matrix(0, ncol = n, nrow = n)
        ## make statew for cor. between h and k
        for(i in seq_len(n)) {
          ## get first region (all values equal, so take first one)
          ireg <- grouping[fullind == ids[i]][1L] # TODO: can be made faster via split()-approach
          if(ireg == h) {
            for(j in seq_len(n)) {
                jreg <- grouping[fullind == ids[j]][1L] # TODO: can be made faster via split()-approach
                if(jreg == k) statew[i, j] <- 1
            }
          }
        }
        if(h!=k) statew <- statew + t(statew)
        ## just for debugging reasons:
        dimnames(statew) <- list(idnames, idnames)
        ## eliminate self.correlation of states if i=j
        diag(statew) <- 0
        ## not needed: pcdtest seems to do this by construction
        eval(parse(text=paste("w", h, ".", k, " <- statew", sep="")))
      }
     }

     ## notice: without the line
     ## '' if(i!=j) statew <- statew + t(statew) ''
     ## all wn.n matrices would have values only on one half (upper
     ## or lower triangle)

     ## make generic table of regions' within and between correlation
     ## argument: a pseries
    #YC regnames is undefined, so is myw
    tab.g <- function(x, regs, regnames, test="rho", value) {
        myw <- 0
         tabg <- matrix(NA, ncol=length(regs), nrow=length(regs))
         for(i in seq_along(regs)) {
             for(j in seq_len(i)) {
                 ## take appropriate w matrix
                 eval(parse(text = paste("myw<-w", i, ".", j, sep = "")))
                 tabg[i, j] <- pcdtest(x, test = "rho", w = myw)[[value]]
             }
         }
         dimnames(tabg) <- list(groupnames, groupnames)
         return(tabg)
    }
    regnames <- ""
    mytab <- tab.g(x, regs = regs, regnames = regnames, test = "rho", value = value)
    return(mytab)
}