File: spss.R

package info (click to toggle)
foreign 0.8.91-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,712 kB
  • sloc: ansic: 7,572; asm: 4; makefile: 2
file content (110 lines) | stat: -rw-r--r-- 4,013 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
library(foreign)

sample100 <- read.spss("sample100.sav",FALSE)
summary(sample100)
str(sample100)
d.sample100 <- data.frame(sample100,stringsAsFactors=TRUE)
summary(d.sample100)
s100 <- sample100
sample100 <- read.spss("sample100.por",FALSE)
stopifnot(identical(s100, sample100)) # no need for further summary() etc

pbc <- read.spss("pbc.sav",FALSE)
summary(pbc)
str(pbc)
d.pbc <- data.frame(pbc)
summary(d.pbc)
pbco <- read.spss("pbcold.sav",FALSE)
stopifnot(identical(pbc, pbco))
## summary(pbco)
## str(pbco)
## d.pbco <- data.frame(pbco)
## summary(d.pbco)
pbc. <- read.spss("pbc.por",FALSE)
summary(pbc.)
str(pbc.) # has variable.labels
stopifnot(all.equal(d.pbc, data.frame(pbc.), tolerance = 1e-15))

electric.s  <- read.spss(system.file("files", "electric.sav", package = "foreign"), TRUE, TRUE)
electric.p  <- read.spss("electric.por",TRUE,TRUE)
electric.s4 <- read.spss(system.file("files", "electric.sav", package = "foreign"), TRUE, TRUE, max.value.labels = 4)
smmry <- summary(electric.s)
if (getRversion() < "4.6.0") # "backport" NA's -> NAs to match reference output
    smmry <- sub("NA's", "NAs ", smmry, fixed = TRUE)
smmry
ii <- c(2,10)
vl <- list(FIRSTCHD = c("OTHER   CHD"= 6, "FATAL   MI"= 5, "NONFATALMI"= 3,
            "SUDDEN  DEATH" = 2, "NO CHD" = 1),
       DAYOFWK = c(SATURDAY=7, FRIDAY=6, THURSDAY=5,
               WEDNSDAY=4, TUESDAY=3, MONDAY=2, SUNDAY=1))
stopifnot(identical(electric.s,      electric.p),
      identical(electric.s[-ii], electric.s4[-ii]),
      identical(vl, lapply(electric.s4[ii], attr, "value.labels")),
      identical(lapply(vl, names),
            lapply(electric.s[ii], function(.) rev(levels(.)))))


## after "long label patch":
## (from <https://stat.ethz.ch/pipermail/r-devel/2008-July/050165.html>)

##invisible(Sys.setlocale (locale="C")) ## to resolve locale problem  # ??
ldat <- read.spss("spss_long.sav", to.data.frame=TRUE)
ldat
nnms <- nms <- names(ldat)
names(nnms) <- nms
stopifnot(identical(nms,  c("variable1", "variable2")),
      identical(nnms, attr(ldat, "variable.labels")))


## some new arkward testcases for problems found in foreign <= 0.8-68 and duplicated value labels in general:

## Expect lots of warnings as value labels (corresponding to R factor labels) are uncomplete, 
## and an unsupported long string variable is present in the data

setwd(system.file("files", package = "foreign"))
sav <- "testdata.sav"

x.nodat <- read.spss(file=sav, to.data.frame = FALSE, reencode="UTF-8")
str(x.nodat)

x.sort <- read.spss(file=sav, to.data.frame = TRUE, reencode="UTF-8",
                    stringsAsFactors=TRUE)
str(x.sort)
x.append <- read.spss(file=sav, to.data.frame = TRUE, 
    add.undeclared.levels = "append", reencode="UTF-8")
x.no <- read.spss(file=sav, to.data.frame = TRUE, 
    add.undeclared.levels = "no", reencode="UTF-8")

levels(x.sort$factor_n_undeclared)
levels(x.append$factor_n_undeclared)
str(x.no$factor_n_undeclared)


### Examples for duplicated.value.labels:
## duplicated.value.labels = "append" (default)
x.append <- read.spss(file=sav, to.data.frame=TRUE, reencode="UTF-8")
## duplicated.value.labels = "condense"
x.condense <- read.spss(file=sav, to.data.frame=TRUE, 
    duplicated.value.labels = "condense", reencode="UTF-8")

levels(x.append$factor_n_duplicated)
levels(x.condense$factor_n_duplicated)

as.numeric(x.append$factor_n_duplicated)
as.numeric(x.condense$factor_n_duplicated)

### ToDo:    
## Long Strings (>255 chars) are imported in consecutive separate variables 
## (see warning about subtype 14)
## we should get that right in the import function in future versions
x <- read.spss(file=sav, to.data.frame=TRUE, stringsAsFactors=FALSE, reencode="UTF-8")

cat.long.string <- function(x, w=70) cat(paste(strwrap(x, width=w), "\n"))

## first part: x$string_500:
cat.long.string(x$string_500)
## second part: x$STRIN0:
cat.long.string(x$STRIN0)
## complete long string:
long.string <- apply(x[,c("string_500", "STRIN0")], 1, paste, collapse="")
cat.long.string(long.string)