File: UPexamples.Snw

package info (click to toggle)
r-cran-sampling 2.10-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,336 kB
  • sloc: ansic: 21; makefile: 2
file content (172 lines) | stat: -rw-r--r-- 4,643 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
\documentclass[a4paper]{article}
%\VignetteIndexEntry{UP - unequal probability sampling designs}
%\VignettePackage{sampling}
\newcommand{\sampling}{{\tt sampling}}
\newcommand{\R}{{\tt R}}
\setlength{\parindent}{0in}
\setlength{\parskip}{.1in}
\setlength{\textwidth}{140mm}
\setlength{\oddsidemargin}{10mm}
\title{Unequal probability sampling designs}
\author{}
\usepackage{Sweave} 
\usepackage[latin1]{inputenc}
\usepackage{amsmath}



\begin{document}
\maketitle

<<echo=FALSE, results=hide>>=
library(sampling)
ps.options(pointsize=12)
options(width=60)

@
\section{Examples of maximum entropy sampling design and related functions}

a) Example 1

@
Consider the Belgian municipalities data set as population, and a sample size n=50

<<entropy1, results=hide>>=
data(belgianmunicipalities)
attach(belgianmunicipalities)
n=50
@
Compute the inclusion probabilties proportional to the `averageincome' variable

<<entropy2, results=hide>>=
pik=inclusionprobabilities(averageincome,n)
@
Draw a random sample using the maximum entropy sampling design

<<entropy3, results=hide>>=
s=UPmaxentropy(pik)
@
The sample is

<<entropy4, results=hide>>=
as.character(Commune[s==1])
@
Compute the joint inclusion probabilities

<<entropy5, results=hide>>=
pi2=UPmaxentropypi2(pik)
@
Check the result

<<entropy6, results=hide>>=
rowSums(pi2)/pik/n
detach(belgianmunicipalities)
@
b) Example 2

@
Selection of samples from Belgian municipalities data set, sample size 50.
Once the matrix q (see below) is computed, a sample is quickly selected.
Monte Carlo simulation can be used to compare the true inclusion probabilities with the estimated ones.
 
<<entropy7, results=hide>>=
data(belgianmunicipalities)
attach(belgianmunicipalities)
pik=inclusionprobabilities(averageincome,50)
pik=pik[pik!=1]
n=sum(pik)
pikt=UPMEpiktildefrompik(pik)
w=pikt/(1-pikt)
q=UPMEqfromw(w,n)
@
Draw a sample using the q matrix

<<entropy8, results=hide>>= 
UPMEsfromq(q)
@
Monte Carlo simulation to check the sample selection; the difference between pik and the estimated inclusion prob. (object tt below) is almost 0.

<<entropy9, results=hide>>= 
sim=10000
N=length(pik)
tt=rep(0,N)
for(i in 1:sim) tt = tt+UPMEsfromq(q)
tt=tt/sim
max(abs(tt-pik))
detach(belgianmunicipalities)
@

\section{Example of unequal probability (UP) sampling designs} 

Selection of samples from the Belgian municipalities data set,
with equal or unequal probabilities, and study of the Horvitz-Thompson estimator accuracy using boxplots. The following sampling schemes are used: Poisson, random systematic, random
pivotal, Till\'e, Midzuno, systematic, pivotal, and simple random
sampling without replacement. Monte Carlo simulations are used to study the accuracy of the
Horvitz-Thompson estimator of a population total. The aim of this
example is to demonstrate the effect of using auxiliary information in sampling designs. We use:
\begin{itemize}
\item some $\pi$ps sampling designs with Horvitz-Thompson estimation, using auxiliary information in a sampling desing (size measurements of population units in 2004);
\item simple random sampling without replacement with Horvitz-Thompson
estimation, where no auxiliary information is used.
\end{itemize}

<<up1, results=hide>>=
b=data(belgianmunicipalities)
pik=inclusionprobabilities(belgianmunicipalities$Tot04,200)
N=length(pik)
n=sum(pik)
@
Number of simulations (for an accurate result, increase this value to 10000):

<<up2, results=hide>>= 
sim=10
ss=array(0,c(sim,8))
@
Defines the variable of interest:

<<up3, results=hide>>= 
y=belgianmunicipalities$TaxableIncome
@
Simulation and computation of the Horvitz-Thompson estimators:

<<up4, results=hide>>= 
ht=numeric(8)
for(i in 1:sim)
{
cat("Step ",i,"\n")
s=UPpoisson(pik)
ht[1]=HTestimator(y[s==1],pik[s==1])
s=UPrandomsystematic(pik)
ht[2]=HTestimator(y[s==1],pik[s==1])
s=UPrandompivotal(pik)
ht[3]=HTestimator(y[s==1],pik[s==1])
s=UPtille(pik)
ht[4]=HTestimator(y[s==1],pik[s==1])
s=UPmidzuno(pik)
ht[5]=HTestimator(y[s==1],pik[s==1])
s=UPsystematic(pik)
ht[6]=HTestimator(y[s==1],pik[s==1])
s=UPpivotal(pik)
ht[7]=HTestimator(y[s==1],pik[s==1])
s=srswor(n,N)
ht[8]=HTestimator(y[s==1],rep(n/N,n))
ss[i,]=ht
}
@
Boxplots of the estimators:

<<up5,fig=TRUE,height=7,width=6.5>>=
colnames(ss) <- 
c("poisson","rsyst","rpivotal","tille","midzuno","syst","pivotal","srswor")
boxplot(data.frame(ss), las=3)

<<eval=FALSE, echo=FALSE>>=
<<up1>>
<<up2>>
<<up3>>
<<up4>>
<<up5>>

sampling.newpage()
@
\end{document}