1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
\documentclass[a4paper]{article}
%\VignetteIndexEntry{UP - unequal probability sampling designs}
%\VignettePackage{sampling}
\newcommand{\sampling}{{\tt sampling}}
\newcommand{\R}{{\tt R}}
\setlength{\parindent}{0in}
\setlength{\parskip}{.1in}
\setlength{\textwidth}{140mm}
\setlength{\oddsidemargin}{10mm}
\title{Unequal probability sampling designs}
\author{}
\usepackage{Sweave}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
\begin{document}
\maketitle
<<echo=FALSE, results=hide>>=
library(sampling)
ps.options(pointsize=12)
options(width=60)
@
\section{Examples of maximum entropy sampling design and related functions}
a) Example 1
@
Consider the Belgian municipalities data set as population, and a sample size n=50
<<entropy1, results=hide>>=
data(belgianmunicipalities)
attach(belgianmunicipalities)
n=50
@
Compute the inclusion probabilties proportional to the `averageincome' variable
<<entropy2, results=hide>>=
pik=inclusionprobabilities(averageincome,n)
@
Draw a random sample using the maximum entropy sampling design
<<entropy3, results=hide>>=
s=UPmaxentropy(pik)
@
The sample is
<<entropy4, results=hide>>=
as.character(Commune[s==1])
@
Compute the joint inclusion probabilities
<<entropy5, results=hide>>=
pi2=UPmaxentropypi2(pik)
@
Check the result
<<entropy6, results=hide>>=
rowSums(pi2)/pik/n
detach(belgianmunicipalities)
@
b) Example 2
@
Selection of samples from Belgian municipalities data set, sample size 50.
Once the matrix q (see below) is computed, a sample is quickly selected.
Monte Carlo simulation can be used to compare the true inclusion probabilities with the estimated ones.
<<entropy7, results=hide>>=
data(belgianmunicipalities)
attach(belgianmunicipalities)
pik=inclusionprobabilities(averageincome,50)
pik=pik[pik!=1]
n=sum(pik)
pikt=UPMEpiktildefrompik(pik)
w=pikt/(1-pikt)
q=UPMEqfromw(w,n)
@
Draw a sample using the q matrix
<<entropy8, results=hide>>=
UPMEsfromq(q)
@
Monte Carlo simulation to check the sample selection; the difference between pik and the estimated inclusion prob. (object tt below) is almost 0.
<<entropy9, results=hide>>=
sim=10000
N=length(pik)
tt=rep(0,N)
for(i in 1:sim) tt = tt+UPMEsfromq(q)
tt=tt/sim
max(abs(tt-pik))
detach(belgianmunicipalities)
@
\section{Example of unequal probability (UP) sampling designs}
Selection of samples from the Belgian municipalities data set,
with equal or unequal probabilities, and study of the Horvitz-Thompson estimator accuracy using boxplots. The following sampling schemes are used: Poisson, random systematic, random
pivotal, Till\'e, Midzuno, systematic, pivotal, and simple random
sampling without replacement. Monte Carlo simulations are used to study the accuracy of the
Horvitz-Thompson estimator of a population total. The aim of this
example is to demonstrate the effect of using auxiliary information in sampling designs. We use:
\begin{itemize}
\item some $\pi$ps sampling designs with Horvitz-Thompson estimation, using auxiliary information in a sampling desing (size measurements of population units in 2004);
\item simple random sampling without replacement with Horvitz-Thompson
estimation, where no auxiliary information is used.
\end{itemize}
<<up1, results=hide>>=
b=data(belgianmunicipalities)
pik=inclusionprobabilities(belgianmunicipalities$Tot04,200)
N=length(pik)
n=sum(pik)
@
Number of simulations (for an accurate result, increase this value to 10000):
<<up2, results=hide>>=
sim=10
ss=array(0,c(sim,8))
@
Defines the variable of interest:
<<up3, results=hide>>=
y=belgianmunicipalities$TaxableIncome
@
Simulation and computation of the Horvitz-Thompson estimators:
<<up4, results=hide>>=
ht=numeric(8)
for(i in 1:sim)
{
cat("Step ",i,"\n")
s=UPpoisson(pik)
ht[1]=HTestimator(y[s==1],pik[s==1])
s=UPrandomsystematic(pik)
ht[2]=HTestimator(y[s==1],pik[s==1])
s=UPrandompivotal(pik)
ht[3]=HTestimator(y[s==1],pik[s==1])
s=UPtille(pik)
ht[4]=HTestimator(y[s==1],pik[s==1])
s=UPmidzuno(pik)
ht[5]=HTestimator(y[s==1],pik[s==1])
s=UPsystematic(pik)
ht[6]=HTestimator(y[s==1],pik[s==1])
s=UPpivotal(pik)
ht[7]=HTestimator(y[s==1],pik[s==1])
s=srswor(n,N)
ht[8]=HTestimator(y[s==1],rep(n/N,n))
ss[i,]=ht
}
@
Boxplots of the estimators:
<<up5,fig=TRUE,height=7,width=6.5>>=
colnames(ss) <-
c("poisson","rsyst","rpivotal","tille","midzuno","syst","pivotal","srswor")
boxplot(data.frame(ss), las=3)
<<eval=FALSE, echo=FALSE>>=
<<up1>>
<<up2>>
<<up3>>
<<up4>>
<<up5>>
sampling.newpage()
@
\end{document}
|