forked from bcaffo/Caffo-Coursera
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlecture5.tex
371 lines (335 loc) · 11.8 KB
/
lecture5.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
\documentclass[aspectratio=169]{beamer}
\mode<presentation>
% \usetheme{Warsaw}
% \usetheme{Goettingen}
\usetheme{Hannover}
% \useoutertheme{default}
% \useoutertheme{infolines}
\useoutertheme{sidebar}
\usecolortheme{dolphin}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}
% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Cor}{\mathrm{Cor}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\cpp}[1]{\texttt{#1}}
\title{Mathematical Biostatistics Boot Camp: Lecture 5, Conditional Probability}
\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}
\frame{\titlepage}
\section{Table of contents}
\frame{
\frametitle{Table of contents}
\tableofcontents
}
\section{Conditional probability}
\begin{frame}\frametitle{Conditional probability, motivation}
\begin{itemize}
\item The probability of getting a one when rolling a (standard) die
is usually assumed to be one sixth
\item Suppose you were given the extra information that the die roll
was an odd number (hence 1, 3 or 5)
\item {\em conditional on this new information}, the probability of a
one is now one third
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Conditional probability, definition}
\begin{itemize}
\item Let $B$ be an event so that $P(B) > 0$
\item Then the conditional probability of an event $A$ given that $B$
has occurred is
$$
P(A ~|~ B) = \frac{P(A \cap B)}{P(B)}
$$
\item Notice that if $A$ and $B$ are independent, then
$$
P(A ~|~ B) = \frac{P(A) P(B)}{P(B)} = P(A)
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Consider our die roll example
\item $B = \{1, 3, 5\}$
\item $A = \{1\}$
\begin{eqnarray*}
P(\mbox{one given that roll is odd}) & = & P(A ~|~ B) \\ \\
& = & \frac{P(A \cap B)}{P(B)} \\ \\
& = & \frac{P(A)}{P(B)} \\ \\
& = & \frac{1/6}{3/6} = \frac{1}{3}
\end{eqnarray*}
\end{itemize}
\end{frame}
\section{Conditional densities}
\begin{frame}\frametitle{Conditional densities and mass functions}
\begin{itemize}
\item Conditional densities or mass functions of one variable conditional on
the value of another
\item Let $f(x,y)$ be a bivariate density or mass function for random variables
$X$ and $Y$
\item Let $f(x)$ and $f(y)$ be the associated marginal mass function
or densities disregarding the other variables
$$
f(y) = \int f(x, y)dx ~~~~\mbox{or}~~~~ f(y) = \sum_x f(x, y).
$$
\item Then the {\bf conditional} density or mass function {\em given that $Y = y$} is given by
$$
f(x ~|~ y) = f(x, y) / f(y)
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Notes}
\begin{itemize}
\item It is easy to see that, in the discrete case, the definition of
conditional probability is exactly as in the definition for
conditional events where $A = $ the event that $X = x$ and $B = $
the event that $Y = y$
\item The continuous definition is a little harder to motivate, since
the events $X = x$ and $Y = y$ each have probability 0
\item However, a useful motivation can be performed by taking the
appropriate limits as follows
\item Define $A = \{X \leq x\}$ while $B = \{Y \in [y, y + \epsilon]\}$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Continued}
\begin{eqnarray*}
P(X \leq x ~|~ Y \in [y, y + \epsilon]) & = & P(A ~|~ B) = \frac{P(A \cap B)}{P(B)} \\ \\ \\
& = & \frac{P(X \leq x, Y \in [y, y + \epsilon])}{P(Y \in [y, y + \epsilon])} \\ \\ \\
& = & \frac{\int_{y}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy}
{\int_{y}^{y+\epsilon} f(y) dy} \\ \\ \\
& = & \frac{\frac{1}{\epsilon}\int_{y}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy}
{\frac{1}{\epsilon}\int_{y}^{y+\epsilon} f(y) dy}
\end{eqnarray*}
\end{frame}
\begin{frame}
\frametitle{Continued}
\begin{eqnarray*}
& = & \frac{\frac{\int_{-\infty}^{y+\epsilon}\int_{-\infty}^{x}f(x,y)dxdy -
\int_{-\infty}^{y}\int_{-\infty}^{x}f(x,y)dxdy}{\epsilon}}
{\frac{\int_{-\infty}^{y+\epsilon} f(y) dy - \int_{-\infty}^{y} f(y) dy}{\epsilon}}\\ \\ \\
& = & \frac{\frac{g_1(y + \epsilon) - g_1(y)}{\epsilon}}{\frac{g_2(y + \epsilon) - g_2(y)}{\epsilon}}
\end{eqnarray*}
where
$$
g_1(y) = \int_{-\infty}^{y}\int_{-\infty}^{x}f(x,y)dxdy ~~\mbox{and}~~
g_2(y) = \int_{-\infty}^{y} f(y) dy.
$$
\end{frame}
\begin{frame}
\begin{itemize}
\item Notice that the numerator and denominator tend to
$g_1'(y)$ and $g_2'(y)$, respectively, as $\epsilon$ tends to zero
\item Hence we have that the conditional distribution function is
$$
P(X \leq x ~|~ Y = y) = \frac{\int_{-\infty}^x f(x, y)dx}{f(y)}.
$$
\item Now, taking the derivative with respect to $x$ yields the
conditional density
$$
f(x ~|~ y) = \frac{f(x, y)}{f(y)}
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Geometrically}
\begin{itemize}
\item Geometrically, the conditional density is obtained by taking the
relevant slice of the joint density and appropriately renormalizing it
\item This idea extends to any other line, or even non-linear functions
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Let $f(x, y) = ye^{-xy - y}$ for $0 \leq x$ and $0 \leq y$
\item Then note
$$
f(y) = \int_{0}^\infty f(x, y)dx = e^{-y}\int_{0}^\infty ye^{-xy}dx = e^{-y}
$$
\item Therefore
$$
f(x~|~ y) = f(x, y) / f(y) = \frac{ ye^{-xy - y}}{e^{-y}} = ye^{-xy}
$$
\end{itemize}
\end{frame}
%\begin{frame}\frametitle{Example}
%\begin{itemize}
%\item Let $f(x, y) = 1 / \pi r^2$ for $x^2 + y^2 \leq r^2$
%\item $X$ and $Y$ are uniform on a circle with radius $r$
%\item What is the conditional density of $X$ given that $Y=0$?
%\item Probably easiest to think geometrically
%$$
%f(x ~|~ y = 0) \propto 1 ~~\mbox{for}~~ -r \leq x \leq r
%$$
%\item Therefore
%$$
%f(x ~|~ y = 0) = \frac{1}{2r}~~\mbox{for}~~ -r \leq x \leq r
%$$
%\end{itemize}
%\end{frame}
\section{Bayes' Rule}
\begin{frame}\frametitle{Bayes' rule}
\begin{itemize}
\item Let $f(x ~|~ y)$ be the conditional density or mass function for $X$ given
that $Y = y$
\item Let $f(y)$ be the marginal density or mass function for $Y$
\item Then if $Y$ is continuous
$$
f(y ~|~ x) = \frac{f(x ~|~ y) f(y)}{\int f(x ~|~ t) f(t) dt}
$$
\item If $Y$ is discrete
$$
f(y ~|~ x) = \frac{f(x ~|~ y) f(y)}{\sum_t f(x ~|~ t) f(t)}
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Notes}
\begin{itemize}
\item Bayes' rule relates the conditional density $f(y ~|~ x)$ to
$f(x ~|~ y)$ and $f(y)$
\item A special case of this kind of relationship is for two sets $A$ and $B$,
which yields that
$$
P(B ~|~ A) = \frac{P(A ~|~ B) P(B)}{P(A ~|~ B) P(B) + P(A ~|~ B^c)P(B^c)}.
$$
Proof:
\begin{itemize}
\item Let $X$ be an indicator that event $A$ has occurred
\item Let $Y$ be an indicator that event $B$ has occurred
\item Plug into the discrete version of Bayes' rule
\end{itemize}
\end{itemize}
\end{frame}
\section{Diagnostic tests}
\begin{frame}\frametitle{Example: diagnostic tests}
\begin{itemize}
\item Let $+$ and $-$ be the events that the result of a diagnostic test
is positive or negative respectively
\item Let $D$ and $D^c$ be the events
that the subject of the test has or does not have the disease
respectively
\item The {\bf sensitivity} is the probability that the test is
positive given that the subject actually has the disease, $P(+ ~|~
D)$
\item The {\bf specificity} is the probability that the test is
negative given that the subject does not have the disease, $P(- ~|~ D^c)$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{More definitions}
\begin{itemize}
\item The {\bf positive predictive value} is the probability that the subject has the
disease given that the test is positive, $P(D ~|~ +)$
\item The {\bf negative predictive value} is the probability that the subject does not have
the disease given that the test is negative, $P(D^c ~|~ -)$
\item The {\bf prevalence of the disease} is the marginal probability of disease, $P(D)$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{More definitions}
\begin{itemize}
\item The {\bf diagnostic likelihood ratio of a positive test}, labeled $DLR_+$, is
$P(+ ~|~ D) / P(+ ~|~ D^c)$, which is $$\mbox{sensitivity} / (1 - \mbox{specificity})$$
\item The {\bf diagnostic likelihood ratio of a negative test},
labeled $DLR_-$, is $P(- ~|~ D) / P(- ~|~ D^c)$, which is
$$(1 - \mbox{sensitivity}) / \mbox{specificity}$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example}
\begin{itemize}
\item A study comparing the efficacy of HIV tests reports on an
experiment which concluded that HIV antibody tests have a
sensitivity of 99.7\% and a specificity of 98.5\%
\item Suppose that a subject, from a population with a .1\% prevalence
of HIV, receives a positive test result. What is the probability
that this subject has HIV?
\item Mathematically, we want $P(D ~|~ +)$ given the sensitivity, $P(+
~|~ D) = .997$, the specificity, $P(- ~|~ D^c) =.985$, and the
prevalence $P(D) = .001$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Using Bayes' formula}
\begin{eqnarray*}
P(D ~|~ +) & = &\frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}\\ \\
& = & \frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + \{1-P(-~|~D^c)\}\{1 - P(D)\}} \\ \\
& = & \frac{.997\times .001}{.997 \times .001 + .015 \times .999}\\ \\
& = & .062
\end{eqnarray*}
\begin{itemize}
\item In this population a positive test result only suggests a 6\% probability that
the subject has the disease
\item (The positive predictive value is 6\% for this test)
\end{itemize}
\end{frame}
\begin{frame}\frametitle{More on this example}
\begin{itemize}
\item The low positive predictive value is due to low prevalence of disease and the
somewhat modest specificity
\item Suppose it was known that the subject was an intravenous drug
user and routinely had intercourse with an HIV infected partner
\item Notice that the evidence implied by a positive test result does not change
because of the prevalence of disease in the subject's population, only our
interpretation of that evidence changes
\end{itemize}
\end{frame}
\section{DLRs}
\begin{frame}\frametitle{Likelihood ratios}
\begin{itemize}
\item Using Bayes' rule, we have
$$
P(D ~|~ +) = \frac{P(+~|~D)P(D)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}
$$
and
$$
P(D^c ~|~ +) = \frac{P(+~|~D^c)P(D^c)}{P(+~|~D)P(D) + P(+~|~D^c)P(D^c)}.
$$
\item Therefore
$$
\frac{P(D ~|~ +)}{P(D^c ~|~ +)} = \frac{P(+~|~D)}{P(+~|~D^c)}\times \frac{P(D)}{P(D^c)}
$$
i.e.
$$
\mbox{post-test odds of }D = DLR_+\times\mbox{pre-test odds of }D
$$
\item Similarly, $DLR_-$ relates the decrease in the odds of the
disease after a negative test result to the odds of disease prior to
the test.
\end{itemize}
\end{frame}
\begin{frame}\frametitle{HIV example revisited}
\begin{itemize}
\item Suppose a subject has a positive HIV test
\item $DLR_+ = .997 / (1 - .985) \approx 66$
\item The result of the positive test is that the odds of disease are
now 66 times the pre-test odds
\item Or, equivalently, the hypothesis of disease is 66 times more
supported by the data than the hypothesis of no disease
\end{itemize}
\end{frame}
\begin{frame}\frametitle{HIV example revisited}
\begin{itemize}
\item Suppose that a subject has a negative test result
\item $DLR_- = (1 - .997) / .985 \approx .003$
\item Therefore, the post-test odds of disease are now $.3\%$ of the pre-test
odds given the negative test.
\item Or, the hypothesis of disease is supported $.003$ times that of
the hypothesis of absence of disease given the negative test result
\end{itemize}
\end{frame}
\end{document}