+ - 0:00:00
Notes for current slide
Notes for next slide



Randomization Tests

Dr. Mine Dogucu

1 / 21
library(openintro)
library(tidyverse)
library(janitor)
glimpse(gender_discrimination)
## Rows: 48
## Columns: 2
## $ gender <fct> male, male, male, male, male, male, male, male, male, male, m…
## $ decision <fct> promoted, promoted, promoted, promoted, promoted, promoted, p…

Example from the OpenIntro Introductory Statistics with Randomization and Simulation Book

2 / 21
gender_discrimination %>%
tabyl(gender, decision) %>%
adorn_totals("row")
## gender promoted not promoted
## male 21 3
## female 14 10
## Total 35 13
3 / 21

Hypotheses

H0:πmπf0
HA:πmπf>0

4 / 21

Sample Statistic

summary_table <- gender_discrimination %>%
tabyl(gender, decision) %>%
adorn_totals("row")
5 / 21

Sample Statistic

summary_table
## gender promoted not promoted
## male 21 3
## female 14 10
## Total 35 13
p_m <- summary_table [1, 2] / 24
p_f <- summary_table [2, 2] / 24
p_m - p_f
## [1] 0.2916667
6 / 21
observed_diff_p <- p_m - p_f
7 / 21

Can this observed difference in promotion rates (0.2916667) be due to chance rather than gender discrimination?

8 / 21

Steps

1.Shuffle the 48 personnel files.
2.Deal the 48 files into to two stacks. Stack 1 will have 35 files that represent the promoted files. Stack 2 will have 13 files that are not promoted.
3.Calculate the differences in promotion rates of males and females.
4.Repeat this process multiple times.

9 / 21
gender_discrimination
## # A tibble: 48 x 2
## gender decision
## <fct> <fct>
## 1 male promoted
## 2 male promoted
## 3 male promoted
## 4 male promoted
## 5 male promoted
## 6 male promoted
## 7 male promoted
## 8 male promoted
## 9 male promoted
## 10 male promoted
## # … with 38 more rows
10 / 21
set.seed(12345)
gender_discrimination$simulated_decision <-
sample(gender_discrimination$decision)
11 / 21
gender_discrimination
## # A tibble: 48 x 3
## gender decision simulated_decision
## <fct> <fct> <fct>
## 1 male promoted promoted
## 2 male promoted promoted
## 3 male promoted promoted
## 4 male promoted promoted
## 5 male promoted not promoted
## 6 male promoted promoted
## 7 male promoted promoted
## 8 male promoted promoted
## 9 male promoted not promoted
## 10 male promoted promoted
## # … with 38 more rows
12 / 21
gender_discrimination %>%
tabyl(gender, simulated_decision) %>%
adorn_totals("row")
## gender promoted not promoted
## male 18 6
## female 17 7
## Total 35 13
13 / 21
summary_table <- gender_discrimination %>%
tabyl(gender, simulated_decision) %>%
adorn_totals("row")
p_m <- summary_table [1, 2] / 24
p_f <- summary_table [2, 2] / 24
p_m - p_f
## [1] 0.04166667
14 / 21
set.seed(12345)
for (i in 1:2){
simulated_decision <-
sample(gender_discrimination$decision)
print(simulated_decision)
rep = rep(i, 48)
print(rep)
}
## [1] promoted promoted promoted promoted not promoted
## [6] promoted promoted promoted not promoted promoted
## [11] not promoted not promoted promoted promoted promoted
## [16] not promoted promoted promoted promoted promoted
## [21] promoted promoted promoted not promoted promoted
## [26] promoted not promoted promoted promoted promoted
## [31] not promoted promoted promoted not promoted promoted
## [36] promoted promoted not promoted promoted promoted
## [41] not promoted not promoted promoted not promoted promoted
## [46] promoted promoted promoted
## Levels: promoted not promoted
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1
## [1] promoted promoted not promoted promoted promoted
## [6] promoted promoted not promoted promoted promoted
## [11] promoted promoted not promoted not promoted not promoted
## [16] promoted not promoted promoted promoted promoted
## [21] promoted promoted promoted not promoted not promoted
## [26] promoted promoted promoted promoted not promoted
## [31] promoted not promoted promoted promoted not promoted
## [36] promoted promoted promoted not promoted promoted
## [41] not promoted promoted promoted promoted promoted
## [46] promoted promoted promoted
## Levels: promoted not promoted
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [39] 2 2 2 2 2 2 2 2 2 2
15 / 21
set.seed(12345)
for (i in 1:2){
simulated_decision <-
sample(gender_discrimination$decision)
print(simulated_decision)
rep = rep(i, 48)
print(rep)
}
## [1] promoted promoted promoted promoted not promoted
## [6] promoted promoted promoted not promoted promoted
## [11] not promoted not promoted promoted promoted promoted
## [16] not promoted promoted promoted promoted promoted
## [21] promoted promoted promoted not promoted promoted
## [26] promoted not promoted promoted promoted promoted
## [31] not promoted promoted promoted not promoted promoted
## [36] promoted promoted not promoted promoted promoted
## [41] not promoted not promoted promoted not promoted promoted
## [46] promoted promoted promoted
## Levels: promoted not promoted
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1
## [1] promoted promoted not promoted promoted promoted
## [6] promoted promoted not promoted promoted promoted
## [11] promoted promoted not promoted not promoted not promoted
## [16] promoted not promoted promoted promoted promoted
## [21] promoted promoted promoted not promoted not promoted
## [26] promoted promoted promoted promoted not promoted
## [31] promoted not promoted promoted promoted not promoted
## [36] promoted promoted promoted not promoted promoted
## [41] not promoted promoted promoted promoted promoted
## [46] promoted promoted promoted
## Levels: promoted not promoted
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [39] 2 2 2 2 2 2 2 2 2 2
16 / 21
set.seed(12345)
prop_diff <- c()
for (i in 1:100){
gender_discrimination$simulated_decision <-
sample(gender_discrimination$decision)
summary_table <- gender_discrimination %>%
tabyl(gender, simulated_decision) %>%
adorn_totals("row")
p_m <- summary_table [1, 2] / 24
p_f <- summary_table [2, 2] / 24
prop_diff_calc <- p_m - p_f
prop_diff <- c(prop_diff, prop_diff_calc)
}
17 / 21
prop_diff
## [1] 0.04166667 -0.04166667 0.20833333 0.04166667 -0.12500000 -0.04166667
## [7] -0.12500000 0.12500000 0.12500000 0.04166667 0.04166667 0.04166667
## [13] -0.04166667 0.20833333 0.12500000 -0.12500000 0.04166667 0.12500000
## [19] 0.12500000 0.12500000 0.04166667 -0.04166667 0.04166667 0.12500000
## [25] -0.12500000 -0.12500000 0.12500000 0.12500000 -0.12500000 -0.04166667
## [31] -0.04166667 0.04166667 0.04166667 0.12500000 0.12500000 -0.04166667
## [37] 0.12500000 0.29166667 0.04166667 -0.12500000 0.04166667 -0.04166667
## [43] -0.12500000 0.20833333 0.29166667 0.04166667 -0.20833333 0.12500000
## [49] 0.12500000 0.04166667 -0.04166667 -0.04166667 -0.04166667 0.04166667
## [55] -0.20833333 -0.04166667 0.12500000 -0.04166667 -0.04166667 -0.12500000
## [61] -0.12500000 0.20833333 -0.20833333 0.04166667 -0.12500000 0.04166667
## [67] -0.12500000 0.12500000 -0.04166667 0.04166667 -0.04166667 0.04166667
## [73] 0.20833333 0.04166667 -0.29166667 -0.12500000 0.04166667 0.04166667
## [79] 0.04166667 0.29166667 -0.04166667 -0.04166667 0.12500000 -0.04166667
## [85] -0.04166667 -0.04166667 0.04166667 -0.12500000 0.04166667 -0.04166667
## [91] -0.12500000 -0.12500000 -0.04166667 -0.04166667 0.12500000 -0.04166667
## [97] 0.20833333 -0.12500000 -0.29166667 -0.20833333
18 / 21
hist(prop_diff)

19 / 21

p-value

which(prop_diff >= observed_diff_p)
## [1] 38 45 80
20 / 21

p-value

which(prop_diff >= observed_diff_p)
## [1] 38 45 80
length(which(prop_diff >= observed_diff_p)) / 100
## [1] 0.03
21 / 21
library(openintro)
library(tidyverse)
library(janitor)
glimpse(gender_discrimination)
## Rows: 48
## Columns: 2
## $ gender <fct> male, male, male, male, male, male, male, male, male, male, m…
## $ decision <fct> promoted, promoted, promoted, promoted, promoted, promoted, p…

Example from the OpenIntro Introductory Statistics with Randomization and Simulation Book

2 / 21
Paused

Help

Keyboard shortcuts

, , Pg Up, k Go to previous slide
, , Pg Dn, Space, j Go to next slide
Home Go to first slide
End Go to last slide
Number + Return Go to specific slide
b / m / f Toggle blackout / mirrored / fullscreen mode
c Clone slideshow
p Toggle presenter mode
t Restart the presentation timer
?, h Toggle this help
Esc Back to slideshow