library(openintro)library(tidyverse)library(janitor)glimpse(gender_discrimination)
## Rows: 48## Columns: 2## $ gender <fct> male, male, male, male, male, male, male, male, male, male, m…## $ decision <fct> promoted, promoted, promoted, promoted, promoted, promoted, p…
Example from the OpenIntro Introductory Statistics with Randomization and Simulation Book
gender_discrimination %>% tabyl(gender, decision) %>% adorn_totals("row")
## gender promoted not promoted## male 21 3## female 14 10## Total 35 13
H0:πm−πf≤0
HA:πm−πf>0
summary_table <- gender_discrimination %>% tabyl(gender, decision) %>% adorn_totals("row")
summary_table
## gender promoted not promoted## male 21 3## female 14 10## Total 35 13
p_m <- summary_table [1, 2] / 24p_f <- summary_table [2, 2] / 24p_m - p_f
## [1] 0.2916667
observed_diff_p <- p_m - p_f
Can this observed difference in promotion rates (0.2916667) be due to chance rather than gender discrimination?
1.Shuffle the 48 personnel files.
2.Deal the 48 files into to two stacks. Stack 1 will have 35 files that represent the promoted files. Stack 2 will have 13 files that are not promoted.
3.Calculate the differences in promotion rates of males and females.
4.Repeat this process multiple times.
gender_discrimination
## # A tibble: 48 x 2## gender decision## <fct> <fct> ## 1 male promoted## 2 male promoted## 3 male promoted## 4 male promoted## 5 male promoted## 6 male promoted## 7 male promoted## 8 male promoted## 9 male promoted## 10 male promoted## # … with 38 more rows
set.seed(12345)gender_discrimination$simulated_decision <- sample(gender_discrimination$decision)
gender_discrimination
## # A tibble: 48 x 3## gender decision simulated_decision## <fct> <fct> <fct> ## 1 male promoted promoted ## 2 male promoted promoted ## 3 male promoted promoted ## 4 male promoted promoted ## 5 male promoted not promoted ## 6 male promoted promoted ## 7 male promoted promoted ## 8 male promoted promoted ## 9 male promoted not promoted ## 10 male promoted promoted ## # … with 38 more rows
gender_discrimination %>% tabyl(gender, simulated_decision) %>% adorn_totals("row")
## gender promoted not promoted## male 18 6## female 17 7## Total 35 13
summary_table <- gender_discrimination %>% tabyl(gender, simulated_decision) %>% adorn_totals("row")p_m <- summary_table [1, 2] / 24p_f <- summary_table [2, 2] / 24p_m - p_f
## [1] 0.04166667
set.seed(12345)for (i in 1:2){ simulated_decision <- sample(gender_discrimination$decision) print(simulated_decision) rep = rep(i, 48) print(rep)}
## [1] promoted promoted promoted promoted not promoted## [6] promoted promoted promoted not promoted promoted ## [11] not promoted not promoted promoted promoted promoted ## [16] not promoted promoted promoted promoted promoted ## [21] promoted promoted promoted not promoted promoted ## [26] promoted not promoted promoted promoted promoted ## [31] not promoted promoted promoted not promoted promoted ## [36] promoted promoted not promoted promoted promoted ## [41] not promoted not promoted promoted not promoted promoted ## [46] promoted promoted promoted ## Levels: promoted not promoted## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1## [39] 1 1 1 1 1 1 1 1 1 1## [1] promoted promoted not promoted promoted promoted ## [6] promoted promoted not promoted promoted promoted ## [11] promoted promoted not promoted not promoted not promoted## [16] promoted not promoted promoted promoted promoted ## [21] promoted promoted promoted not promoted not promoted## [26] promoted promoted promoted promoted not promoted## [31] promoted not promoted promoted promoted not promoted## [36] promoted promoted promoted not promoted promoted ## [41] not promoted promoted promoted promoted promoted ## [46] promoted promoted promoted ## Levels: promoted not promoted## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2## [39] 2 2 2 2 2 2 2 2 2 2
set.seed(12345)for (i in 1:2){ simulated_decision <- sample(gender_discrimination$decision) print(simulated_decision) rep = rep(i, 48) print(rep)}
## [1] promoted promoted promoted promoted not promoted## [6] promoted promoted promoted not promoted promoted ## [11] not promoted not promoted promoted promoted promoted ## [16] not promoted promoted promoted promoted promoted ## [21] promoted promoted promoted not promoted promoted ## [26] promoted not promoted promoted promoted promoted ## [31] not promoted promoted promoted not promoted promoted ## [36] promoted promoted not promoted promoted promoted ## [41] not promoted not promoted promoted not promoted promoted ## [46] promoted promoted promoted ## Levels: promoted not promoted## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1## [39] 1 1 1 1 1 1 1 1 1 1## [1] promoted promoted not promoted promoted promoted ## [6] promoted promoted not promoted promoted promoted ## [11] promoted promoted not promoted not promoted not promoted## [16] promoted not promoted promoted promoted promoted ## [21] promoted promoted promoted not promoted not promoted## [26] promoted promoted promoted promoted not promoted## [31] promoted not promoted promoted promoted not promoted## [36] promoted promoted promoted not promoted promoted ## [41] not promoted promoted promoted promoted promoted ## [46] promoted promoted promoted ## Levels: promoted not promoted## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2## [39] 2 2 2 2 2 2 2 2 2 2
set.seed(12345)prop_diff <- c()for (i in 1:100){ gender_discrimination$simulated_decision <- sample(gender_discrimination$decision) summary_table <- gender_discrimination %>% tabyl(gender, simulated_decision) %>% adorn_totals("row") p_m <- summary_table [1, 2] / 24 p_f <- summary_table [2, 2] / 24 prop_diff_calc <- p_m - p_f prop_diff <- c(prop_diff, prop_diff_calc)}
prop_diff
## [1] 0.04166667 -0.04166667 0.20833333 0.04166667 -0.12500000 -0.04166667## [7] -0.12500000 0.12500000 0.12500000 0.04166667 0.04166667 0.04166667## [13] -0.04166667 0.20833333 0.12500000 -0.12500000 0.04166667 0.12500000## [19] 0.12500000 0.12500000 0.04166667 -0.04166667 0.04166667 0.12500000## [25] -0.12500000 -0.12500000 0.12500000 0.12500000 -0.12500000 -0.04166667## [31] -0.04166667 0.04166667 0.04166667 0.12500000 0.12500000 -0.04166667## [37] 0.12500000 0.29166667 0.04166667 -0.12500000 0.04166667 -0.04166667## [43] -0.12500000 0.20833333 0.29166667 0.04166667 -0.20833333 0.12500000## [49] 0.12500000 0.04166667 -0.04166667 -0.04166667 -0.04166667 0.04166667## [55] -0.20833333 -0.04166667 0.12500000 -0.04166667 -0.04166667 -0.12500000## [61] -0.12500000 0.20833333 -0.20833333 0.04166667 -0.12500000 0.04166667## [67] -0.12500000 0.12500000 -0.04166667 0.04166667 -0.04166667 0.04166667## [73] 0.20833333 0.04166667 -0.29166667 -0.12500000 0.04166667 0.04166667## [79] 0.04166667 0.29166667 -0.04166667 -0.04166667 0.12500000 -0.04166667## [85] -0.04166667 -0.04166667 0.04166667 -0.12500000 0.04166667 -0.04166667## [91] -0.12500000 -0.12500000 -0.04166667 -0.04166667 0.12500000 -0.04166667## [97] 0.20833333 -0.12500000 -0.29166667 -0.20833333
hist(prop_diff)
which(prop_diff >= observed_diff_p)
## [1] 38 45 80
which(prop_diff >= observed_diff_p)
## [1] 38 45 80
length(which(prop_diff >= observed_diff_p)) / 100
## [1] 0.03
library(openintro)library(tidyverse)library(janitor)glimpse(gender_discrimination)
## Rows: 48## Columns: 2## $ gender <fct> male, male, male, male, male, male, male, male, male, male, m…## $ decision <fct> promoted, promoted, promoted, promoted, promoted, promoted, p…
Example from the OpenIntro Introductory Statistics with Randomization and Simulation Book
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |