+ - 0:00:00
Notes for current slide
Notes for next slide



Transformations

Dr. Mine Dogucu

1 / 9
library(AmesHousing)
ames_raw <- janitor::clean_names(ames_raw)
glimpse(ames_raw)
## Rows: 2,930
## Columns: 82
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ pid <chr> "0526301100", "0526350040", "0526351010", "0526353030"…
## $ ms_sub_class <chr> "020", "020", "020", "020", "060", "060", "120", "120"…
## $ ms_zoning <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
## $ lot_frontage <int> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 8…
## $ lot_area <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5…
## $ street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"…
## $ alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ lot_shape <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1"…
## $ land_contour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS"…
## $ utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "All…
## $ lot_config <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Ins…
## $ land_slope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl"…
## $ neighborhood <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilber…
## $ condition_1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm…
## $ condition_2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"…
## $ bldg_type <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "Twnhs…
## $ house_style <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2St…
## $ overall_qual <int> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, …
## $ overall_cond <int> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, …
## $ year_built <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ year_remod_add <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, …
## $ roof_style <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable…
## $ roof_matl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg",…
## $ exterior_1st <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ exterior_2nd <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ mas_vnr_type <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace",…
## $ mas_vnr_area <int> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603,…
## $ exter_qual <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", …
## $ exter_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ foundation <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PCon…
## $ bsmt_qual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", …
## $ bsmt_cond <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ bsmt_exposure <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", …
## $ bsmt_fin_type_1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ"…
## $ bsmt_fin_sf_1 <int> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 9…
## $ bsmt_fin_type_2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf"…
## $ bsmt_fin_sf_2 <int> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0…
## $ bsmt_unf_sf <int> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 76…
## $ total_bsmt_sf <int> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994…
## $ heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"…
## $ heating_qc <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
## $ central_air <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", …
## $ x1st_flr_sf <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 102…
## $ x2nd_flr_sf <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0,…
## $ low_qual_fin_sf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gr_liv_area <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1…
## $ bsmt_full_bath <int> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, …
## $ bsmt_half_bath <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ full_bath <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, …
## $ half_bath <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ bedroom_abv_gr <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, …
## $ kitchen_abv_gr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ kitchen_qual <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", …
## $ tot_rms_abv_grd <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8,…
## $ functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ"…
## $ fireplaces <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, …
## $ fireplace_qu <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "T…
## $ garage_type <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Att…
## $ garage_yr_blt <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ garage_finish <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn"…
## $ garage_cars <int> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, …
## $ garage_area <int> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440,…
## $ garage_qual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ garage_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ paved_drive <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ wood_deck_sf <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, …
## $ open_porch_sf <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 5…
## $ enclosed_porch <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ x3ssn_porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ screen_porch <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210…
## $ pool_area <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pool_qc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ fence <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, …
## $ misc_feature <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed"…
## $ misc_val <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, …
## $ mo_sold <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, …
## $ yr_sold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
## $ sale_type <chr> "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD "…
## $ sale_condition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Nor…
## $ sale_price <int> 215000, 105000, 172000, 244000, 189900, 195500, 213500…
2 / 9

3 / 9

Note that `log()` in R is the natural logarithm (base e).

4 / 9
model_y <- lm(log(sale_price) ~ year_built,
data = ames_raw)
tidy(model_y)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -4.33 0.387 -11.2 1.73e- 28
## 2 year_built 0.00829 0.000196 42.3 4.45e-305

$\log(\hat{y}_i) = b_0 + b_1 x_{1i}$

$\log(\hat{y}_i) = -4.33 + 0.00829\, x_{1i}$

5 / 9

Estimated sale price of a house built in 1980

$\log(\hat{y}_i) = -4.33 + 0.00829 \times 1980$

6 / 9

Estimated sale price of a house built in 1980

$\log(\hat{y}_i) = -4.33 + 0.00829 \times 1980$

$e^{\log(\hat{y}_i)} = e^{-4.33 + 0.00829 \times 1980}$

7 / 9

Estimated sale price of a house built in 1980

$\log(\hat{y}_i) = -4.33 + 0.00829 \times 1980$

$e^{\log(\hat{y}_i)} = e^{-4.33 + 0.00829 \times 1980}$

$\hat{y}_i = e^{-4.33} \times e^{0.00829 \times 1980} = 177052.2$

8 / 9

Estimated sale price of a house built in 1980

$\log(\hat{y}_i) = -4.33 + 0.00829 \times 1980$

$e^{\log(\hat{y}_i)} = e^{-4.33 + 0.00829 \times 1980}$

$\hat{y}_i = e^{-4.33} \times e^{0.00829 \times 1980} = 177052.2$

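For a one-unit (one-year) increase in $x$, the estimated $y$ is multiplied by $e^{b_1}$ (here $e^{0.00829} \approx 1.0083$, i.e. about a 0.83% increase in estimated sale price per year).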

9 / 9
library(AmesHousing)
ames_raw <- janitor::clean_names(ames_raw)
glimpse(ames_raw)
## Rows: 2,930
## Columns: 82
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ pid <chr> "0526301100", "0526350040", "0526351010", "0526353030"…
## $ ms_sub_class <chr> "020", "020", "020", "020", "060", "060", "120", "120"…
## $ ms_zoning <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
## $ lot_frontage <int> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 8…
## $ lot_area <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5…
## $ street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"…
## $ alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ lot_shape <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1"…
## $ land_contour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS"…
## $ utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "All…
## $ lot_config <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Ins…
## $ land_slope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl"…
## $ neighborhood <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilber…
## $ condition_1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm…
## $ condition_2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"…
## $ bldg_type <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "Twnhs…
## $ house_style <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2St…
## $ overall_qual <int> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, …
## $ overall_cond <int> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, …
## $ year_built <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ year_remod_add <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, …
## $ roof_style <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable…
## $ roof_matl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg",…
## $ exterior_1st <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ exterior_2nd <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ mas_vnr_type <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace",…
## $ mas_vnr_area <int> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603,…
## $ exter_qual <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", …
## $ exter_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ foundation <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PCon…
## $ bsmt_qual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", …
## $ bsmt_cond <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ bsmt_exposure <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", …
## $ bsmt_fin_type_1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ"…
## $ bsmt_fin_sf_1 <int> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 9…
## $ bsmt_fin_type_2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf"…
## $ bsmt_fin_sf_2 <int> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0…
## $ bsmt_unf_sf <int> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 76…
## $ total_bsmt_sf <int> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994…
## $ heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"…
## $ heating_qc <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
## $ central_air <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", …
## $ x1st_flr_sf <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 102…
## $ x2nd_flr_sf <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0,…
## $ low_qual_fin_sf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gr_liv_area <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1…
## $ bsmt_full_bath <int> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, …
## $ bsmt_half_bath <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ full_bath <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, …
## $ half_bath <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ bedroom_abv_gr <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, …
## $ kitchen_abv_gr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ kitchen_qual <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", …
## $ tot_rms_abv_grd <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8,…
## $ functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ"…
## $ fireplaces <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, …
## $ fireplace_qu <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "T…
## $ garage_type <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Att…
## $ garage_yr_blt <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ garage_finish <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn"…
## $ garage_cars <int> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, …
## $ garage_area <int> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440,…
## $ garage_qual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ garage_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ paved_drive <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ wood_deck_sf <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, …
## $ open_porch_sf <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 5…
## $ enclosed_porch <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ x3ssn_porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ screen_porch <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210…
## $ pool_area <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pool_qc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ fence <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, …
## $ misc_feature <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed"…
## $ misc_val <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, …
## $ mo_sold <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, …
## $ yr_sold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
## $ sale_type <chr> "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD "…
## $ sale_condition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Nor…
## $ sale_price <int> 215000, 105000, 172000, 244000, 189900, 195500, 213500…
2 / 9
Paused

Help

Keyboard shortcuts

↑, ←, Pg Up, k Go to previous slide
↓, →, Pg Dn, Space, j Go to next slide
Home Go to first slide
End Go to last slide
Number + Return Go to specific slide
b / m / f Toggle blackout / mirrored / fullscreen mode
c Clone slideshow
p Toggle presenter mode
t Restart the presentation timer
?, h Toggle this help
Esc Back to slideshow