# Setup:
library(tidymodels)
library(tidyverse)
library(tictoc) # Zeitmessung
# Load the Palmer penguins data from the Rdatasets CSV mirror.
d_path <- "https://vincentarelbundock.github.io/Rdatasets/csv/palmerpenguins/penguins.csv"
d <- read_csv(d_path)

# Build the preprocessing recipe one step at a time. body_mass_g is the
# outcome; every other column of `d` enters as a predictor.
# NOTE(review): this includes the CSV's `rownames` index column, which shows
# up normalized in the baked output -- confirm that is intended.
rec1 <- recipe(body_mass_g ~ ., data = d)

# Dummy-code the nominal predictors.
rec1 <- step_dummy(rec1, all_nominal_predictors())

# Normalize all predictors (including the dummy columns created above).
rec1 <- step_normalize(rec1, all_predictors())

# Drop rows that have missing values in any predictor.
# NOTE(review): this step runs after step_normalize(); presumably that is why
# most baked column means below are close to, but not exactly, 0 -- confirm
# whether the NA removal should come first.
rec1 <- step_naomit(rec1, all_predictors())

# tidymodels-remove-na
tidymodels
statlearning
template
string
Aufgabe
Erstellen Sie ein Rezept, das die fehlenden Werte aus dem Datensatz penguins entfernt.
Hinweise:
- Verwenden Sie `tidymodels`.
- Verwenden Sie Standardwerte, wo nicht anders angegeben.
- Fixieren Sie Zufallszahlen auf den Startwert 42.
Lösung
Als Check: Das gepreppte/gebackene Rezept:
# Estimate the recipe's steps on the data the recipe was defined with.
rec1_prepped <- prep(rec1)

# new_data = NULL asks bake() for the processed training data.
d_train_baked <- bake(rec1_prepped, new_data = NULL)

# Preview the first rows of the baked data.
d_train_baked |>
  head()

| rownames | bill_length_mm | bill_depth_mm | flipper_length_mm | year | body_mass_g | species_Chinstrap | species_Gentoo | island_Dream | island_Torgersen | sex_male |
|---|---|---|---|---|---|---|---|---|---|---|
| -1.724511 | -0.8832047 | 0.7843001 | -1.4162715 | -1.257484 | 3750 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | 0.9895421 |
| -1.714456 | -0.8099390 | 0.1260033 | -1.0606961 | -1.257484 | 3800 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | -1.0075337 |
| -1.704400 | -0.6634077 | 0.4298326 | -0.4206603 | -1.257484 | 3250 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | -1.0075337 |
| -1.684289 | -1.3227986 | 1.0881294 | -0.5628905 | -1.257484 | 3450 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | -1.0075337 |
| -1.674234 | -0.8465718 | 1.7464261 | -0.7762357 | -1.257484 | 3650 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | 0.9895421 |
| -1.664178 | -0.9198375 | 0.3285561 | -1.4162715 | -1.257484 | 3625 | -0.4956416 | -0.7496652 | -0.7496652 | 2.366233 | -1.0075337 |
# Summarise each baked column; n_Missing should be 0 throughout once the
# NA rows have been removed.
library(easystats)
describe_distribution(d_train_baked)

| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| rownames | 0.0183444 | 0.9893218 | 1.709428 | -1.7245110 | 1.7245110 | 0.0112758 | -1.1916974 | 333 | 0 |
| bill_length_mm | 0.0129796 | 1.0016640 | 1.694268 | -2.1653537 | 2.8716604 | 0.0453405 | -0.8834182 | 333 | 0 |
| bill_depth_mm | 0.0069350 | 0.9971857 | 1.569785 | -2.0514400 | 2.2021701 | -0.1497203 | -0.8919598 | 333 | 0 |
| flipper_length_mm | 0.0036811 | 0.9967324 | 1.635647 | -2.0563073 | 2.1394829 | 0.3601480 | -0.9612410 | 333 | 0 |
| year | 0.0158516 | 0.9933867 | 2.443924 | -1.2574843 | 1.1864400 | -0.0772613 | -1.4826040 | 333 | 0 |
| body_mass_g | 4207.0570571 | 805.2158019 | 1237.500000 | 2700.0000000 | 6300.0000000 | 0.4722461 | -0.7334890 | 333 | 0 |
| species_Chinstrap | 0.0163725 | 1.0122867 | 0.000000 | -0.4956416 | 2.0117218 | 1.4741850 | 0.1742318 | 333 | 0 |
| species_Gentoo | -0.0064633 | 0.9981442 | 2.079716 | -0.7496652 | 1.3300511 | 0.5980072 | -1.6523478 | 333 | 0 |
| island_Dream | 0.0185183 | 1.0052525 | 2.079716 | -0.7496652 | 1.3300511 | 0.5437762 | -1.7146419 | 333 | 0 |
| island_Torgersen | -0.0279365 | 0.9720172 | 0.000000 | -0.4213840 | 2.3662335 | 2.0707569 | 2.3018230 | 333 | 0 |
| sex_male | 0.0000000 | 1.0000000 | 1.997076 | -1.0075337 | 0.9895421 | -0.0181004 | -2.0117916 | 333 | 0 |
Categories:
- tidymodels
- statlearning
- template
- string