Skip to contents

Transforms the categorical variables in a data frame into one-hot encoded format.

Usage

data_to_onehot(data, iblm_model, remove_target = TRUE)

Arguments

data

Input data frame to be transformed. This will typically be the "train" data subset.

iblm_model

Object of class 'iblm'

remove_target

Logical; whether to remove the response variable (`response_var`) from the output (default `TRUE`).

Value

A data frame in wide format with one-hot encoded categorical variables, an intercept column, and all variables ordered according to `coeff_names$all` from `iblm_model`.

Examples

df_list <- freMTPLmini |> split_into_train_validate_test(seed = 9000)

iblm_model <- train_iblm_xgb(
  df_list,
  response_var = "ClaimRate",
  family = "poisson"
)

wide_input_frame <- data_to_onehot(df_list$test, iblm_model)

wide_input_frame |> dplyr::glimpse()
#> Rows: 3,764
#> Columns: 24
#> $ `(Intercept)` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ VehPower      <int> 11, 5, 6, 5, 7, 4, 6, 5, 7, 4, 7, 6, 5, 11, 7, 4, 7, 6, …
#> $ VehAge        <dbl> 1, 1, 8, 14, 12, 15, 12, 1, 1, 1, 0, 0, 1, 2, 4, 2, 0, 3…
#> $ DrivAge       <int> 58, 25, 42, 72, 47, 19, 45, 34, 37, 36, 41, 49, 55, 65, …
#> $ BonusMalus    <int> 50, 95, 50, 50, 50, 100, 90, 76, 50, 50, 50, 50, 50, 50,…
#> $ AreaA         <int> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ AreaB         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
#> $ AreaC         <int> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,…
#> $ AreaD         <int> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
#> $ AreaE         <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,…
#> $ AreaF         <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB1    <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB10   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB11   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB12   <int> 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,…
#> $ VehBrandB13   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB14   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB2    <int> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,…
#> $ VehBrandB3    <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB4    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB5    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehBrandB6    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ VehGasDiesel  <int> 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,…
#> $ VehGasRegular <int> 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,…