본문 바로가기
데이터분석/R

[ADP] 정규화 모델

by 버섯도리 2022. 1. 15.

> # 01. 정규화 모델 [릿지(Ridge), 라쏘(Lasso), 엘라스틱넷(ElasticNet)]

> # 1. 정규화 개념

> library(ridge)

> data("longley")
> head(longley)
     GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
1952         98.1 346.999      193.2        359.4    113.270 1952   63.639

> names(longley)[1] <- 'y'
> head(longley)
        y     GNP Unemployed Armed.Forces Population Year Employed
1947 83.0 234.289      235.6        159.0    107.608 1947   60.323
1948 88.5 259.426      232.5        145.6    108.632 1948   61.122
1949 88.2 258.054      368.2        161.6    109.773 1949   60.171
1950 89.5 284.599      335.1        165.0    110.929 1950   61.187
1951 96.2 328.975      209.9        309.9    112.075 1951   63.221
1952 98.1 346.999      193.2        359.4    113.270 1952   63.639

> mod <- linearRidge(y~.-1, data = longley, lambda = "automatic")
> options(scipen = 999)
> summary(mod)

Call:
linearRidge(formula = y ~ . - 1, data = longley, lambda = "automatic")


Coefficients:
             Estimate Scaled estimate Std. Error (scaled) t value (scaled)  Pr(>|t|)    
GNP           0.04338        16.69895             3.68931            4.526 0.0000060 ***
Unemployed    0.01184         4.28639             2.50693            1.710    0.0873 .  
Armed.Forces  0.01381         3.72087             1.90482            1.953    0.0508 .  
Population   -0.02831        -0.76273             5.28549            0.144    0.8853    
Year          0.65665        12.10811             2.69073            4.500 0.0000068 ***
Employed      0.67454         9.17494             4.99599            1.836    0.0663 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Ridge parameter: 0.01046912, chosen automatically, computed using 2 PCs

Degrees of freedom: model 3.67 , variance 3.218 , residual 4.123 


> library(genridge)
> lambda <- c(0, 0.005, 0.01, 0.02, 0.04, 0.08)
> r <- ridge(y~., longley, lambda = lambda)
경고메시지(들): 
In model.matrix.default(Terms, m, contrasts) :
  non-list contrasts argument ignored
> traceplot(r)

> # 람다(λ) 값이 커질수록 회귀계수(β)의 크기(절댓값)가 전반적으로 0에 가깝게 축소된다.

 

 

 

 

 

출처 : 2020 데이터 분석 전문가 ADP 필기 한 권으로 끝내기