본문 바로가기
DNN with Keras/Regularization and Dropout

정규화 기술 벤치마킹 (2) (분류)

by goatlab 2023. 7. 24.
728x90
반응형
SMALL

Bootstrapping for Classification

 

https://www.researchgate.net/figure/Figure-C1-Benchmark-for-the-Ridge-regression-on-10-solvers-5-datasets-rows-and-3_fig1_361578637

 

분류 문제의 부트스트래핑은 StratifiedShuffleSplit 클래스를 사용하여 데이터를 분할한다. 이 클래스는 표본을 추출한 뒤에도 각 분할의 클래스 비율이 원본 데이터와 같게 유지되도록 층화 추출을 수행하므로, 교차 검증에 사용하는 StratifiedKFold와 유사하다.

 

import pandas as pd
import os
import numpy as np
import time
import statistics
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import StratifiedShuffleSplit

# Bootstrap benchmark for a classification network.
#
# Repeatedly draws a stratified random train/test split, trains a fresh
# network with early stopping, and prints the running mean and standard
# deviation of the per-split score together with the epochs needed.
#
# Expects `x` (feature matrix), `y` (target matrix) and `hms_string`
# (seconds -> display string) to be defined earlier; they are not part of
# this section.

EPOCHS = 500  # upper bound on training epochs per split
SPLITS = 50   # number of bootstrap resamples

# Bootstrap: fixed seed so the sequence of splits is repeatable.
boot = StratifiedShuffleSplit(n_splits=SPLITS, test_size=0.1, random_state=42)

# Track progress
mean_benchmark = []
epochs_needed = []

# StratifiedShuffleSplit.split() requires the class labels to stratify on.
# Assumes y is one-hot encoded (y.shape[1] is used as the number of softmax
# outputs below), so argmax recovers one integer label per row.
class_labels = np.argmax(y, axis=1)

for num, (train, test) in enumerate(boot.split(x, class_labels), start=1):
  start_time = time.time()

  x_train, y_train = x[train], y[train]
  x_test, y_test = x[test], y[test]

  # A new model per split so no weights leak between resamples.
  model = Sequential()
  model.add(Dense(50, input_dim=x.shape[1], activation='relu'))
  model.add(Dense(25, activation='relu'))
  model.add(Dense(y.shape[1], activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam')
  monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                          patience=25, verbose=0, mode='auto',
                          restore_best_weights=True)

  # NOTE: the held-out split doubles as the early-stopping validation set.
  model.fit(x_train, y_train, validation_data=(x_test, y_test),
            callbacks=[monitor], verbose=0, epochs=EPOCHS)

  # stopped_epoch stays 0 when early stopping never fired, i.e. training
  # ran the full EPOCHS; record EPOCHS then so the mean is not skewed.
  epochs = monitor.stopped_epoch or EPOCHS
  epochs_needed.append(epochs)

  # Score this split: RMSE between the one-hot targets and the predicted
  # class probabilities.
  pred = model.predict(x_test)
  score = np.sqrt(metrics.mean_squared_error(y_test, pred))
  mean_benchmark.append(score)
  m1 = statistics.mean(mean_benchmark)
  m2 = statistics.mean(epochs_needed)
  mdev = statistics.pstdev(mean_benchmark)

  # Record this iteration
  time_took = time.time() - start_time
  print(f"#{num} : score = {score : .6f}, mean score = {m1:.6f}",
        f"stdev = {mdev : .6f}",
        f"epochs = {epochs}, mean epochs = {int(m2)}",
        f"time = {hms_string(time_took)}")

 

Benchmarking

 

이제 분류와 회귀 모두에 대해 부트스트랩하는 방법을 살펴보았으므로, 데이터에 대한 하이퍼 파라미터 최적화를 시도할 수 있다. 평가 지표로는 로그 손실(log loss)을 사용한다.

 

import pandas as pd
import os
import numpy as np
import time
import statistics
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.layers import LeakyReLU, PReLU

# Benchmark of a regularized classification network via bootstrapping.
#
# For each stratified random split a fresh network (PReLU activations, L2
# kernel regularization, Dropout) is trained with early stopping and scored
# by log loss on the held-out split; running statistics are printed after
# every split so different hyperparameter choices can be compared.
#
# Expects `x` (feature matrix), `y` (target matrix) and `hms_string`
# (seconds -> display string) to be defined earlier; they are not part of
# this section.

SPLITS = 100   # number of bootstrap resamples
EPOCHS = 1000  # upper bound on training epochs per split

# Bootstrap: fixed seed so benchmark runs are comparable with each other.
boot = StratifiedShuffleSplit(n_splits=SPLITS, test_size=0.1, random_state=42)

# Track progress
mean_benchmark = []
epochs_needed = []

# StratifiedShuffleSplit.split() requires the class labels to stratify on.
# Assumes y is one-hot encoded (y.shape[1] is used as the number of softmax
# outputs below), so argmax recovers one integer label per row.
class_labels = np.argmax(y, axis=1)
all_classes = np.arange(y.shape[1])

for num, (train, test) in enumerate(boot.split(x, class_labels), start=1):
  start_time = time.time()

  x_train, y_train = x[train], y[train]
  x_test, y_test = x[test], y[test]

  # A new model per split so no weights leak between resamples.
  model = Sequential()
  model.add(Dense(100, input_dim=x.shape[1], activation=PReLU(),
                  kernel_regularizer=regularizers.l2(1e-4)))
  model.add(Dropout(0.5))
  model.add(Dense(100, activation=PReLU(),
                  kernel_regularizer=regularizers.l2(1e-4)))
  model.add(Dropout(0.5))
  model.add(Dense(100, activation=PReLU(),
                  kernel_regularizer=regularizers.l2(1e-4)))
  # Dropout is deliberately left off the last hidden layer; the original
  # note says this usually gives better performance.
  model.add(Dense(y.shape[1], activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam')
  monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                          patience=100, verbose=0, mode='auto',
                          restore_best_weights=True)

  # NOTE: the held-out split doubles as the early-stopping validation set.
  model.fit(x_train, y_train, validation_data=(x_test, y_test),
            callbacks=[monitor], verbose=0, epochs=EPOCHS)

  # stopped_epoch stays 0 when early stopping never fired, i.e. training
  # ran the full EPOCHS; record EPOCHS then so the mean is not skewed.
  epochs = monitor.stopped_epoch or EPOCHS
  epochs_needed.append(epochs)

  # Score this split by log loss, the evaluation metric named in the text.
  # `labels` is passed explicitly so the call still works if a held-out
  # split happens to miss a class.
  pred = model.predict(x_test)
  y_compare = np.argmax(y_test, axis=1)
  score = metrics.log_loss(y_compare, pred, labels=all_classes)
  mean_benchmark.append(score)
  m1 = statistics.mean(mean_benchmark)
  m2 = statistics.mean(epochs_needed)
  mdev = statistics.pstdev(mean_benchmark)

  # Record this iteration
  time_took = time.time() - start_time
  print(f"#{num} : score = {score : .6f}, mean score = {m1:.6f}",
        f"stdev = {mdev : .6f}",
        f"epochs = {epochs}, mean epochs = {int(m2)}",
        f"time = {hms_string(time_took)}")
728x90
반응형
LIST