App Programming/MLops
[MLops] 모델 저장하기
goatlab
2024. 8. 12. 15:27
728x90
반응형
SMALL
라이브러리 설치
ONNX 포맷으로 저장하기 위해 다음을 실행한다. 그리고 Fire 라이브러리를 활용하면 task 별로 필요한 인자를 설정하여 CLI 기반 프로그램을 쉽고 빠르게 만들 수 있다. task를 분리하면 다양한 장점이 있다 (필요한 태스크만 수행, 트러블 슈팅 및 디버깅 용이, 유연한 자원 할당, 유지보수성, 워크플로우 관리 등).
pip install onnx onnxruntime fire
src/model/movie_predictor.py
torch(pth) 포맷으로 저장하기 위해 아래 코드를 추가한다.
import os
import datetime
import torch
from src.utils.utils import model_dir
import torch.nn as nn
class MoviePredictor(nn.Module):
    """Three-layer MLP producing movie-class logits from a feature vector.

    Architecture: input_dim -> 64 -> 32 -> num_classes, with ReLU
    activations and dropout (p=0.2) after each of the first two layers.
    Returns raw logits (no softmax) — pair with nn.CrossEntropyLoss.
    """

    # Identifier used by model_save()/model_dir() to build the save path.
    name = "movie_predictor"

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Kept on the instance so ONNX export can build a dummy input.
        self.input_dim = input_dim
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return unnormalized class logits for a batch of feature rows."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        hidden = self.dropout(self.relu(self.layer2(hidden)))
        return self.layer3(hidden)
def model_save(model, model_params, epoch, optimizer, loss, scaler, contents_id_map, ext="pth"):
    """Persist a trained model to disk in one of two formats.

    ext="pth"  : full torch checkpoint — weights, optimizer state, loss,
                 plus preprocessing artifacts (scaler, contents_id_map)
                 needed to resume training or serve predictions.
    ext="onnx" : ONNX graph traced from a random dummy input of shape
                 (1, model.input_dim), for framework-agnostic inference.

    The file is written to model_dir(model.name) with a name encoding the
    epoch and a timestamp: E{epoch}_T{yymmddHHMMSS}.{ext}.

    Raises:
        ValueError: if ext is neither "pth" nor "onnx".
    """
    save_dir = model_dir(model.name)
    os.makedirs(save_dir, exist_ok=True)

    timestamp = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    dst = os.path.join(save_dir, f"E{epoch}_T{timestamp}.{ext}")

    if ext == "pth":
        checkpoint = {
            "epoch": epoch,
            "model_params": model_params,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
            "scaler": scaler,
            "contents_id_map": contents_id_map,
        }
        torch.save(checkpoint, dst)
    elif ext == "onnx":
        dummy_input = torch.randn(1, model.input_dim)
        torch.onnx.export(model, dummy_input, dst, export_params=True)
    else:
        raise ValueError(f"Invalid model export extension : {ext}")
src/main.py
import os
import sys
sys.path.append(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
import fire
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from src.dataset.watch_log import get_datasets
from src.model.movie_predictor import MoviePredictor
from src.utils.utils import init_seed
from src.train.train import train
from src.evaluate.evaluate import evaluate
from src.model.movie_predictor import MoviePredictor, model_save
from utils.constant import Optimizers, Models
# Presumably seeds the RNGs once at import time for reproducible runs —
# confirm against src/utils/utils.py.
init_seed()
def run_train(model_name, optimizer, num_epochs=10, lr=0.001, model_ext="pth"):
    """Train the requested model on the watch-log dataset, save it, and report test loss.

    Args:
        model_name: key into the Models enum (case-insensitive), e.g. "movie_predictor".
        optimizer: key into the Optimizers enum (case-insensitive), e.g. "adam".
        num_epochs: number of training epochs.
        lr: optimizer learning rate.
        model_ext: save format passed to model_save — "pth" or "onnx".
    """
    Models.validation(model_name)
    Optimizers.validation(optimizer)

    # Build datasets and DataLoaders (train shuffled; val/test kept in order).
    train_dataset, val_dataset, test_dataset = get_datasets()
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=False)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=0, pin_memory=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0, pin_memory=False)

    # Instantiate the model; model_params is also saved into the checkpoint
    # so the model can be rebuilt at load time.
    model_params = {
        "input_dim": train_dataset.features_dim,
        "num_classes": train_dataset.num_classes,
    }
    model_class = Models[model_name.upper()].value
    model = model_class(**model_params)

    # Loss and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer_class = Optimizers[optimizer.upper()].value
    optimizer = optimizer_class(model.parameters(), lr=lr)

    # Training loop.
    # BUG FIX: the original reassigned num_epochs = 10 and model_ext = "onnx"
    # inside this function, silently overriding the CLI arguments
    # (--num_epochs / --model_ext). The parameters are now respected.
    train_loss = 0.0
    for epoch in tqdm(range(num_epochs)):
        train_loss = train(model, train_loader, criterion, optimizer)
        val_loss, _ = evaluate(model, val_loader, criterion)
        print(f"Epoch {epoch + 1}/{num_epochs}, "
              f"Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, "
              f"Val-Train Loss : {val_loss-train_loss:.4f}")

    # Persist the trained model together with preprocessing artifacts.
    model_save(
        model=model,
        model_params=model_params,
        epoch=num_epochs,
        optimizer=optimizer,
        loss=train_loss,
        scaler=train_dataset.scaler,
        contents_id_map=train_dataset.contents_id_map,
        ext=model_ext,
    )

    # Final evaluation on the held-out test split.
    model.eval()
    test_loss, predictions = evaluate(model, test_loader, criterion)
    print(f"Test Loss : {test_loss:.4f}")
    # print([train_dataset.decode_content_id(idx) for idx in predictions])
if __name__ == '__main__':
    # Expose each task as a CLI subcommand via Fire, e.g.:
    #   python src/main.py train --model_name movie_predictor --optimizer adam
    fire.Fire({
        "train": run_train,
    })
# CLI
python src/main.py train --model_name movie_predictor --optimizer adam --num_epochs 20 --lr 0.002
728x90
반응형
LIST