App Programming/MLops

[MLops] 모델 저장하기

goatlab 2024. 8. 12. 15:27
728x90
반응형
SMALL

라이브러리 설치

 

모델을 ONNX 포맷으로 저장하려면 아래 명령으로 onnx, onnxruntime 라이브러리를 설치한다. 함께 설치하는 Fire 라이브러리를 활용하면 task 별로 필요한 인자를 설정하여 CLI 기반 프로그램을 쉽고 빠르게 만들 수 있다. task를 분리하면 다양한 장점이 있다 (필요한 태스크만 수행, 트러블 슈팅 및 디버깅 용이, 유연한 자원 할당, 유지보수성 향상, 워크플로우 관리 용이 등).

 

pip install onnx onnxruntime fire

 

src/model/movie_predictor.py

 

torch(pth) 포맷으로 저장하기 위해 아래 코드를 추가한다 (ext 인자를 "onnx"로 지정하면 ONNX 포맷으로도 저장할 수 있다).

 

import os
import datetime
import torch
from src.utils.utils import model_dir
import torch.nn as nn

class MoviePredictor(nn.Module):
    """Three-layer fully connected classifier.

    Features of width ``input_dim`` pass through hidden layers of width 64
    and 32, each followed by ReLU and dropout (p=0.2), and are projected to
    ``num_classes`` raw scores. No softmax is applied in ``forward``.
    """

    # Identifier of this model class, read as ``model.name`` by model_save.
    name = "movie_predictor"

    def __init__(self, input_dim, num_classes):
        """Build the layers.

        Args:
            input_dim: Number of input features per sample.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        # Kept on the instance so the expected feature width can be read back
        # later (model_save uses it to build the ONNX dummy input).
        self.input_dim = input_dim
        self.layer1 = nn.Linear(in_features=input_dim, out_features=64)
        self.layer2 = nn.Linear(in_features=64, out_features=32)
        self.layer3 = nn.Linear(in_features=32, out_features=num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the raw class scores for the batch ``x``."""
        # Both hidden layers share the same activation + dropout treatment.
        for hidden in (self.layer1, self.layer2):
            x = self.dropout(self.relu(hidden(x)))
        return self.layer3(x)

def model_save(model, model_params, epoch, optimizer, loss, scaler, contents_id_map, ext="pth"):
    """Persist a trained model as a PyTorch checkpoint or as an ONNX graph.

    The file is written to ``model_dir(model.name)`` and is named
    ``E{epoch}_T{yymmddHHMMSS}.{ext}``.

    Args:
        model: Module to save. Must expose ``name`` and, for ONNX export,
            ``input_dim``.
        model_params: Stored in the checkpoint under ``"model_params"``
            (``pth`` only).
        epoch: Used in the file name and stored in the checkpoint.
        optimizer: Its ``state_dict()`` is stored in the checkpoint
            (``pth`` only).
        loss: Stored in the checkpoint (``pth`` only).
        scaler: Stored as-is in the checkpoint (``pth`` only).
        contents_id_map: Stored as-is in the checkpoint (``pth`` only).
        ext: Output format, either ``"pth"`` or ``"onnx"``.

    Raises:
        ValueError: If ``ext`` is not a supported format. Raised before any
            directory or file is created.
    """
    # Validate first so that a bad extension leaves no directory behind.
    if ext not in ("pth", "onnx"):
        raise ValueError(f"Invalid model export extension : {ext}")

    save_dir = model_dir(model.name)
    os.makedirs(save_dir, exist_ok=True)

    current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    dst = os.path.join(save_dir, f"E{epoch}_T{current_time}.{ext}")

    if ext == "pth":
        torch.save({
            "epoch": epoch,
            "model_params": model_params,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
            "scaler": scaler,
            "contents_id_map": contents_id_map,
        }, dst)
        return

    # ONNX export: the example input must live on the same device as the
    # model's weights, otherwise the forward pass fails on a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    dummy_input = torch.randn(1, model.input_dim, device=device)

    # Export in eval mode so dropout is inactive, then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        torch.onnx.export(
            model,
            dummy_input,
            dst,
            export_params=True,
            input_names=["input"],
            output_names=["output"],
            # Without a dynamic batch axis the graph only accepts batch size 1.
            dynamic_axes={
                "input": {0: "batch_size"},
                "output": {0: "batch_size"},
            },
        )
    finally:
        model.train(was_training)

 

src/main.py

 

import os
import sys

# Append the parent of this file's directory to the module search path so the
# absolute `src.*` imports below can be resolved when this file is executed
# directly (e.g. `python src/main.py ...`).
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)

import fire
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from src.dataset.watch_log import get_datasets
from src.model.movie_predictor import MoviePredictor
from src.utils.utils import init_seed
from src.train.train import train
from src.evaluate.evaluate import evaluate
from src.model.movie_predictor import MoviePredictor, model_save
from utils.constant import Optimizers, Models

# Executed once at import time, before any dataset or model is created.
# NOTE(review): presumably fixes the RNG seeds for reproducibility — confirm
# against src/utils/utils.py.
init_seed()

def run_train(model_name, optimizer, num_epochs=10, lr=0.001, model_ext="pth"):
    """Train a model, save it, and report the loss on the test split.

    Args:
        model_name: Name of a member of ``Models`` (looked up upper-cased).
        optimizer: Name of a member of ``Optimizers`` (looked up upper-cased).
        num_epochs: Number of passes over the training loader.
        lr: Learning rate handed to the optimizer constructor.
        model_ext: Format passed to ``model_save`` (``"pth"`` or ``"onnx"``).
    """
    # NOTE(review): presumably these raise on unknown names — confirm in
    # utils/constant.py.
    Models.validation(model_name)
    Optimizers.validation(optimizer)

    # Build the datasets and their loaders; only the training split is shuffled.
    train_dataset, val_dataset, test_dataset = get_datasets()
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=False)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=0, pin_memory=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0, pin_memory=False)

    # Instantiate the model; the same kwargs are stored alongside the weights.
    model_params = {
        "input_dim": train_dataset.features_dim,
        "num_classes": train_dataset.num_classes,
    }
    model_class = Models[model_name.upper()].value
    model = model_class(**model_params)

    # Loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer_class = Optimizers[optimizer.upper()].value
    optimizer = optimizer_class(model.parameters(), lr=lr)

    # Training loop. `num_epochs` comes from the caller/CLI and must not be
    # overwritten here, otherwise `--num_epochs` is silently ignored.
    train_loss = 0  # value handed to model_save when num_epochs is 0
    for epoch in tqdm(range(num_epochs)):
        train_loss = train(model, train_loader, criterion, optimizer)
        val_loss, _ = evaluate(model, val_loader, criterion)
        print(f"Epoch {epoch + 1}/{num_epochs}, "
              f"Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, "
              f"Val-Train Loss : {val_loss-train_loss:.4f}")

    # Save in the format requested through `model_ext` ("pth" or "onnx").
    model_save(
        model=model,
        model_params=model_params,
        epoch=num_epochs,
        optimizer=optimizer,
        loss=train_loss,
        scaler=train_dataset.scaler,
        contents_id_map=train_dataset.contents_id_map,
        ext=model_ext,
    )

    # Final evaluation on the held-out test split.
    model.eval()
    test_loss, predictions = evaluate(model, test_loader, criterion)
    print(f"Test Loss : {test_loss:.4f}")
    # print([train_dataset.decode_content_id(idx) for idx in predictions])

if __name__ == "__main__":
    # Expose each task as a Fire sub-command, e.g. `python src/main.py train ...`.
    fire.Fire(dict(train=run_train))

# CLI
python src/main.py train --model_name movie_predictor --optimizer adam --num_epochs 20 --lr 0.002

 

 

 

728x90
반응형
LIST