728x90
반응형
SMALL
와인 품질을 3 ~ 9 사이의 숫자 값으로 예측 (회귀)
winequality-white.csv
0.25MB
winequality-red.csv
0.08MB
import pandas as pd
# Load the data.
# The original files use a semicolon as the field separator.
red_df = pd.read_csv('./winequality-red.csv', sep=';')
white_df = pd.read_csv('./winequality-white.csv', sep=';')
red_df.head()
white_df.head()
# Save comma-separated copies (CSV files conventionally use a comma rather
# than a semicolon), then reload them with the default separator.
red_df.to_csv('./winequality-red2.csv', index=False)
white_df.to_csv('./winequality-white2.csv', index=False)
red_df = pd.read_csv('./winequality-red2.csv')
white_df = pd.read_csv('./winequality-white2.csv')
red_df.head()
white_df.head()
# Stack the two frames row-wise into a single frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it both
# inputs keep their own labels starting at 0, so labels repeat and a
# label-based lookup such as wine_df.loc[0] returns more than one row.
wine_df = pd.concat([red_df, white_df], axis=0, ignore_index=True)
wine_df.shape
wine_df.head()
# Standardize every column except 'quality'; 'quality' is left unscaled
# because it is used as the label.
from sklearn.preprocessing import StandardScaler
wine_scaled_col = wine_df.columns.difference(['quality'])
scaler = StandardScaler()
# NOTE(review): the scaler is fit on every row of wine_df, so its statistics
# also cover any rows that are later held out for evaluation.
wine_scaled_np = scaler.fit_transform(wine_df.loc[:, wine_scaled_col])
# fit_transform returns a NumPy array, so wrap it in a DataFrame again and
# attach the label column by position (raw values), not by index label.
wine_scaled_df = pd.DataFrame(data=wine_scaled_np, columns=wine_scaled_col).assign(
    quality=wine_df['quality'].to_numpy()
)
# Notebook-style inspection of the scaled frame: summary statistics, the
# label column, dtypes, missing-value counts and the label distribution.
wine_scaled_df.describe()
wine_scaled_df['quality']
wine_scaled_df.info()
wine_scaled_df.describe()
wine_scaled_df.isnull().sum()
wine_scaled_df['quality'].unique()
wine_scaled_df['quality'].value_counts()
wine_scaled_df['quality'].isna()
# Input (feature) columns: every column except the label.
feature_col = wine_scaled_df.columns.difference(['quality'])
# Label column.
label_col = wine_scaled_df['quality']
wine_feature_df = wine_scaled_df[feature_col]
wine_label_df = label_col
wine_feature_df.head()
wine_label_df.head()
import numpy as np
# Convert to float32 NumPy arrays for training.
wine_feature_np = wine_feature_df.to_numpy().astype('float32')
wine_label_np = wine_label_df.to_numpy().astype('float32')
print(wine_feature_np.shape, wine_label_np.shape)
# Shuffle the rows before splitting so the held-out slice is not simply the
# first rows of wine_scaled_df. A seeded generator makes the permutation, and
# therefore the train/test split, identical on every run.
SHUFFLE_SEED = 42
s = np.random.default_rng(SHUFFLE_SEED).permutation(len(wine_feature_np))
# The same permutation indexes both arrays, keeping features and labels aligned.
wine_feature_np = wine_feature_np[s]
wine_label_np = wine_label_np[s]
print(wine_feature_np.shape, wine_label_np.shape)
# Split 80% : 20% into train / test data.
# split_ratio is the fraction of rows held out for testing.
split_ratio = 0.20
test_num = int(split_ratio*len(wine_feature_np))
# The first test_num shuffled rows form the test set; the rest are for training.
x_test = wine_feature_np[:test_num]
y_test = wine_label_np[:test_num]
x_train = wine_feature_np[test_num:]
y_train = wine_label_np[test_num:]
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
# Linear regression expressed as a network: a single Dense unit with a linear
# activation, i.e. a weighted sum of the input features plus a bias.
# The input width is taken from the number of columns in x_train.
model = Sequential([
    Dense(units=1, activation='linear', input_shape=(x_train.shape[1],)),
])
model.summary()
Model: "sequential_9"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_17 (Dense) (None, 1) 12
=================================================================
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________
# Optimize mean squared error with Adam (default settings) and additionally
# report mean absolute error during training.
model.compile(
    loss='mse',
    metrics=['mae'],
    optimizer=tf.keras.optimizers.Adam(),
)
from datetime import datetime
# Wall-clock timestamps taken around the training run to report its duration.
start_time = datetime.now()
# NOTE(review): the test split doubles as the validation set here, so the
# validation curve is measured on the same data used for the final check.
hist = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    validation_data=(x_test, y_test),
)
end_time = datetime.now()
print('elapsed time => ', end_time-start_time)
# Compare predictions with the true labels on the last five test rows.
tail_inputs = x_test[-5:]
pred = model.predict(tail_inputs)
print('prediction = ', pred.reshape(-1))
print('label =', y_test[-5:])
prediction = [5.30889 6.671764 6.099701 5.728717 5.601633]
label = [6. 7. 7. 6. 5.]
# Plot the per-epoch training and validation loss recorded by model.fit.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(hist.history[history_key], label=curve_label)
plt.title('loss trend')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.grid()
plt.legend(loc='best')
plt.show()
728x90
반응형
LIST
'AI-driven Methodology > Artificial Intelligence' 카테고리의 다른 글
[AI] 콜백 (Callback) (0) | 2022.07.31 |
---|---|
[AI] 와인 종류 예측 (0) | 2022.07.24 |
[AI] 표준화 (Standardization) (0) | 2022.07.24 |
[AI] 분류 (Classification) (0) | 2022.07.23 |
[AI] 다변수 선형 회귀 (0) | 2022.07.17 |