728x90
반응형
SMALL
GRU로 삼성전자 주가 예측
Yahoo Finance에서 데이터를 다운로드한 후 3일(3MA), 5일(5MA) 가격 이동평균선을 추가한다.
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
# Load the price history previously downloaded from Yahoo Finance.
# NOTE(review): the file name suggests the 3MA/5MA columns are already stored
# in the CSV - confirm against the data-preparation step.
raw_df = pd.read_csv('./000000.KS_3MA_5MA.csv')
raw_df.head()  # notebook preview of the first rows

# Plot the adjusted close over the whole period.
plt.title('SAMSUNG ELECTRONICS STOCK PRICE')
plt.ylabel('price')
plt.xlabel('period')
plt.grid()
plt.plot(raw_df['Adj Close'], label='Adj Close')
# The label passed to plot() is only rendered when a legend is drawn.
plt.legend(loc='best')
plt.show()
데이터 전처리
# Summary statistics for the numeric columns
raw_df.describe()
# Number of missing values in each column
raw_df.isna().sum()
Date 0
Open 6
High 6
Low 6
Close 6
Adj Close 6
Volume 6
3MA 2
5MA 4
dtype: int64
# Report every column whose minimum value is 0
for col in raw_df.columns:
    if raw_df[col].min() != 0:
        continue
    col_name = col
    print(col_name, type(col_name))
Volume <class 'str'>
# Rows whose Volume is exactly 0
raw_df[raw_df['Volume'] == 0]
# Count how many entries equal 0 in each column
for col in raw_df.columns:
    zero_count = (raw_df[col] == 0).sum()
    print(f'{col}: {zero_count}')
Date: 0
Open: 0
High: 0
Low: 0
Close: 0
Adj Close: 0
Volume: 116
3MA: 0
5MA: 0
# Turn zero volumes into NaN first so they are handled as missing data
raw_df['Volume'] = raw_df['Volume'].mask(raw_df['Volume'] == 0)
# Re-count how many entries equal 0 in each column
for col in raw_df.columns:
    zero_count = len(raw_df[raw_df[col] == 0])
    print(f'{col}: {zero_count}')
Date: 0
Open: 0
High: 0
Low: 0
Close: 0
Adj Close: 0
Volume: 0
3MA: 0
5MA: 0
# Missing values per column after the zero -> NaN replacement
raw_df.isna().sum()
Date 0
Open 6
High 6
Low 6
Close 6
Adj Close 6
Volume 122
3MA 2
5MA 4
dtype: int64
# Which columns contain at least one missing value
raw_df.isna().any()
Date False
Open True
High True
Low True
Close True
Adj Close True
Volume True
3MA True
5MA True
dtype: bool
# Rows where the Open price is missing
raw_df[raw_df['Open'].isnull()]
# Handle missing data: discard every row that has a NaN in any column
raw_df = raw_df.dropna(axis=0, how='any')
raw_df.isna().sum()
Date 0
Open 0
High 0
Low 0
Close 0
Adj Close 0
Volume 0
3MA 0
5MA 0
dtype: int64
# Normalization: min-max scale every numeric column (everything except Date).
from sklearn.preprocessing import MinMaxScaler

scale_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close',
              '3MA', '5MA', 'Volume']

# The last TEST_ROWS rows become the test labels (keep in sync with
# `split = -200` in the train/test separation). The scaler learns min/max
# from the preceding rows only, so no test-period price level leaks into the
# training inputs. Test-period values may therefore fall outside [0, 1].
TEST_ROWS = 200
scaler = MinMaxScaler()
scaler.fit(raw_df[scale_cols].iloc[:-TEST_ROWS])

# transform() returns a plain ndarray; wrap it again to keep column names.
scaled_df = scaler.transform(raw_df[scale_cols])
scaled_df = pd.DataFrame(scaled_df, columns=scale_cols)
print(scaled_df)
# Input parameters feature and label are expected to be numpy arrays.
def make_sequene_dataset(feature, label, window_size):
    """Build sliding-window samples for sequence models.

    Sample ``i`` consists of the ``window_size`` consecutive rows
    ``feature[i:i + window_size]`` and is paired with the label of the row
    that immediately follows the window, ``label[i + window_size]``.

    Args:
        feature: Array-like of shape ``(n_rows, ...)`` holding the inputs.
        label: Array-like indexed like ``feature`` holding the targets.
        window_size: Number of consecutive rows per sample; must be >= 1.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays with
        ``X.shape == (n_rows - window_size, window_size, ...)`` and
        ``Y.shape == (n_rows - window_size, ...)``. When there are not enough
        rows for a single window, both arrays have length 0 but keep their
        trailing dimensions.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
        IndexError: If ``label`` has fewer rows than required.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    feature = np.asarray(feature)
    label = np.asarray(label)

    n_samples = len(feature) - window_size
    if n_samples <= 0:
        # Keep the trailing dimensions so downstream shape checks still work
        # instead of receiving bare shape-(0,) arrays.
        empty_x = np.empty((0, window_size) + feature.shape[1:],
                           dtype=feature.dtype)
        empty_y = np.empty((0,) + label.shape[1:], dtype=label.dtype)
        return empty_x, empty_y

    feature_list = [feature[i:i + window_size] for i in range(n_samples)]
    label_list = [label[i + window_size] for i in range(n_samples)]
    return np.array(feature_list), np.array(label_list)
# Select the model inputs and the prediction target from the scaled data
feature_cols = ['3MA', '5MA', 'Adj Close']
label_cols = ['Adj Close']
feature_df = scaled_df[feature_cols]
label_df = scaled_df[label_cols]

# Convert both DataFrames to numpy arrays
feature_np, label_np = feature_df.to_numpy(), label_df.to_numpy()
print(feature_np.shape, label_np.shape)
(5269, 3) (5269, 1)
시계열 데이터 생성 (make_sequene_dataset)
# Each sample is a 40-row window; its label is the row right after the window
window_size = 40
X, Y = make_sequene_dataset(feature=feature_np,
                            label=label_np,
                            window_size=window_size)
print(X.shape, Y.shape)
(5229, 40, 3) (5229, 1)
# Chronological train/test separation: the last 200 samples form the test set
split = -200
x_train, x_test = X[:split], X[split:]
y_train, y_test = Y[:split], Y[split:]
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
(5029, 40, 3) (5029, 1)
(200, 40, 3) (200, 1)
모델 생성
# Build the network: one GRU layer followed by a single linear output unit
model = Sequential([
    GRU(256, activation='tanh', input_shape=x_train.shape[1:]),
    Dense(1, activation='linear'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
model.summary()
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has not improved for 5 epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Validate on a hold-out taken from the training windows instead of on
# (x_test, y_test): the test set is used for the final evaluation, so letting
# it decide when to stop would make that evaluation optimistic.
# validation_split takes the LAST 10% of x_train/y_train before shuffling,
# which keeps the hold-out chronologically after the data trained on.
model.fit(x_train, y_train,
          validation_split=0.1,
          epochs=100, batch_size=16,
          callbacks=[early_stop])
# Predict on the held-out windows and compare against the actual values
pred = model.predict(x_test)

_, ax = plt.subplots(figsize=(12, 6))
ax.set_title('3MA + 5MA + Adj Close, window_size=40')
ax.set_ylabel('adj close')
ax.set_xlabel('period')
ax.plot(y_test, label='actual')
ax.plot(pred, label='prediction')
ax.grid()
ax.legend(loc='best')
plt.show()
# Mean absolute percentage error (MAPE), printed as a fraction (0.01 == 1%).
# y_test and pred are min-max scaled, so dividing by y_test directly would
# divide by values that can be arbitrarily close to 0 and would not express
# an error relative to the price. Map both series back to price units using
# the 'Adj Close' statistics stored in the fitted scaler first
# (MinMaxScaler with the default range: x = x_scaled * data_range_ + data_min_).
adj_idx = scale_cols.index('Adj Close')
price_min = scaler.data_min_[adj_idx]
price_range = scaler.data_range_[adj_idx]
actual_price = y_test * price_range + price_min
pred_price = pred * price_range + price_min
print( np.mean(np.abs(actual_price - pred_price) / np.abs(actual_price)) )
728x90
반응형
LIST
'AI-driven Methodology > ANN' 카테고리의 다른 글
[ANN] 신경망 구현 (0) | 2022.11.17 |
---|---|
[ANN] 퍼셉트론 연산 (0) | 2022.11.10 |
[ANN] LSTM으로 삼성전자 주가 예측 (0) | 2022.10.21 |
[ANN] SimpleRNN (2) (0) | 2022.10.21 |
[ANN] SimpleRNN (1) (0) | 2022.10.19 |