728x90
반응형
SMALL
무작위 오버샘플링에서 수축 요인 (shrinkage factor)의 영향
RandomOverSampler를 사용하여 smoothed bootstrap (부트스트랩)을 생성하는 데 사용된 수축 계수의 효과를 보여준다. 먼저, 몇 개의 샘플만으로 분류 데이터 집합을 생성한다. 클래스 간의 비율은 불균형하다.
import seaborn as sns
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
sns.set_context("poster")
X, y = make_classification(
n_samples=100,
n_features=2,
n_redundant=0,
weights=[0.1, 0.9],
random_state=0,
)
Counter(y)
Counter({1: 90, 0: 10})
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 0
sampler = RandomOverSampler(random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
Counter({1: 90, 0: 90})
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 0.1
sampler = RandomOverSampler(shrinkage=0.1, random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 0.01
sampler = RandomOverSampler(shrinkage=0.01, random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
fig, ax = plt.subplots(figsize=(100, 100))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 0.001
sampler = RandomOverSampler(shrinkage=0.001, random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 1
sampler = RandomOverSampler(shrinkage=1, random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
shrinkage 3
sampler = RandomOverSampler(shrinkage=3, random_state=0)
X_res, y_res = sampler.fit_resample(X, y)
Counter(y_res)
fig, ax = plt.subplots(figsize=(7, 7))
scatter = plt.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.4)
class_legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
ax.add_artist(class_legend)
ax.set_xlabel("Feature #1")
_ = ax.set_ylabel("Feature #2")
plt.tight_layout()
https://imbalanced-learn.org/stable/auto_examples/over-sampling/plot_shrinkage_effect.html
728x90
반응형
LIST
'Data-driven Methodology > Resampling Methods' 카테고리의 다른 글
리샘플링 방법 (Resampling Methods) (0) | 2024.01.30 |
---|