

from tensorflow.keras.datasets import fashion_mnist
import matplotlib.pyplot as plt
import openTSNE
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
def plot_tsne_embedding(data, labels, perplexity=30, n_jobs=8, random_state=42, exaggeration=1.5, title_suffix=""):
    """Embed ``data`` with openTSNE, display a scatter plot, and return the embedding.

    The embedding is built from perplexity-based nearest-neighbour affinities
    (Euclidean metric) and a random initialization, and is then optimized in
    place for 250 iterations with the FFT negative-gradient method.

    Parameters
    ----------
    data : array-like
        Samples to embed; handed to ``openTSNE.affinity.PerplexityBasedNN``
        and ``openTSNE.initialization.random``.
    labels : array-like
        Values used to color the scatter points (assumed to hold one entry
        per row of ``data`` -- TODO confirm against callers).
    perplexity : int or float
        Perplexity used when computing the affinities.
    n_jobs : int
        Forwarded to both the affinity computation and the embedding.
    random_state : int
        Seed used for the affinities and for the random initialization.
    exaggeration : float
        Exaggeration factor passed to the single optimization phase.
    title_suffix : str
        Text appended to the plot title after ``"t-SNE Embedding - "``.

    Returns
    -------
    openTSNE.TSNEEmbedding
        The optimized embedding.
    """
    # Local import so this function stays self-contained.
    import inspect

    # Read the defaults from the signature itself so the "changed parameters"
    # report can never drift from the actual default values.
    signature = inspect.signature(plot_tsne_embedding)
    params_used = {
        'perplexity': perplexity,
        'n_jobs': n_jobs,
        'random_state': random_state,
        'exaggeration': exaggeration,
    }
    changed_params = {
        name: value
        for name, value in params_used.items()
        if value != signature.parameters[name].default
    }
    print(f"Changed parameters: {changed_params}")

    # Build the affinities.
    affinities = openTSNE.affinity.PerplexityBasedNN(
        data,
        perplexity=perplexity,
        metric="euclidean",
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=True,
    )

    # Build the random initialization and report the starting coordinates.
    initialization = openTSNE.initialization.random(data, random_state=random_state)
    print("Initial coordinates: ", initialization)

    # Create the embedding object and optimize it in place.
    embedding = openTSNE.TSNEEmbedding(
        initialization,
        affinities,
        negative_gradient_method="fft",
        n_jobs=n_jobs,
        verbose=True,
    )
    embedding.optimize(250, exaggeration=exaggeration, inplace=True)

    # Plot title: only add the " - <suffix>" part when a suffix was given, so
    # the default empty suffix no longer leaves a dangling separator.
    title = 't-SNE Embedding'
    if title_suffix:
        title += ' - ' + title_suffix
    if changed_params:
        overrides = ', '.join(f"{k}={v}" for k, v in changed_params.items())
        title += f' ({overrides})'

    # Plotting; a small marker size keeps dense clusters readable.
    plt.figure(figsize=(8, 6))
    plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap='viridis', s=1.5)
    plt.colorbar(label='Class Label')
    plt.title(title)
    plt.xlabel('t-SNE Feature 1')
    plt.ylabel('t-SNE Feature 2')
    plt.show()
    return embedding
# Load Fashion-MNIST and pool the train and test splits together.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_all = np.concatenate([x_train, x_test], axis=0)
y_all = np.concatenate([y_train, y_test], axis=0)

# Flatten every image into a one-dimensional feature vector.
x_all = x_all.reshape(len(x_all), -1)

# Draw a stratified random sample of 10,000 points.
x_sam, _, y_sample, _ = train_test_split(
    x_all,
    y_all,
    train_size=10000,
    stratify=y_all,
    random_state=42,
)

# Reduce dimensionality with PCA (784 -> 30 dimensions).
pca = PCA(n_components=30, random_state=42)
x_sample = pca.fit_transform(x_sam)

# Cumulative subsets: the first third, the first two thirds, and everything.
third_index = x_sample.shape[0] // 3
data_part1, labels_part1 = x_sample[:third_index], y_sample[:third_index]
data_part2, labels_part2 = x_sample[:2 * third_index], y_sample[:2 * third_index]
data_part3, labels_part3 = x_sample, y_sample


def _show_embedding_scatter(points, point_labels, title):
    """Scatter-plot a 2-D embedding colored by label and display it."""
    plt.figure(figsize=(8, 6))
    # Small marker size so dense regions stay readable.
    plt.scatter(points[:, 0], points[:, 1], c=point_labels, cmap='viridis', s=1.5)
    plt.colorbar(label='Class Label')
    plt.title(title)
    plt.xlabel('t-SNE Feature 1')
    plt.ylabel('t-SNE Feature 2')
    plt.show()


# First third: build the reference embedding and plot it.
embedding_1 = plot_tsne_embedding(
    data_part1,
    labels_part1,
    title_suffix="1/3 of the Data Set",
)

# First two thirds: place the points into the reference embedding and plot.
embedding_2 = embedding_1.transform(data_part2)
_show_embedding_scatter(embedding_2, labels_part2, "2/3 of the Data Set")

# Whole sample: place all points into the reference embedding and plot.
embedding_3 = embedding_1.transform(data_part3)
_show_embedding_scatter(embedding_3, labels_part3, "Whole Data set")






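# Captured output from an earlier run (presumably the timings of the three
# embedding steps -- the code above does not print these lines; TODO confirm):
#   Time elapsed: 27.08 seconds
#   Time elapsed: 0.25 seconds
#   Time elapsed: 0.31 seconds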