Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in Data Science by (17.6k points)

I am currently trying to visualize 300-dimensional word vectors in 2D. I tried t-SNE with different parameters and read the blog post at https://distill.pub/2016/misread-tsne/, but so far I have not gotten any useful results.

I want a visualisation that reflects the nearest neighbors of a few selected word vectors, but the 2D visualisation is all over the place.

Is t-SNE unsuitable for my problem?

from sklearn.manifold import TSNE

# Collect the first entry of every category's embedding, in key order.
arr = [category_embeddings[category][0] for category in category_embeddings.keys()]

# t-SNE hyperparameters.
perplex = 30
tsne_steps = 50000
lr = 10

fig_tsne = plt.figure(figsize=(18, 18), dpi=800)

# Gather the estimator options in one place before constructing it.
tsne_options = dict(
    perplexity=perplex,
    n_components=2,
    init='pca',
    n_iter=tsne_steps,
    learning_rate=lr,
    method="exact",
)
tsne = TSNE(**tsne_options)

plot_only = len(category_embeddings.keys())

# Project the collected vectors down to two dimensions.
low_dim_embs = tsne.fit_transform(np.asarray(arr))

# Row i of the projection belongs to the i-th category key; draw and label it.
for title, (x, y) in zip(category_embeddings.keys(), low_dim_embs):
    plt.scatter(x, y)
    plt.annotate(
        title,
        xy=(x, y),
        xytext=(5, 2),
        textcoords='offset points',
        ha='right',
        va='bottom',
    )

1 Answer

0 votes
by (41.4k points)

1. Create a distance matrix.

2. Then feed the resulting matrix to TSNE.

from copy import copy

from sklearn.metrics.pairwise import cosine_distances

# Nested mapping of pairwise cosine distances between categories:
# c1_c2_cos_dist[c1][c2] holds the distance between the embeddings of c1 and c2.
c1_c2_cos_dist = {}

# Create distance Matrix
for c1 in category_embeddings.keys():
    tmp = {}
    for c2 in category_embeddings.keys():
        # cosine_distances returns a 2-D array of pairwise distances; the
        # [0][0] entry compares the first row of each argument.
        cos_dis = cosine_distances(category_embeddings[c1], category_embeddings[c2])
        tmp[c2] = cos_dis[0][0]
    c1_c2_cos_dist[c1] = copy(tmp)

# --- 

from sklearn.manifold import TSNE

# t-SNE hyperparameters.
perplex = 30
tsne_steps = 50000
lr = 10

fig_tsne = plt.figure(figsize=(18, 18), dpi=800)

# metric="precomputed" tells TSNE that the input is already a square distance
# matrix. PCA initialisation cannot be used with a precomputed matrix, and
# recent scikit-learn releases default to init="pca", so request random
# initialisation explicitly.
# NOTE(review): scikit-learn >= 1.5 deprecates n_iter in favour of max_iter;
# rename the keyword if the installed version warns about or rejects it.
tsne = TSNE(perplexity=perplex,
            n_components=2,
            metric="precomputed",
            init="random",
            n_iter=tsne_steps,
            learning_rate=lr)

# Fix one label order and use it for rows, columns and plot labels so that
# distMatrix[i][j] is always the distance between titles[i] and titles[j].
titles = list(category_embeddings.keys())

distMatrix = np.asarray(
    [[c1_c2_cos_dist[row][col] for col in titles] for row in titles]
)

low_dim_embs = tsne.fit_transform(distMatrix)

# Draw each category at its 2-D position and label it with its name.
for title, (x, y) in zip(titles, low_dim_embs):
    plt.scatter(x, y)
    plt.annotate(
        title,
        xy=(x, y),
        xytext=(5, 2),
        textcoords='offset points',
        ha='right',
        va='bottom')

Browse Categories

...