import pandas as pd


# Load the dogs dataset; the path is relative to the working directory.
df = pd.read_csv('../data/dogs.csv')

df


# Every column except the first one is treated as a clustering feature.
feature_names = df.columns[1:].tolist()
feature_names

['height', 'weight']


# Feature matrix: only the columns listed in feature_names, still unscaled.
X = df[feature_names]
X


from sklearn.preprocessing import MinMaxScaler


# Min-max scaler with default settings; kept in a variable because it is
# reused later to map cluster centres back to the original units.
scaler = MinMaxScaler()


# Learn the scaling parameters from the unscaled feature matrix.
scaler.fit(X)

MinMaxScaler()


# Rescale the features with the fitted scaler and wrap the resulting array
# back into a DataFrame so the original column names are kept.
X = pd.DataFrame(scaler.transform(X), columns=feature_names)

X


from sklearn.cluster import KMeans


# Two-cluster K-means model. No random_state is passed, so results may
# differ between runs.
kmeans = KMeans(n_clusters=2, init='k-means++')


# Fit on the scaled feature matrix.
kmeans.fit(X)

KMeans(n_clusters=2)


# Cluster centres of the fitted model, expressed in the scaled feature space
# (the model was fit on the scaled X).
kmeans.cluster_centers_

array([[0.90384615, 0.86928105],
       [0.43589744, 0.22875817]])


# Inertia of the fitted model: sum of squared distances from each sample to
# its closest cluster centre (lower means tighter clusters).
kmeans.inertia_

0.7828436301619693


# Cluster index assigned to each row of X, in row order.
kmeans.labels_

array([1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1], dtype=int32)


from matplotlib import pyplot as plt


# Palette used to colour clusters; a cluster's label is its index here.
colors = [
    'red',
    'green',
    'blue',
    'yellow',
    'purple',
    'grey',
    'black',
    'orange',
    'cyan',
    'pink',
    'brown',
]


# (rows, columns) of the scaled feature matrix; the row count is the upper
# bound for the number of clusters tried below.
X.shape

(11, 2)


# Number of available colours; the plotting loop indexes colors by cluster label.
len(colors)

11


# Fit K-means for every cluster count from 1 up to the number of samples
# (range() excludes its upper bound, hence the "+ 1") and for every
# initialisation strategy, drawing one scatter plot per fitted model.
ks = range(1, X.shape[0] + 1)
init_methods = ['k-means++', 'random']

fig = plt.figure(figsize=(10, 20))

# One inertia value per k; plotted against ks after this loop.
inertias = []

# Grid layout: one row per k, one column per initialisation strategy.
n_rows, n_cols = len(ks), len(init_methods)

plt_index = 1
for k in ks:
    for init_method in init_methods:
        kmeans = KMeans(n_clusters=k, init=init_method)
        kmeans.fit(X)

        # Attach the assignments to the unscaled data so that the points are
        # drawn in their original units.
        df['label'] = kmeans.labels_

        # Draw on the returned Axes explicitly instead of relying on
        # pyplot's implicit "current axes" state.
        ax = fig.add_subplot(n_rows, n_cols, plt_index)

        for label in range(k):
            cluster = df[df['label'] == label]
            # Wrap around the palette so that a dataset with more rows than
            # colours cannot raise IndexError.
            ax.scatter(cluster['height'], cluster['weight'], color=colors[label % len(colors)])

        # Centres live in the scaled feature space; map them back to the
        # original units before plotting them next to the raw points.
        centroids = pd.DataFrame(scaler.inverse_transform(kmeans.cluster_centers_), columns=feature_names)
        ax.scatter(centroids['height'], centroids['weight'], marker='x', color='black')

        plt_index += 1

    # NOTE: appended once per k, after the inner loop, so only the model fit
    # with the LAST entry of init_methods is recorded. This keeps
    # len(inertias) == len(ks), which the inertia plot depends on.
    inertias.append(kmeans.inertia_)

plt.show()


# Inertia as a function of the number of clusters, one marker per k.
plt.plot(ks, inertias, marker='o')

[<matplotlib.lines.Line2D at 0x7f112b2f2b60>]

	breed	height	weight
0	Border Collie	20	45
1	Boston Terrier	16	20
2	Brittany Spaniel	18	35
3	Bullmastiff	27	120
4	Chihuahua	8	8
5	German Shepherd	25	78
6	Golden Retriever	23	70
7	Great Dane	32	160
8	Portuguese Water Dog	21	50
9	Standard Poodle	19	65
10	Yorkshire Terrier	6	7

	height	weight
0	20	45
1	16	20
2	18	35
3	27	120
4	8	8
5	25	78
6	23	70
7	32	160
8	21	50
9	19	65
10	6	7

	height	weight
0	0.538462	0.248366
1	0.384615	0.084967
2	0.461538	0.183007
3	0.807692	0.738562
4	0.076923	0.006536
5	0.730769	0.464052
6	0.653846	0.411765
7	1.000000	1.000000
8	0.576923	0.281046
9	0.500000	0.379085
10	0.000000	0.000000

K-sredina klasterovanje

Podaci

Preprocesiranje

Treniranje

Vizuelizacija