DBSCAN klasterovanje¶

In [1]:
import pandas as pd
In [2]:
import matplotlib.pyplot as plt

Podaci¶

In [3]:
from sklearn.datasets import make_moons
In [4]:
# Generate the two-moons toy dataset. A fixed random_state makes the data
# (and therefore every result below) reproducible on Restart & Run All.
dataset = make_moons(n_samples=1000, noise=0.05, random_state=42)
# make_moons returns (points, class_labels); keep them as pandas objects.
X = pd.DataFrame(dataset[0], columns=["x", "y"])
y = pd.Series(dataset[1])
print(X.shape)
print(y.shape)
(1000, 2)
(1000,)
In [5]:
# Quick look at the raw, unlabeled points before clustering.
plt.scatter(x=X["x"], y=X["y"])
Out[5]:
<matplotlib.collections.PathCollection at 0x7f695d853430>

Treniranje modela¶

In [6]:
from sklearn.cluster import DBSCAN
In [7]:
# eps is the neighbourhood radius; min_samples is the number of neighbours
# (the point itself included) needed for a point to count as a core point.
model = DBSCAN(eps=0.1, min_samples=2)
# Fit on the coordinate columns only: a later cell adds a "labels" column to X,
# and re-running this cell afterwards must not feed those labels back in as a
# feature.
model.fit(X[["x", "y"]])
Out[7]:
DBSCAN(eps=0.1, min_samples=2)
In [8]:
# Store the cluster id assigned to every sample next to its coordinates,
# then preview the first five rows.
cluster_ids = model.labels_
X["labels"] = cluster_ids
X.head(5)
Out[8]:
x y labels
0 -0.914107 0.456677 0
1 0.733471 -0.531546 1
2 0.985271 0.122425 0
3 1.937545 0.308542 1
4 -0.740937 0.517254 0
In [9]:
# Colours actually used for the two clusters. The previous list was never
# used and contained the invalid colour name "yello"; "tab:blue" is
# matplotlib's default first colour, so the figure looks the same as before.
colors = ["tab:blue", "orange"]
cluster_0 = X[X["labels"] == 0]
cluster_1 = X[X["labels"] == 1]
# DBSCAN labels noise samples as -1; draw them too (in gray) instead of
# silently dropping them from the figure.
noise = X[X["labels"] == -1]
plt.scatter(noise["x"], noise["y"], color="gray", marker="x")
plt.scatter(cluster_0["x"], cluster_0["y"], color=colors[0])
plt.scatter(cluster_1["x"], cluster_1["y"], color=colors[1])
Out[9]:
<matplotlib.collections.PathCollection at 0x7f695a8cc7c0>