import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
# Build the two-moons toy dataset: 1000 samples with a noise level of 0.05.
dataset = make_moons(n_samples=1000, noise=0.05)
features, targets = dataset
# Wrap the coordinates in a DataFrame and the generated labels in a Series.
X = pd.DataFrame(features, columns=["x", "y"])
y = pd.Series(targets)
# Show both shapes, one per line.
print(X.shape, y.shape, sep="\n")
(1000, 2) (1000,)
# Plot every sample, without any cluster colouring yet.
plt.scatter(x=X["x"], y=X["y"])
<matplotlib.collections.PathCollection at 0x7f695d853430>
from sklearn.cluster import DBSCAN
# Density-based clustering of the samples with the hyper-parameters chosen
# for this dataset (eps=0.1, min_samples=2).
model = DBSCAN(eps=0.1, min_samples=2)
# Fit on the coordinate columns only. X later gains a "labels" column, and
# fitting on the whole frame would feed those labels back in as a feature
# if this step is re-run.
model.fit(X[["x", "y"]])
DBSCAN(eps=0.1, min_samples=2)
# Store the cluster id assigned to each sample next to its coordinates.
cluster_ids = model.labels_
X["labels"] = cluster_ids
# Preview the first five rows of the labelled frame.
X.head(n=5)
| x | y | labels |
---|---|---|---|
0 | -0.914107 | 0.456677 | 0 |
1 | 0.733471 | -0.531546 | 1 |
2 | 0.985271 | 0.122425 | 0 |
3 | 1.937545 | 0.308542 | 1 |
4 | -0.740937 | 0.517254 | 0 |
# One colour per plotted cluster: colors[i] is used for cluster label i.
# (The list previously held the invalid name "yello" and was never used;
# the values below match what the two scatter calls were actually drawing,
# assuming matplotlib's default colour cycle for the first one.)
colors = ["tab:blue", "orange"]
# Split the samples by the cluster label stored in the "labels" column.
# NOTE(review): only labels 0 and 1 are drawn; samples with any other label
# (e.g. DBSCAN's noise label) are not shown -- confirm that is intended.
cluster_0 = X[X["labels"] == 0]
cluster_1 = X[X["labels"] == 1]
plt.scatter(cluster_0["x"], cluster_0["y"], color=colors[0])
plt.scatter(cluster_1["x"], cluster_1["y"], color=colors[1])
<matplotlib.collections.PathCollection at 0x7f695a8cc7c0>