import pandas as pd


# Load the balloons data set from ../data/balloons.csv.
df = pd.read_csv("../data/balloons.csv")


# Preview the first rows of the table.
df.head()


# Summary statistics of the table.
df.describe()


# Every column except the last one is treated as an input feature.
features = df.columns[:-1].tolist()
print(features)

['color', 'size', 'act', 'age']


from sklearn.model_selection import train_test_split


from sklearn.preprocessing import OrdinalEncoder


# Feature matrix: the columns named in `features`; target: the "inflated" column.
X = df.loc[:, features]
y = df.loc[:, "inflated"]


# Report the dimensions of both objects, one per line.
print(X.shape, y.shape, sep="\n")

(76, 4)
(76,)


# Hold out one third of the rows for testing, stratified on the target `y`,
# with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=13,
    stratify=y,
)


# Shapes of the four resulting pieces, one per line.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

(50, 4)
(26, 4)
(50,)
(26,)


# Learn the category-to-number mapping from the training rows only, then
# apply that same mapping to both splits. Note that X_train / X_test are
# overwritten with their encoded versions here.
oe = OrdinalEncoder()
X_train = oe.fit_transform(X_train)
X_test = oe.transform(X_test)


# View the first encoded training rows under the original column names.
pd.DataFrame(data=X_train, columns=features).head()


from sklearn.naive_bayes import CategoricalNB


# Fit a categorical naive Bayes classifier on the encoded training data.
model = CategoricalNB()
model.fit(X_train, y_train)

CategoricalNB()


# Class labels stored on the fitted model; reused below to label the
# confusion matrices.
classes = model.classes_
classes

array(['F', 'T'], dtype='<U1')


# Number of training samples counted for each class.
model.class_count_

array([27., 23.])


# Count tables collected during fitting, one array per feature.
model.category_count_

[array([[13., 14.],
        [ 7., 16.]]),
 array([[17., 10.],
        [ 7., 16.]]),
 array([[19.,  8.],
        [ 6., 17.]]),
 array([[ 9., 18.],
        [17.,  6.]])]


from sklearn.metrics import confusion_matrix


# Confusion matrix on the training split. Passing `labels=classes` pins the
# row/column order of the matrix to the same labels that are used for the
# DataFrame index and columns, instead of relying on the labels that happen
# to occur in the data.
y_train_pred = model.predict(X_train)
pd.DataFrame(
    confusion_matrix(y_train, y_train_pred, labels=classes),
    index=classes,
    columns=classes,
)


# Same table for the held-out test split.
y_test_pred = model.predict(X_test)
pd.DataFrame(
    confusion_matrix(y_test, y_test_pred, labels=classes),
    index=classes,
    columns=classes,
)


from sklearn.pipeline import Pipeline


# Chain the encoder and the classifier so both are fitted and applied together.
pipe = Pipeline([("ordinal encoder", OrdinalEncoder()), ("classifier", CategoricalNB())])


# X_train / X_test were overwritten with their encoded versions earlier in the
# file, so fitting the pipeline on them would make its encoder learn the
# already-encoded numbers instead of the original categories. Recover the raw
# categorical rows of each split from the untouched `X`, using the row labels
# carried by y_train / y_test so rows and targets stay aligned.
X_train_raw = X.loc[y_train.index]
X_test_raw = X.loc[y_test.index]
pipe.fit(X_train_raw, y_train)

Pipeline(steps=[('ordinal encoder', OrdinalEncoder()),
                ('classifier', CategoricalNB())])


# Individual steps can be looked up by the name given in the constructor.
pipe["ordinal encoder"]

OrdinalEncoder()


# Evaluate the whole pipeline on the raw test rows; `labels=classes` keeps the
# matrix order in sync with the index/column labels.
y_test_pred = pipe.predict(X_test_raw)
pd.DataFrame(
    confusion_matrix(y_test, y_test_pred, labels=classes),
    index=classes,
    columns=classes,
)

	color	size	act	age	inflated
0	YELLOW	SMALL	STRETCH	ADULT	T
1	YELLOW	SMALL	STRETCH	ADULT	T
2	YELLOW	SMALL	STRETCH	CHILD	F
3	YELLOW	SMALL	DIP	ADULT	F
4	YELLOW	SMALL	DIP	CHILD	F

	color	size	act	age	inflated
count	76	76	76	76	76
unique	2	2	2	2	2
top	YELLOW	SMALL	STRETCH	ADULT	F
freq	40	40	38	38	41

	color	size	act	age
0	1.0	0.0	0.0	0.0
1	1.0	1.0	1.0	1.0
2	0.0	1.0	1.0	1.0
3	1.0	0.0	1.0	0.0
4	1.0	0.0	0.0	0.0

Naivni Bajesov algoritam¶

Podaci¶

Preprocesiranje¶

Treniranje¶

Performanse modela na trening skupu¶

Performanse modela na test skupu¶

Pajplajn¶

	color	size	act	age
0	1.0	0.0	0.0	0.0
1	1.0	1.0	1.0	1.0
2	0.0	1.0	1.0	1.0
3	1.0	0.0	1.0	0.0
4	1.0	0.0	0.0	0.0

	color	size	act	age
0	1.0	0.0	0.0	0.0
1	1.0	1.0	1.0	1.0
2	0.0	1.0	1.0	1.0
3	1.0	0.0	1.0	0.0
4	1.0	0.0	0.0	0.0

	color	size	act	age
0	1.0	0.0	0.0	0.0
1	1.0	1.0	1.0	1.0
2	0.0	1.0	1.0	1.0
3	1.0	0.0	1.0	0.0
4	1.0	0.0	0.0	0.0