+2 votes

Best answer

The CatBoost classifier has a **predict()** function that returns the predicted labels of the data and a **predict_proba()** function that returns their predicted probabilities. If there are k classes in the data, each record gets k predicted probabilities, one per class, each giving the probability that the record belongs to the corresponding class.

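In the example below, I apply CatBoost to a binary classification problem. I compute the accuracy from the predicted labels and the AUC-ROC from the predicted probabilities of class 1. Note that the features and labels are generated randomly and independently of each other, so both scores are expected to be close to 0.5; the example only demonstrates the API.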

from catboost import CatBoostClassifier

from sklearn.metrics import accuracy_score

from sklearn.metrics import roc_auc_score

import numpy as np

def generate_train_test_data(*, feature_count=10, train_data_count=500,
                             test_data_count=100, seed=1007):
    """
    Randomly generate train/test feature matrices and binary labels.

    Features are uniform samples from [0, 1); labels are uniform samples
    rounded to 0.0 or 1.0, drawn independently of the features.

    Args:
        feature_count: Number of feature columns in both matrices.
        train_data_count: Number of training rows.
        test_data_count: Number of test rows.
        seed: Seed for the random generator; the same seed always yields
            the same data.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)`` with
        shapes ``(train_data_count, feature_count)``, ``(train_data_count,)``,
        ``(test_data_count, feature_count)`` and ``(test_data_count,)``.
    """
    # A private RandomState produces the same stream as seeding the global
    # generator with np.random.seed(seed), but leaves the caller's global
    # NumPy random state untouched.
    rng = np.random.RandomState(seed)

    # The draw order (train data, train labels, test data, test labels) is
    # significant: changing it would change the generated values.

    # train data
    train_data = rng.random_sample((train_data_count, feature_count))
    train_labels = np.round(rng.random_sample(train_data_count))

    # test data
    test_data = rng.random_sample((test_data_count, feature_count))
    test_labels = np.round(rng.random_sample(test_data_count))

    return train_data, train_labels, test_data, test_labels

def main():
    """Train a CatBoost classifier on generated data and print its scores.

    Prints the accuracy computed from the predicted labels and the AUC-ROC
    computed from the predicted probabilities of class 1.
    """
    X_train, y_train, X_test, y_test = generate_train_test_data()

    # train the model
    model = CatBoostClassifier(verbose=False)
    model.fit(X_train, y_train)

    # get predicted labels
    pred_labels = model.predict(X_test)
    print("Accuracy: ", accuracy_score(y_test, pred_labels))

    # get predicted probabilities: one column per class for each record
    pred_probs = model.predict_proba(X_test)
    # Column 1 is used as the class-1 (positive) score for the AUC.
    # NOTE(review): presumably column order follows model.classes_ — confirm.
    print("AUC-ROC: ", roc_auc_score(y_test, pred_probs[:, 1]))


if __name__ == "__main__":
    main()