I'm currently trying to build a model using CatBoost. For parameter tuning, I'm using Optuna with cross-validation, pruning trials based on the intermediate cross-validation scores. Here is a minimal example:
def objective(trial):
    """Optuna objective: score one CatBoost parameter set with stratified K-fold CV.

    Each fold's accuracy is reported to the trial as an intermediate value so
    the pruner can stop unpromising trials early; the trial's final value is
    the mean score over the completed folds.

    Args:
        trial: optuna.Trial used to sample hyperparameters and report scores.

    Returns:
        float: mean cross-validation accuracy over the folds.

    Raises:
        optuna.TrialPruned: when the pruner decides to stop this trial.
    """
    param = {
        "iterations": trial.suggest_int("iterations", 50, 5000),
        "boosting_type": trial.suggest_categorical(
            "boosting_type", ["Ordered", "Plain"]
        ),
        "model_shrink_mode": trial.suggest_categorical(
            "model_shrink_mode", ["Constant", "Decreasing"]
        ),
    }
    # Hoisted out of the fold loop: the categorical columns of X never change.
    # Bug fix: the DataFrame method is select_dtypes (plural) — the original
    # select_dtype would raise AttributeError.
    cat_features = X.select_dtypes(exclude="number").columns.tolist()

    scores = []
    skf = StratifiedKFold(n_splits=nfolds)
    for k_step, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_Ktrain, y_Ktrain = X.iloc[train_index], y.iloc[train_index]
        X_Kval, y_Kval = X.iloc[test_index], y.iloc[test_index]
        clf = CatBoostClassifier(**param, cat_features=cat_features)
        # Bug fix: original fitted on the undefined name `x_Ktrain` (lowercase x).
        clf.fit(X_Ktrain, y_Ktrain)
        score = clf.score(X_Kval, y_Kval)
        scores.append(score)
        # Bug fix: original passed undefined `step`; the fold index is `k_step`.
        trial.report(score, k_step)
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.TrialPruned()
    return sum(scores) / len(scores)
My question arises because Optuna's pruning is designed for algorithms that learn incrementally (reporting scores as training progresses), whereas here each reported step is a separate fold of an already-fully-trained model — so I'm not sure that using it this way makes sense.