1

I'm currently trying to build a model using CatBoost. For my parameter tuning, I'm using Optuna with cross-validation, pruning trials by checking the intermediate cross-validation scores. Here is a minimal example:

def objective(trial):
    """Optuna objective: tune CatBoost hyperparameters via stratified CV.

    After each fold, reports the running mean validation accuracy to the
    trial so the pruner compares like-for-like intermediate values across
    trials, and raises TrialPruned when the trial looks unpromising.

    Returns:
        float: mean validation accuracy over all completed folds.
    """
    param = {
        "iterations": trial.suggest_int("iterations", 50, 5000),
        "boosting_type": trial.suggest_categorical(
            "boosting_type", ["Ordered", "Plain"]
        ),
        "model_shrink_mode": trial.suggest_categorical(
            "model_shrink_mode", ["Constant", "Decreasing"]
        ),
    }
    scores = []
    skf = StratifiedKFold(n_splits=nfolds)
    for k_step, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_Ktrain, y_Ktrain = X.iloc[train_index], y.iloc[train_index]
        X_Kval, y_Kval = X.iloc[test_index], y.iloc[test_index]
        # Bug fix: the pandas method is select_dtypes (plural), not
        # select_dtype. .tolist() gives CatBoost a plain list of names.
        clf = CatBoostClassifier(
            **param,
            cat_features=X_Ktrain.select_dtypes(exclude="number").columns.tolist(),
        )
        # Bug fix: X_Ktrain (capital X) — x_Ktrain was never defined.
        clf.fit(X_Ktrain, y_Ktrain)
        score = clf.score(X_Kval, y_Kval)
        scores.append(score)
        # Bug fix: `step` was undefined; the loop index is k_step.
        # Report the running mean (not the raw fold score) so the pruner's
        # intermediate values are stable and comparable across trials.
        trial.report(sum(scores) / len(scores), k_step)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.TrialPruned()
    return sum(scores) / len(scores)

My question arises because Optuna's pruning is designed to work with algorithms that can learn incrementally, so I'm not sure that the way I'm using it makes sense.

0 Answers