Skip to main content

Tuning Multiple Ensemble Models to Find the Best Hyperparameters

Below is a Python implementation of a machine learning pipeline class that supports LightGBM, XGBoost, CatBoost, and AdaBoost, using RandomizedSearchCV to find the best hyperparameters. The output includes the best hyperparameters for each model in JSON format.



Prerequisites

Install required libraries:

pip install lightgbm xgboost catboost scikit-learn pandas numpy

Code: Machine Learning Pipeline Class

import json
import numpy as np
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

class MLBoostPipeline:
    """Tune and evaluate LightGBM, XGBoost, CatBoost and AdaBoost classifiers.

    Each model is tuned with ``RandomizedSearchCV`` over the grid stored in
    ``param_grids``. The winning parameters are kept in ``best_params`` and
    the refit winning estimators in ``best_estimators_``, so evaluation needs
    only the test data.

    Args:
        random_state: Seed passed to every model and to the randomized search.
        n_iter: Maximum number of parameter settings sampled per model.
        cv: Cross-validation setting forwarded to ``RandomizedSearchCV``.
    """

    def __init__(self, random_state=42, n_iter=20, cv=5):
        self.random_state = random_state
        self.n_iter = n_iter
        self.cv = cv
        # Base (untuned) estimators, keyed by display name.
        # NOTE(review): `use_label_encoder` appears to be deprecated in recent
        # XGBoost releases - confirm against the installed version.
        self.models = {
            "LightGBM": LGBMClassifier(random_state=self.random_state),
            "XGBoost": XGBClassifier(random_state=self.random_state, use_label_encoder=False, eval_metric='logloss'),
            "CatBoost": CatBoostClassifier(random_state=self.random_state, verbose=0),
            "AdaBoost": AdaBoostClassifier(random_state=self.random_state)
        }
        # Candidate values per hyperparameter, keyed by the same model names.
        self.param_grids = {
            "LightGBM": {
                "num_leaves": [20, 31, 40],
                "learning_rate": [0.01, 0.05, 0.1],
                "n_estimators": [50, 100, 200]
            },
            "XGBoost": {
                "n_estimators": [50, 100, 200],
                "max_depth": [3, 6, 10],
                "learning_rate": [0.01, 0.1, 0.2]
            },
            "CatBoost": {
                "iterations": [50, 100, 200],
                "learning_rate": [0.01, 0.1, 0.2],
                "depth": [4, 6, 10]
            },
            "AdaBoost": {
                "n_estimators": [50, 100, 200],
                "learning_rate": [0.5, 1.0, 1.5]
            }
        }
        # Best parameter set per model name; filled by fit_and_tune().
        self.best_params = {}
        # Refit best estimator per model name; filled by fit_and_tune().
        self.best_estimators_ = {}

    def _capped_n_iter(self, param_grid):
        """Return ``n_iter`` limited to the number of distinct grid settings.

        The cap is applied only when ``param_grid`` is a dict whose values are
        all finite sequences. If any value exposes ``rvs`` (a distribution),
        or the grid is not a dict, ``self.n_iter`` is returned unchanged.
        """
        if not isinstance(param_grid, dict):
            return self.n_iter
        combinations = 1
        for candidates in param_grid.values():
            if hasattr(candidates, "rvs"):
                # A continuous/discrete distribution has no finite size.
                return self.n_iter
            combinations *= len(candidates)
        return min(self.n_iter, combinations)

    def fit_and_tune(self, X_train, y_train):
        """Run a randomized hyperparameter search for every model.

        Args:
            X_train: Training features passed to ``RandomizedSearchCV.fit``.
            y_train: Training labels passed to ``RandomizedSearchCV.fit``.

        Returns:
            The ``best_params`` dict mapping model name to its best
            parameter set.
        """
        for model_name, model in self.models.items():
            print(f"Optimizing {model_name}...")
            param_grid = self.param_grids[model_name]
            search = RandomizedSearchCV(
                estimator=model,
                param_distributions=param_grid,
                # Asking for more draws than the grid holds only triggers a
                # warning, so cap the request at the grid size.
                n_iter=self._capped_n_iter(param_grid),
                cv=self.cv,
                scoring='accuracy',
                random_state=self.random_state,
                verbose=1,
                n_jobs=-1,
                # Refit on the full training set so best_estimator_ exists.
                refit=True
            )
            search.fit(X_train, y_train)
            self.best_params[model_name] = search.best_params_
            self.best_estimators_[model_name] = search.best_estimator_
            print(f"Best parameters for {model_name}: {search.best_params_}")
        return self.best_params

    def evaluate_models(self, X_test, y_test):
        """Score every tuned model on held-out data.

        Uses the estimators refit by ``fit_and_tune`` rather than module-level
        training data, so the class works when imported from another module.

        Args:
            X_test: Test features passed to each estimator's ``predict``.
            y_test: True labels compared against the predictions.

        Returns:
            Dict mapping model name to ``{"best_params": ..., "accuracy": ...}``.

        Raises:
            RuntimeError: If ``fit_and_tune`` has not produced any tuned model.
        """
        if not self.best_estimators_:
            raise RuntimeError(
                "No tuned models available; call fit_and_tune() before evaluate_models()."
            )
        results = {}
        for model_name, estimator in self.best_estimators_.items():
            print(f"Evaluating {model_name}...")
            # Keep the tuned, fitted model reachable through self.models, as
            # the previous set_params()/fit() sequence did.
            self.models[model_name] = estimator
            y_pred = estimator.predict(X_test)
            # Cast to a plain float so json.dump always accepts the value.
            accuracy = float(accuracy_score(y_test, y_pred))
            results[model_name] = {
                "best_params": self.best_params[model_name],
                "accuracy": accuracy
            }
        return results

    def save_results(self, results, output_file="model_results.json"):
        """Write ``results`` to ``output_file`` as indented JSON.

        Args:
            results: JSON-serialisable object, typically the return value of
                ``evaluate_models``.
            output_file: Destination path; an existing file is overwritten.
        """
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=4)
        print(f"Results saved to {output_file}")

# Example usage
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    # Demo data only: substitute your own feature matrix and label vector.
    iris = load_iris()
    X, y = iris.data, iris.target

    # Hold out 20% of the rows for the final evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Build the pipeline, tune every model, score on the hold-out set,
    # then write the outcome to the default JSON file.
    pipeline = MLBoostPipeline(random_state=42, n_iter=10, cv=3)
    best_params = pipeline.fit_and_tune(X_train, y_train)
    results = pipeline.evaluate_models(X_test, y_test)
    pipeline.save_results(results)

Features of the Code

  1. Supported Models:

    • LightGBM
    • XGBoost
    • CatBoost
    • AdaBoost
  2. Hyperparameter Optimization:

    • Uses RandomizedSearchCV for hyperparameter tuning.
    • Configurable parameter grids for each model.
  3. Evaluation:

    • Evaluates models on test data.
    • Returns the accuracy and best hyperparameters.
  4. JSON Output:

    • Saves the results, including the best hyperparameters and test accuracies, in a JSON file.

Example Output (JSON Format)

{
    "LightGBM": {
        "best_params": {
            "num_leaves": 31,
            "learning_rate": 0.05,
            "n_estimators": 100
        },
        "accuracy": 0.9667
    },
    "XGBoost": {
        "best_params": {
            "n_estimators": 200,
            "max_depth": 6,
            "learning_rate": 0.1
        },
        "accuracy": 0.9667
    },
    "CatBoost": {
        "best_params": {
            "iterations": 200,
            "learning_rate": 0.1,
            "depth": 6
        },
        "accuracy": 0.9667
    },
    "AdaBoost": {
        "best_params": {
            "n_estimators": 100,
            "learning_rate": 1.0
        },
        "accuracy": 0.9333
    }
}

Customization Options

  1. Parameter Grids:

    • Update the self.param_grids dictionary with additional parameters or ranges.
  2. Scoring Metrics:

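    • Change scoring='accuracy' to another metric. For multiclass targets (such as the iris example above) use a multiclass-capable scorer like f1_macro or roc_auc_ovr; the plain f1 and roc_auc scorers only work for binary targets.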
  3. Ensemble Methods:

    • Add more models or custom ensembles if needed.

Comments

Popular posts from this blog

Guidewire Rating detailed explanations

Guidewire Rating is a critical component of Guidewire PolicyCenter that handles the calculation of insurance premiums based on various factors, such as risk characteristics, coverage options, and discounts. Here's a detailed explanation of how Guidewire Rating works, its components, and how it can be configured and extended. 1. Overview of Guidewire Rating Guidewire Rating is responsible for determining the price of an insurance policy by applying rating logic, rules, and algorithms to the insured's data. The rating process involves evaluating factors like the type of coverage, the insured risk (e.g., the driver’s record, vehicle type in auto insurance), and the chosen limits and deductibles. The output is a premium amount that the policyholder must pay. 2. Key Components of Guidewire Rating a. Rating Engine The Rating Engine is the core system within PolicyCenter that processes rating inputs and outputs premium calculations. It interprets rating formulas, applies them to speci...

Java Swing MySql JDBC: insert data into database

Program import javax.swing.*; import java.awt.*; import java.awt.event.*; import java.sql.*; public class insertswing implements ActionListener {   JFrame fr;JPanel po;   JLabel l1,l2,main;   JTextField tf1,tf2;   GridBagConstraints gbc;   GridBagLayout go;   JButton ok,exit; public insertswing(){ fr=new JFrame("New User Data "); Font f=new Font("Verdana",Font.BOLD,24); po=new JPanel(); fr.getContentPane().add(po); fr.setVisible(true); fr.setSize(1024,768); fr.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); po.setBackground(Color.WHITE); go=new GridBagLayout(); gbc=new GridBagConstraints(); po.setLayout(go); main=new JLabel("Enter User Details "); main.setFont(f); l1=new JLabel("Name  :");tf1=new JTextField(20); l2=new JLabel("User Name  :");tf2=new JTextField(20); ok=new JButton("Accept"); exit=new JButton("Exit"); gbc.anchor=GridBagConstraints.NORTH;gbc.gridx=5;gbc.gridy=0; go.s...

JSP and Servlet Form Submission without page refresh show results on the same page using Jquery AJAX

Code Snippet HTML Form  <form id='ajaxform' name='ajaxform' action='ajaxformexample' method='post'>  First Name: <input type='text' id='firstname' name='firstname' size='30' required/><br/>  Last Name: <input type='text' id='lastname' name='lastname' size='30'required/><br/>  Email:  <input type='email' id='emailid' name='emailid' size='30'required/><br/>  Password:  <input type='password' id='pwd' name='pwd' size='30'required/><br/>  <input type='Submit'/>   <div id='content'> </div> </form> the above HTML Form uses post method and url servlet redirect to " ajaxformexample " Javascript Code  var form = $('#ajaxform'); // id of form tag  form.submit(function () {  $.ajax({  ...