# Source code for rumboost.utility_plotting

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from rumboost.utils import function_2d, weights_to_plot_v2, get_asc, non_lin_function, data_leaf_value, get_weights
from rumboost.utility_smoothing import stairs_to_pw, find_feat_best_fit, fit_func, monotone_spline, mean_monotone_spline


# [docs]
def plot_2d(model, feature1: str, feature2: str, min1: int, max1: int, min2: int, max2: int, save_figure: bool = False, utility_names: list[str] = ['Walking', 'Cycling', 'Public Transport', 'Driving'], num_points = 1000):
    '''
    Plot a 2nd order feature interaction as a contour plot.

    One figure is drawn per utility found in the 2d weights returned by
    ``get_weights``. Utilities whose interaction surface sums to zero are
    skipped. When both features also appear on their own in the utility
    (according to ``weights_to_plot_v2``), their individual contributions
    are added to the surface. The surface is then shifted so that its
    maximum is zero.

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    feature1 : str
        Name of feature 1 (x axis).
    feature2 : str
        Name of feature 2 (y axis).
    min1 : int
        Minimum value of feature 1.
    max1 : int
        Maximum value of feature 1.
    min2 : int
        Minimum value of feature 2.
    max2 : int
        Maximum value of feature 2.
    save_figure : bool, optional (default = False)
        If true, save the figure as a png file. The destination is the fixed
        pattern 'Figures/FI RUMBoost/age_travel_time_<utility name>.png'.
    utility_names : list[str]
        List of the alternative names, indexed by the integer value of the
        utility. Only used to build the name of the saved file.
    num_points : int, optional (default=1000)
        The number of points per axis. The total number of points is num_points**2.
        It is also used as the number of contour levels.

    '''
    # Local import: only matplotlib.pyplot is imported at file level.
    from matplotlib.colors import LinearSegmentedColormap

    _, weights_2d, _ = get_weights(model = model)
    weights_ordered = weights_to_plot_v2(model=model)

    # The interaction may be stored under either ordering of the two names.
    name1 = feature1 + "-" + feature2
    name2 = feature2 + "-" + feature1

    x_vect = np.linspace(min1, max1, num_points)
    y_vect = np.linspace(min2, max2, num_points)

    tex_fonts = {
            "axes.labelsize": 7,
            "axes.linewidth":0.5,
            "axes.labelpad": 1,
            "font.size": 7,
            # Make the legend/label fonts a little smaller
            "legend.fontsize": 6,
            "legend.fancybox": False,
            "legend.edgecolor": "inherit",
            'legend.borderaxespad': 0.4,
            'legend.borderpad': 0.4,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "xtick.major.pad": 0.5,
            "ytick.major.pad": 0.5,
            "grid.linewidth": 0.5,
            "lines.linewidth": 0.8
        }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    # Build a real colormap from the three reds. The previous code stored the
    # return value of sns.set_palette (None) and passed it as cmap, so the
    # custom colours were never used and the global colour cycle was changed.
    colors = ['#F5E5E2', '#DF7057', '#A31D04']
    interaction_cmap = LinearSegmentedColormap.from_list('rumboost_reds', colors)

    X, Y = np.meshgrid(x_vect, y_vect)

    for u in weights_2d.Utility.unique():
        weights_2d_util = weights_2d[weights_2d.Utility==u]
        contour_plot1 = function_2d(weights_2d_util[weights_2d_util.Feature==name1], x_vect, y_vect)
        contour_plot2 = function_2d(weights_2d_util[weights_2d_util.Feature==name2], y_vect, x_vect)

        # The second surface is computed with swapped axes, hence the transpose.
        contour_plot = contour_plot1 + contour_plot2.T

        # No interaction between the two features in this utility.
        if np.sum(contour_plot) == 0:
            continue

        utility_weights = weights_ordered[str(u)]
        if (feature1 in utility_weights) and (feature2 in utility_weights):
            # Feature 1 varies along the rows of the grid...
            _, feature1_alone = non_lin_function(utility_weights[feature1], min1, max1, num_points)
            feature1_grid = np.repeat(feature1_alone, num_points).reshape((num_points, num_points))
            contour_plot += feature1_grid

            # ...and feature 2 along the columns.
            _, feature2_alone = non_lin_function(utility_weights[feature2], min2, max2, num_points)
            feature2_grid = np.repeat(feature2_alone, num_points).reshape((num_points, num_points)).T
            contour_plot += feature2_grid

        # Normalise so that the highest utility on the grid is zero.
        contour_plot -= contour_plot.max()

        if np.sum(contour_plot) == 0:
            continue

        fig, axes = plt.subplots(figsize=(3.49,3), layout='constrained', dpi=1000)

        # `linewidths` is ignored by contourf, so it is no longer passed.
        c_plot = axes.contourf(X, Y, contour_plot.T, levels=num_points, cmap=interaction_cmap, vmin=-12, vmax=0)

        axes.set_xlabel(f'{feature1} [h]')
        axes.set_ylabel(f'{feature2}')

        cbar = fig.colorbar(c_plot, ax = axes, ticks=[-10, -8, -6, -4, -2, 0])
        cbar.ax.set_ylabel('Utility')

        if save_figure:
            # Use the caller-supplied names; they used to be overwritten by a
            # hard-coded list, which made the parameter useless.
            plt.savefig('Figures/FI RUMBoost/age_travel_time_{}.png'.format(utility_names[int(u)]))

        plt.show()



# [docs]
def plot_parameters(model, X, utility_names, Betas = None,  model_unconstrained = None,
                    with_pw = False, save_figure=False, asc_normalised = False, with_asc = False,
                    with_cat = True, only_tt = False, only_1d = False, with_fit = False,
                    fit_all = True, technique = 'weighted_data', data_sep = False, sm_tt_cost=False,
                    save_file=''):
    """
    Plot the non linear impact of parameters on the utility function.

    Up to three families of figures are drawn:

    * if ``sm_tt_cost`` is True, two grouped figures (travel time and cost)
      using hard-coded SwissMetro feature names;
    * if ``only_1d`` is False, four grouped figures (travel time, cost, age and
      departure time) using hard-coded LPMC feature names;
    * if ``only_tt`` is False, one figure per feature of every utility.

    The grouped figures index the weights directly with their hard-coded
    feature names, so the model must contain them (presumably a KeyError is
    raised otherwise; confirm against ``weights_to_plot_v2``).

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    X : pandas dataframe
        Features used to train the model, in a pandas dataframe. Used to set
        the x range of the per-feature figures.
    utility_names : dict
        Dictionary mapping utilities indices to their names. It is indexed with
        the keys of the weights returned by ``weights_to_plot_v2``.
    Betas : list, optional (default = None)
        List of beta parameters value from a RUM. Currently unused by this function.
    model_unconstrained : LightGBM model, optional (default = None)
        The unconstrained model. Currently unused by this function.
    with_pw : bool, optional (default = False)
        If the piece-wise function should be included in the graph.
        Currently unused by this function.
    save_figure : bool, optional (default = False)
        If True, save the plots as png files.
    asc_normalised : bool, optional (default = False)
        If True, scale down utilities to be zero at the y axis.
    with_asc : bool, optional (default = False)
        If True, add the ASC of the corresponding utility to every curve.
        Intended to be combined with asc_normalised = True; this is not checked here.
    with_cat : bool, optional (default = True)
        If False, categorical features are not plotted.
        Currently unused by this function.
    only_tt : bool, optional (default = False)
        If True, the per-feature figures are skipped.
    only_1d : bool, optional (default = False)
        If True, the grouped LPMC figures are skipped.
    with_fit : bool, optional (default = False)
        Only affects the name of the saved per-feature figures, which then
        also contains ``technique``.
    fit_all : bool, optional (default = True)
        If False, plot only the best fitting function.
        Currently unused by this function.
    technique : str, optional (default = 'weighted_data')
        The technique for data sampling in the function fitting. Only used in
        the saved file name when with_fit is True.
    data_sep : bool, optional (default = False)
        If True, split the data to fit subsets of data.
        Currently unused by this function.
    sm_tt_cost : bool, optional (default = False)
        If True, additionally plot the swissmetro travel time and cost, each on
        a single figure.
    save_file : str, optional (default='')
        Prefix of the file name used to save the per-feature figures.
    """
    weights_arranged = weights_to_plot_v2(model)

    # ASCs are only computed when they are going to be added to the curves.
    ASCs = get_asc(weights_arranged) if with_asc else None

    tex_fonts = {
        "axes.labelsize": 7,
        "axes.linewidth":0.5,
        "axes.labelpad": 1,
        "font.size": 7,
        # Make the legend/label fonts a little smaller
        "legend.fontsize": 6,
        "legend.fancybox": False,
        "legend.edgecolor": "inherit",
        'legend.borderaxespad': 0.4,
        'legend.borderpad': 0.4,
        "xtick.labelsize": 6,
        "ytick.labelsize": 6,
        "xtick.major.pad": 0.5,
        "ytick.major.pad": 0.5,
        "grid.linewidth": 0.5,
        "lines.linewidth": 0.8
    }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    # Settings shared by every grouped figure.
    group_kwargs = dict(asc_normalised=asc_normalised, with_asc=with_asc, ascs=ASCs, save_figure=save_figure)

    if sm_tt_cost:
        # SwissMetro travel time on one figure (minutes converted to hours).
        _plot_utility_group(
            weights_arranged,
            [('0', 'TRAIN_TT', 'g', 'Rail'),
             ('1', 'SM_TT', '#6b8ba4', 'Swissmetro'),
             ('2', 'CAR_TT', 'orange', 'Driving')],
            (0, 600), 'Travel time [h]', 'Figures/RUMBoost/SwissMetro/travel_time.png',
            x_divisor=60, **group_kwargs)

        # SwissMetro cost on one figure.
        _plot_utility_group(
            weights_arranged,
            [('0', 'TRAIN_COST', 'g', 'Rail'),
             ('1', 'SM_COST', '#6b8ba4', 'Swissmetro'),
             ('2', 'CAR_CO', 'orange', 'Driving')],
            (0, 500), 'Cost [chf]', 'Figures/RUMBoost/SwissMetro/cost.png',
            **group_kwargs)

    if not only_1d:
        # LPMC travel time on one figure (shown together with the cost figure).
        _plot_utility_group(
            weights_arranged,
            [('0', 'dur_walking', 'b', 'Walking'),
             ('1', 'dur_cycling', 'r', 'Cycling'),
             ('2', 'dur_pt_bus', '#02590f', 'PT Bus'),
             ('2', 'dur_pt_rail', 'g', 'PT Rail'),
             ('3', 'dur_driving', 'orange', 'Driving')],
            (0, 2.5), 'Travel time [h]', 'Figures/RUMBoost/LPMC/travel_time.png',
            **group_kwargs)

        # LPMC cost on one figure.
        _plot_utility_group(
            weights_arranged,
            [('2', 'cost_transit', 'g', 'PT'),
             ('3', 'cost_driving_fuel', 'orange', 'Driving')],
            (0, 10), 'Cost [£]', 'Figures/RUMBoost/LPMC/cost.png',
            show=True, **group_kwargs)

        # Age and departure time enter all four utilities.
        mode_styles = [('0', 'b', 'Walking'), ('1', 'r', 'Cycling'), ('2', 'g', 'PT'), ('3', 'orange', 'Driving')]

        _plot_utility_group(
            weights_arranged,
            [(utility, 'age', colour, mode) for utility, colour, mode in mode_styles],
            (0, 100), 'Age', 'Figures/RUMBoost/LPMC/age.png',
            show=True, **group_kwargs)

        _plot_utility_group(
            weights_arranged,
            [(utility, 'start_time_linear', colour, mode) for utility, colour, mode in mode_styles],
            (0, 24), 'Departure time', 'Figures/RUMBoost/LPMC/departure_time.png',
            show=True, **group_kwargs)

    #for all features parameters
    if not only_tt:
        for u in weights_arranged:
            for f in weights_arranged[u]:

                #create nonlinear plot
                x, non_lin_func = _adjusted_utility_curve(
                    weights_arranged[u][f], 0, 1.05*max(X[f]), asc_normalised,
                    ASCs[int(u)] if with_asc else None)

                #plot parameters
                plt.figure(figsize=(3.49, 2.09), dpi=1000)
                plt.ylabel('{} utility'.format(utility_names[u]))
                plt.xlabel(_feature_axis_label(f))

                sns.lineplot(x=x, y=non_lin_func, color='k', label='RUMBoost')

                # 5% margins around the data range (x) and the curve range (y).
                feature_max = np.max(X[f])
                lowest, highest = np.min(non_lin_func), np.max(non_lin_func)
                y_margin = 0.05*(highest-lowest)
                plt.xlim([0-0.05*feature_max, feature_max*1.05])
                plt.ylim([lowest - y_margin, highest + y_margin])

                plt.tight_layout()

                if save_figure:
                    if with_fit:
                        # The file prefix was missing from the arguments, which made
                        # str.format raise IndexError (four fields, three values).
                        plt.savefig('Figures/{}{} utility, {} feature {} technique.png'.format(save_file, utility_names[u], f, technique))
                    else:
                        plt.savefig('Figures/{}{} utility, {} feature.png'.format(save_file, utility_names[u], f))

                plt.show()


def _feature_axis_label(feature):
    """Return the x axis label of a feature, with a unit guessed from its name."""
    # Order matters: the first matching token wins.
    for token, unit in (('dur', 'h'), ('TIME', 'min'), ('cost', '£'), ('distance', 'km'), ('CO', 'chf')):
        if token in feature:
            return '{} [{}]'.format(feature, unit)
    return '{}'.format(feature)


def _adjusted_utility_curve(weights, lower, upper, asc_normalised, asc=None, num_points=10000):
    """Evaluate a feature's utility on [lower, upper], optionally zeroed at the first point and shifted by an ASC."""
    x, values = non_lin_function(weights, lower, upper, num_points)
    if asc_normalised:
        origin = values[0]
        values = [v - origin for v in values]
    if asc is not None:
        values = [v + asc for v in values]
    return x, values


def _plot_utility_group(weights_arranged, curves, bounds, xlabel, save_path, asc_normalised=False,
                        with_asc=False, ascs=None, save_figure=False, x_divisor=None, show=False):
    """Draw several (utility, feature, colour, label) curves on one new figure."""
    plt.figure(figsize=(3.49, 3.49), dpi=1000)
    lower, upper = bounds

    # Compute every curve before drawing anything, as the inlined code did.
    computed = []
    for utility, feature, colour, label in curves:
        # The ASC always belongs to the utility the feature is taken from.
        asc = ascs[int(utility)] if with_asc else None
        x, values = _adjusted_utility_curve(weights_arranged[utility][feature], lower, upper, asc_normalised, asc)
        if x_divisor is not None:
            x = x/x_divisor
        computed.append((x, values, colour, label))

    for x, values, colour, label in computed:
        sns.lineplot(x=x, y=values, color=colour, label=label)

    plt.xlabel(xlabel)
    plt.ylabel('Utility')

    plt.tight_layout()

    if save_figure:
        plt.savefig(save_path)

    if show:
        plt.show()



# [docs]
def plot_market_segm(model, X, asc_normalised: bool = True, utility_names: list[str] = ['Walking', 'Cycling', 'Public Transport', 'Driving']):
    '''
    Plot the market segmentation.

    One figure is drawn per utility. Each feature of the utility is drawn as a
    curve; the first one is labelled 'Weekdays' and the second one 'Weekends',
    so at most two features per utility are supported.

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    X : pandas DataFrame
        Training data. Used to set the x range of each curve.
    asc_normalised : bool, optional (default = True)
        If True, scale down utilities to be zero at the y axis.
    utility_names : list[str] or dict, optional (default = ['Walking', 'Cycling', 'Public Transport', 'Driving'])
        Names of utilities. A dictionary is looked up with the utility key
        itself first; otherwise the key is converted to an integer index.

    '''

    sns.set_theme()

    weights_arranged = weights_to_plot_v2(model, market_segm=True)
    label = {0:'Weekdays',1:'Weekends'}
    color = ['r', 'b']

    for u in weights_arranged:
        # Elsewhere in this module the utility keys are strings ('0', '1', ...),
        # which cannot index a list of names directly.
        if isinstance(utility_names, dict) and u in utility_names:
            utility_name = utility_names[u]
        else:
            utility_name = utility_names[int(u)]

        plt.figure(figsize=(10, 6))

        for i, f in enumerate(weights_arranged[u]):

            #create nonlinear plot
            x, non_lin_func = non_lin_function(weights_arranged[u][f], 0, 1.05*max(X[f]), 10000)

            if asc_normalised:
                val_0 = non_lin_func[0]
                non_lin_func = [n - val_0 for n in non_lin_func]

            sns.lineplot(x=x, y=non_lin_func, lw=2, color=color[i], label=label[i])

        plt.title('Impact of travel time in weekdays and weekends on {} utility'.format(utility_name), fontdict={'fontsize':  16})
        plt.ylabel('{} utility'.format(utility_name))
        plt.xlabel('Travel time [h]')
        plt.show()



# [docs]
def plot_util(model, data_train, points=10000):
    '''
    Plot the raw utility function of every feature, one figure per feature.

    For each entry of ``model.rum_structure``, the matching booster of
    ``model.boosters`` is asked to predict on an input where a single column
    sweeps from 0 to 1.05 times the maximum observed value and all the other
    columns are zero.

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    data_train : pandas Dataframe
        The full training dataset.
    points : int, optional (default = 10000)
        The number of points used to draw the line plot.

    '''
    sns.set_theme()
    for struct_idx, structure in enumerate(model.rum_structure):
        columns = structure['columns']
        booster = model.boosters[struct_idx]
        n_columns = len(columns)
        for col_idx, feature in enumerate(columns):
            # Sweep of the feature under study, reused for the x axis.
            grid = np.linspace(0, 1.05*max(data_train[feature]), points)

            # Every other feature is held at zero.
            inputs = np.zeros(shape = (points, n_columns))
            inputs[:, col_idx] = grid

            utility = booster.predict(inputs)

            plt.figure()
            plt.plot(grid, utility)
            plt.title(feature)



# [docs]
def plot_util_pw(model, data_train, points = 10000):
    '''
    Compute the piece-wise utility function on a regular grid.

    Every column of ``data_train`` is replaced by ``points`` evenly spaced
    values between 0 and 1.05 times its maximum, and the resulting frame is
    handed to ``stairs_to_pw`` with ``util_for_plot = True``. No plotting
    call is made in this function itself.

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    data_train : pandas Dataframe
        The full training dataset.
    points : int, optional (default = 10000)
        The number of points used to draw the line plot.

    Returns
    -------
    pw_func
        Whatever ``stairs_to_pw`` returns for the generated grid.

    '''
    # One evenly spaced sweep per column of the training data.
    grid = pd.DataFrame({
        feature: np.linspace(0, 1.05*max(data_train[feature]), points)
        for feature in data_train.columns
    })

    return stairs_to_pw(model, data_train, grid, util_for_plot = True)



# [docs]
def plot_spline(model, data_train, spline_collection, utility_names, mean_splines = False, x_knots_dict = None, save_fig = False, lpmc_tt_cost=False, sm_tt_cost=False, save_file=''):
    '''
    Plot the spline interpolation for all utilities interpolated.

    One figure is drawn per (utility, feature) pair of spline_collection, showing the data points,
    the spline and its knots, all shifted so that the first data point has a utility of zero.
    Optionally, dataset-specific summary figures (travel time and cost) are drawn first.

    Parameters
    ----------
    model : RUMBoost
        A RUMBoost object.
    data_train : pandas Dataframe
        The full training dataset.
    spline_collection : dict
        A dictionary containing the optimal number of splines for each feature interpolated of each utility
    utility_names : dict or list
        The alternative names, indexed with the keys of spline_collection. Used for the y axis label.
    mean_splines : bool, optional (default = False)
        Must be True if the splines are computed at the mean distribution of data for stairs.
    x_knots_dict : dict, optional (default = None)
        A dictionary in the form of {utility: {attribute: x_knots}} where x_knots are the spline knots for the corresponding 
        utility and attributes. Required if lpmc_tt_cost or sm_tt_cost is True.
    save_fig : bool, optional (default = False)
        If True, save every figure as a png file.
    lpmc_tt_cost : bool, optional (default = False)
        If True, also draw the travel time and cost figures with hard-coded LPMC column names
        (dur_walking, dur_cycling, dur_pt_rail, dur_driving, cost_transit, cost_driving_fuel).
    sm_tt_cost : bool, optional (default = False)
        If True, also draw the travel time and cost figures with hard-coded SwissMetro column names
        (TRAIN_TT, SM_TT, CAR_TT, TRAIN_COST, SM_COST, CAR_CO). Travel times are divided by 60 on the x axis.
    save_file : str, optional (default = '')
        Prefix prepended to the file name of each per-feature figure when save_fig is True.

    Raises
    ------
    ValueError
        If lpmc_tt_cost or sm_tt_cost is True and x_knots_dict is None.
    '''
    if (lpmc_tt_cost or sm_tt_cost) and x_knots_dict is None:
        raise ValueError('x_knots_dict must be provided when lpmc_tt_cost or sm_tt_cost is True.')

    #get weights ordered by features
    weights = weights_to_plot_v2(model)
    tex_fonts = {
            "axes.labelsize": 7,
            "axes.linewidth":0.5,
            "axes.labelpad": 1,
            "font.size": 7,
            # Make the legend/label fonts a little smaller
            "legend.fontsize": 6,
            "legend.fancybox": False,
            "legend.edgecolor": "inherit",
            'legend.borderaxespad': 0.4,
            'legend.borderpad': 0.4,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "xtick.major.pad": 0.5,
            "ytick.major.pad": 0.5,
            "grid.linewidth": 0.5,
            "lines.linewidth": 0.8,
            'scatter.edgecolors': 'none'
        }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    if lpmc_tt_cost:
        _draw_spline_group(
            data_train, weights, spline_collection, x_knots_dict,
            curves=[
                ('0', 'dur_walking', 'b', 'Walking travel time'),
                ('1', 'dur_cycling', 'r', 'Cycling travel time'),
                ('2', 'dur_pt_rail', 'g', 'Rail travel time'),
                ('3', 'dur_driving', 'orange', 'Driving travel time'),
            ],
            xlim=[0, 5], xlabel='Travel time  [h]',
            save_path="Figures/RUMBoost/LPMC/splines_travel_time.png" if save_fig else None)

        _draw_spline_group(
            data_train, weights, spline_collection, x_knots_dict,
            curves=[
                ('2', 'cost_transit', 'g', 'PT cost'),
                ('3', 'cost_driving_fuel', 'orange', 'Driving cost'),
            ],
            xlim=[0, 10], xlabel='Cost [£]',
            save_path="Figures/RUMBoost/LPMC/splines_cost.png" if save_fig else None)

    if sm_tt_cost:
        _draw_spline_group(
            data_train, weights, spline_collection, x_knots_dict,
            curves=[
                ('0', 'TRAIN_TT', 'g', 'Rail travel time'),
                ('1', 'SM_TT', '#6b8ba4', 'SwissMetro travel time'),
                ('2', 'CAR_TT', 'orange', 'Driving travel time'),
            ],
            xlim=[0, 10], xlabel='Travel time [h]', x_scale=60,
            save_path="Figures/RUMBoost/SwissMetro/splines_travel_time.png" if save_fig else None)

        _draw_spline_group(
            data_train, weights, spline_collection, x_knots_dict,
            curves=[
                ('0', 'TRAIN_COST', 'g', 'Rail cost'),
                ('1', 'SM_COST', '#6b8ba4', 'SwissMetro cost'),
                ('2', 'CAR_CO', 'orange', 'Driving cost'),
            ],
            xlim=[0, 500], xlabel='Cost [chf]',
            save_path="Figures/RUMBoost/SwissMetro/splines_cost.png" if save_fig else None)

    #substring of the feature name -> unit shown on the x axis, first match wins
    axis_units = (('dur', 'h'), ('TIME', 'h'), ('cost', '£'), ('CO', 'chf'), ('distance', 'km'))

    for u in spline_collection:
        for f in spline_collection[u]:
            num_splines = spline_collection[u][f]

            #the knots are only looked up when they are actually used
            if mean_splines or x_knots_dict is None:
                x_knots = None
            else:
                x_knots = x_knots_dict[u][f]

            x_plot, y_plot_norm, x_spline, y_spline_norm, x_knot, y_knot_norm = _spline_curves(
                data_train[f], weights[u][f], num_splines, x_knots=x_knots, mean_splines=mean_splines)

            plt.figure(figsize=(3.49, 2.09), dpi=1000)

            #each artist carries its own label so that the legend cannot be mismatched
            #data
            plt.scatter(x_plot, y_plot_norm, color='k', s=0.3, label='Data')

            #splines
            plt.plot(x_spline, y_spline_norm, color='#5badc7', label='Splines ({})'.format(num_splines))

            #knots position
            plt.scatter(x_knot, y_knot_norm, color='#CC5500', s=1, label='Knots')

            plt.legend()
            plt.ylabel('{} utility'.format(utility_names[u]))

            unit = next((unit for marker, unit in axis_units if marker in f), None)
            if unit is None:
                plt.xlabel('{}'.format(f))
            else:
                plt.xlabel('{} [{}]'.format(f, unit))

            #called once all labels are set so that they are included in the layout
            plt.tight_layout()
            if save_fig:
                plt.savefig(save_file + "{} utility, {} feature.png".format(u, f))
            plt.show()


def _spline_curves(feature_data, feature_weights, num_splines, x_knots=None, mean_splines=False):
    '''
    Compute the data points, spline and knots of one feature, shifted so that the first data point is at zero.

    The spline comes from mean_monotone_spline if mean_splines is True, from monotone_spline with
    the given knots if x_knots is not None, and from monotone_spline on the data points otherwise.

    Returns
    -------
    tuple of numpy arrays
        (x_data, y_data, x_spline, y_spline, x_knot, y_knot), where all y values have the first value of y_data subtracted.
    '''
    x_data, y_data = data_leaf_value(feature_data, feature_weights, 'data_weighted')

    if mean_splines:
        x_mean, y_mean = data_leaf_value(feature_data, feature_weights, technique='mean_data')
        x_spline, y_spline, _, x_knot, y_knot = mean_monotone_spline(x_data, x_mean, y_data, y_mean, num_splines=num_splines)
    elif x_knots is not None:
        x_spline = np.linspace(np.min(feature_data), np.max(feature_data), num=10000)
        x_knots_temp, y_knots = data_leaf_value(x_knots, feature_weights)
        _, y_spline, _, x_knot, y_knot = monotone_spline(x_spline, feature_weights, num_splines=num_splines, x_knots=x_knots_temp, y_knots=y_knots)
    else:
        x_spline, y_spline, _, x_knot, y_knot = monotone_spline(x_data, y_data, num_splines=num_splines)

    #all curves are normalised with respect to the first data point
    offset = y_data[0]

    return (np.asarray(x_data), np.asarray(y_data) - offset,
            np.asarray(x_spline), np.asarray(y_spline) - offset,
            np.asarray(x_knot), np.asarray(y_knot) - offset)


def _draw_spline_group(data_train, weights, spline_collection, x_knots_dict, curves, xlim, xlabel, x_scale=1, save_path=None):
    '''
    Draw the data, splines and knots of several features on a single figure.

    curves is a list of (utility, feature, color, name) tuples. x values are divided by x_scale
    before plotting. The figure is saved to save_path if it is not None.
    '''
    plt.figure(figsize=(3.49, 2.09), dpi=1000)

    last = len(curves) - 1
    for k, (u, f, color, name) in enumerate(curves):
        x_plot, y_plot_norm, x_spline, y_spline_norm, x_knot, y_knot_norm = _spline_curves(
            data_train[f], weights[u][f], spline_collection[u][f], x_knots=x_knots_dict[u][f])

        #data
        plt.scatter(x_plot / x_scale, y_plot_norm, color=color, s=0.3, alpha=1, edgecolors='none')

        #splines
        plt.plot(x_spline / x_scale, y_spline_norm, color=color, label=f'{name} ({spline_collection[u][f]} splines)')

        #knots position, labelled only once so that the legend has a single 'Knots' entry
        knots_label = {'label': 'Knots'} if k == last else {}
        plt.scatter(x_knot / x_scale, y_knot_norm, color='k', s=1, **knots_label)

    plt.ylabel('Utility')
    plt.xlim(xlim)
    plt.xlabel(xlabel)
    plt.legend()
    plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()



[docs]
def plot_VoT(data_train, util_collection, attribute_VoT, utility_names, draw_range, save_figure = False, num_points = 1000):
    '''
    Plot the Value of Time (VoT) of the attributes specified in attribute_VoT as a filled contour plot.

    The VoT is the ratio of the derivative of the utility of the first attribute over the derivative of the
    utility of the second attribute. It is clamped to [0.1, 100] and drawn on a base 10 logarithmic colour scale.
    It is set to 100 wherever the derivative of the second attribute is zero.

    Parameters
    ----------
    data_train : pandas Dataframe
        The full training dataset. Not used by this function.
    util_collection : dict
        A dictionary containing the type of utility to use for all features in all utilities.
        The utilities of the attributes in attribute_VoT must have a derivative() method returning
        a callable that accepts arrays.
    attribute_VoT : dict
        A dictionary with keys being the utility number (as string) and values being a tuple of the attributes to compute the VoT on.
        The structure follows this form: {utility: (attribute1, attribute2)}
    utility_names : dict or list
        The alternative names, indexed with the keys of attribute_VoT. Used for the name of the saved file.
    draw_range : dict
        A dictionary in the form of {utility: {attribute: (min, max)}} with the range to draw for each attribute.
    save_figure : bool, optional (default = False)
        If True, save the figure as a png file.
    num_points : int, optional (default = 1000)
        The number of points per axis. The total number of points is num_points**2.
    '''

    tex_fonts = {
            "axes.labelsize": 7,
            "axes.linewidth":0.5,
            "axes.labelpad": 1,
            "font.size": 7,
            # Make the legend/label fonts a little smaller
            "legend.fontsize": 6,
            "legend.fancybox": False,
            "legend.edgecolor": "inherit",
            'legend.borderaxespad': 0.4,
            'legend.borderpad': 0.4,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "xtick.major.pad": 0.1,
            "ytick.major.pad": 0.1,
            "grid.linewidth": 0.5,
            "lines.linewidth": 0.8
        }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    for u in attribute_VoT: 
        f1, f2 = attribute_VoT[u]
        x_vect = np.linspace(draw_range[u][f1][0], draw_range[u][f1][1], num_points)
        y_vect = np.linspace(draw_range[u][f2][0], draw_range[u][f2][1], num_points)
        d_f1 = util_collection[u][f1].derivative()
        d_f2 = util_collection[u][f2].derivative()
        X, Y = np.meshgrid(x_vect, y_vect, indexing='ij')

        #each derivative only depends on one axis, so it is evaluated once per axis
        #and broadcast to the grid instead of being called for every cell
        slope_f1 = np.asarray(d_f1(x_vect), dtype=float)
        slope_f2 = np.asarray(d_f2(y_vect), dtype=float)

        #divisions by zero are overwritten just below
        with np.errstate(divide='ignore', invalid='ignore'):
            ratio = slope_f1[:, np.newaxis] / slope_f2[np.newaxis, :]

        #clamp to [0.1, 100], and use the upper bound where the denominator is zero
        VoT_contour_plot = np.where(slope_f2[np.newaxis, :] == 0, 100.0, np.clip(ratio, 0.1, 100))

        fig, axes = plt.subplots(figsize=(3.49,3.49), dpi=1000)

        res = 100

        #linewidths is not passed: it only applies to contour, not to contourf
        c_plot = axes.contourf(X, Y, np.log10(VoT_contour_plot), levels=res, cmap=sns.color_palette("Blues", as_cmap=True), vmin = -1, vmax = 2)

        axes.set_xlabel(f'{f1} [h]')
        axes.set_ylabel(f'{f2} [£]')

        #ticks are in log10 units, labels are the corresponding VoT values
        cbar = fig.colorbar(c_plot, ax = axes, ticks=[-1, 0, 1, 2])
        cbar.set_ticklabels([0.1, 1, 10, 100])
        cbar.ax.set_ylabel('VoT [£/h]')
        cbar.ax.set_ylim([-1, 2])

        if save_figure:
            plt.savefig('Figures/RUMBoost/LPMC/VoT_{}.png'.format(utility_names[u]))

        plt.show()




[docs]
def plot_pop_VoT(data_test, util_collection, attribute_VoT, save_figure = False):
    '''
    Plot a histogram of the Value of Time (VoT) of each observation of a dataset.

    The VoT is the ratio of the derivative of the utility of the first attribute over the derivative of the
    utility of the second attribute, both evaluated at the observed values. NaN values, non-positive values
    and values at or above the 99% quantile are left out of the histogram.

    Parameters
    ----------
    data_test : pandas Dataframe
        The dataset on which the VoT is computed. It must contain the attributes of attribute_VoT as columns.
    util_collection : dict
        A dictionary containing the type of utility to use for all features in all utilities.
        The utilities of the attributes in attribute_VoT must have a derivative() method.
    attribute_VoT : dict
        A dictionary with keys being the utility number (as string) and values being a tuple of the attributes to compute the VoT on.
        The structure follows this form: {utility: (attribute1, attribute2)}
    save_figure : bool, optional (default = False)
        If True, save the figure as a png file.
    '''

    tex_fonts = {
            "axes.labelsize": 7,
            "axes.linewidth":0.5,
            "axes.labelpad": 1,
            "font.size": 7,
            # Make the legend/label fonts a little smaller
            "legend.fontsize": 6,
            "legend.fancybox": False,
            "legend.edgecolor": "inherit",
            'legend.borderaxespad': 0.4,
            'legend.borderpad': 0.4,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "xtick.major.pad": 0.5,
            "ytick.major.pad": 0.5,
            "grid.linewidth": 0.5,
            "lines.linewidth": 0.8
        }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    for u in attribute_VoT: 
        f1, f2 = attribute_VoT[u]
        d_f1 = util_collection[u][f1].derivative()
        d_f2 = util_collection[u][f2].derivative()

        VoT_pop = d_f1(data_test[f1])/d_f2(data_test[f2])

        filtered_VoT_pop = VoT_pop[~np.isnan(VoT_pop)]

        #keep positive values below the 99% quantile
        limited_VoT_pop = filtered_VoT_pop[(filtered_VoT_pop>0) & (filtered_VoT_pop < np.quantile(filtered_VoT_pop, 0.99))]

        plt.figure(figsize=(3.49, 2.09), dpi=1000)
        sns.histplot(limited_VoT_pop, color='b', alpha = 0.5, kde=True, bins=50)
        plt.xlabel("VoT [£/h]")
        plt.tight_layout()

        #the figure must be saved before plt.show(), otherwise the saved file can be blank
        if save_figure:
            plt.savefig('Figures/RUMBoost/SwissMetro/pop_VoT_{}.png'.format(u))

        plt.show()



[docs]
def plot_ind_spec_constant(socec_model, dataset_train, alternatives: list[str]):
    '''
    Plot a histogram of all alternatives individual specific constant of a functional effect model.

    One histogram is drawn per alternative, on a grid of two columns with as many rows as needed.
    All histograms share the same bins and the same hard-coded axis limits.

    Parameters
    ----------

    socec_model:
        The part of the functional effect model with full interactions of socio-economic characteristics.
    dataset_train: 
        The dataset used to train the model. It must be a lightGBM Dataset object.
    alternatives:
        The list of alternatives name. It must contain one name per column of the predicted utilities.
    '''

    tex_fonts = {
        "axes.labelsize": 7,
        "axes.linewidth": 0.5,
        "axes.labelpad": 1,
        "font.size": 7,
        # Make the legend/label fonts a little smaller
        "legend.fontsize": 6,
        "legend.fancybox": False,
        "legend.edgecolor": "inherit",
        'legend.borderaxespad': 0.4,
        'legend.borderpad': 0.4,
        "xtick.labelsize": 6,
        "ytick.labelsize": 6,
        "xtick.major.pad": 0.5,
        "ytick.major.pad": 0.5,
        "grid.linewidth": 0.5,
        "lines.linewidth": 0.8
    }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")

    #one column per alternative
    ind_spec_constants = np.asarray(socec_model.predict(dataset_train, utilities=True))
    num_alternatives = ind_spec_constants.shape[1]

    #common bins for all alternatives
    bins=np.histogram(ind_spec_constants, bins=50)[1]

    # NOTE(review): this second call re-applies the default seaborn theme, which presumably
    # overrides most of the settings applied above - confirm which theme is intended.
    sns.set_theme()

    #two columns and as many rows as needed, i.e. a 12x10 figure with a 2x2 grid for four alternatives
    num_cols = 2
    num_rows = max(1, -(-num_alternatives // num_cols))
    f, axes = plt.subplots(num_rows, num_cols, figsize=(12, 5 * num_rows), tight_layout=True, squeeze=False)
    colors = ['b', 'r', 'g', 'orange']

    for i, axs in enumerate(axes.flatten()):
        #hide the empty cell of the grid when the number of alternatives is odd
        if i >= num_alternatives:
            axs.set_visible(False)
            continue
        sns.histplot(ind_spec_constants[:, i], bins=bins, alpha=0.5, ax=axs, kde=True, color=colors[i % len(colors)])
        axs.set_title(f'{alternatives[i]}')

    # Defining custom 'xlim' and 'ylim' values.
    xlim = (-3.5, 3.5)
    ylim = (0, 5250)

    # Setting the values for all axes.
    plt.setp(axes, xlim=xlim, ylim=ylim)

    plt.show()



[docs]
def plot_bootstrap(models: list, dataset: pd.DataFrame, features: dict[list[str]]):
    '''
    Plot the bootstrap sampling.

    Parameters
    ----------

    models: list
        A list containing all the trained mdoels of the bootstrap sampling
    dataset: pd.DataFrame
        The full dataset used for training
    features: dict[list[str]]
        A dictionary of lists of strings contaning the number of alternatives, and the features for that alternative, 
        e.g. {'0':['feature_1', ...], '1': [], ...]
    '''
    tex_fonts = {
        # Use LaTeX to write all text
        # "text.usetex": True, 
        # "font.family": "serif",
        # "font.serif": "Computer Modern Roman",
        # Use 14pt font in plots, to match 10pt font in document
        "axes.labelsize": 7,
        "axes.linewidth":0.5,
        "axes.labelpad": 1,
        "font.size": 7,
        # Make the legend/label fonts a little smaller
        "legend.fontsize": 6,
        "legend.fancybox": False,
        "legend.edgecolor": "inherit",
        'legend.borderaxespad': 0.4,
        'legend.borderpad': 0.4,
        "xtick.labelsize": 6,
        "ytick.labelsize": 6,
        "xtick.major.pad": 0.5,
        "ytick.major.pad": 0.5,
        "grid.linewidth": 0.5,
        "lines.linewidth": 0.8
    }
    sns.set_theme(font_scale=1, rc=tex_fonts)
    sns.set_style("whitegrid")
    # plt.rcParams.update({
    #     # "text.usetex": True,
    #     "font.family": "serif"
    #     #"font.sans-serif": "Computer Modern Roman",
    # })

    ufs_dict = {}
    for u in features:
        ufs_dict[u] = {}
        for f in features[u]:
            ufs_dict[u][f] = {'xplot': np.linspace(0, dataset[f].max(), 1000), 'yarr': np.array([]), 'yav': []}
            yi = []
            for model in models:
                vals = weights_to_plot_v2(model)
                _, y = non_lin_function(vals[u][f], 0, dataset[f].max(), 1000)
                yi.append([yii-y[0] for yii in y])
            ufs_dict[u][f]['yarr'] = np.array(yi)
            ufs_dict[u][f]['yav'] = ufs_dict[u][f]['yarr'].mean(axis=0)
            
            g = sns.JointGrid(xlim=(0, np.max(dataset[f])), height=3.89)
            g.figure.set_dpi(1000)
            x, y = ufs_dict[u][f]['xplot'],ufs_dict[u][f]['yav']
            sns.lineplot(x=x, y=y, ax=g.ax_joint,color='orange', linewidth=1, label='Average')
            sns.histplot(x=dataset[f], ax=g.ax_marg_x, bins=100, color = 'orange', alpha=0.5)
            for i in range(len(models)):
                sns.lineplot(x=x, y=ufs_dict[u][f]['yarr'][i, :].T, color = 'orange', alpha = 0.1, ax=g.ax_joint, linewidth=0.5)
            g.ax_joint.set(xlabel=f'{f}', ylabel='Utility')