import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from rumboost.utils import function_2d, weights_to_plot_v2, get_asc, non_lin_function, data_leaf_value, get_weights
from rumboost.utility_smoothing import stairs_to_pw, find_feat_best_fit, fit_func, monotone_spline, mean_monotone_spline
[docs]
def plot_2d(model, feature1: str, feature2: str, min1: int, max1: int, min2: int, max2: int, save_figure: bool = False, utility_names: list[str] = ['Walking', 'Cycling', 'Public Transport', 'Driving'], num_points = 1000):
'''
Plot a 2nd order feature interaction as a contour plot.
Parameters
----------
model : RUMBoost
A RUMBoost object.
feature1 : str
Name of feature 1.
feature2 : str
Name of feature 2.
min1 : int
Minimum value of feature 1.
max1 : int
Maximum value of feature 1.
min2 : int
Minimum value of feature 2.
max2 : int
Maximum value of feature 2.
save_figure : bool, optional (default = False)
If true, save the figure as a png file
utility_names : list[str]
List of the alternative names
num_points : int, optional (default=1000)
The number of points per axis. The total number of points is num_points**2.
'''
_, weights_2d, _ = get_weights(model = model)
weights_ordered = weights_to_plot_v2(model=model)
name1 = feature1 + "-" + feature2
name2 = feature2 + "-" + feature1
x_vect = np.linspace(min1, max1, num_points)
y_vect = np.linspace(min2, max2, num_points)
#to generalise
utility_names = ['Walking', 'Cycling', 'PT', 'Driving']
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in weights_2d.Utility.unique():
weights_2d_util = weights_2d[weights_2d.Utility==u]
contour_plot1 = function_2d(weights_2d_util[weights_2d_util.Feature==name1], x_vect, y_vect)
contour_plot2 = function_2d(weights_2d_util[weights_2d_util.Feature==name2], y_vect, x_vect)
contour_plot = contour_plot1 + contour_plot2.T
if np.sum(contour_plot) == 0:
continue
if (feature1 in weights_ordered[str(u)].keys()) and (feature2 in weights_ordered[str(u)].keys()):
_, feature1_alone = non_lin_function(weights_ordered[str(u)][feature1], min1, max1, num_points)
feature1_grid = np.repeat(feature1_alone, num_points).reshape((num_points, num_points))
contour_plot += feature1_grid
_, feature2_alone = non_lin_function(weights_ordered[str(u)][feature2], min2, max2, num_points)
feature2_grid = np.repeat(feature2_alone, num_points).reshape((num_points, num_points)).T
contour_plot += feature2_grid
contour_plot -= contour_plot.max()
colors = ['#F5E5E2', '#DF7057', '#A31D04']
customPalette = sns.set_palette(sns.color_palette(colors, as_cmap=True))
if np.sum(contour_plot) != 0:
X, Y = np.meshgrid(x_vect, y_vect)
fig, axes = plt.subplots(figsize=(3.49,3), layout='constrained', dpi=1000)
res = num_points
c_plot = axes.contourf(X, Y, contour_plot.T, levels=res, linewidths=0, cmap=customPalette, vmin=-12, vmax=0)
#axes.set_title(f'{utility_names[int(u)]}')
axes.set_xlabel(f'{feature1} [h]')
axes.set_ylabel(f'{feature2}')
cbar = fig.colorbar(c_plot, ax = axes, ticks=[-10, -8, -6, -4, -2, 0])
cbar.ax.set_ylabel('Utility')
if save_figure:
plt.savefig('Figures/FI RUMBoost/age_travel_time_{}.png'.format(utility_names[int(u)]))
plt.show()
[docs]
def plot_parameters(model, X, utility_names, Betas = None, model_unconstrained = None,
with_pw = False, save_figure=False, asc_normalised = False, with_asc = False,
with_cat = True, only_tt = False, only_1d = False, with_fit = False,
fit_all = True, technique = 'weighted_data', data_sep = False, sm_tt_cost=False,
save_file=''):
"""
Plot the non linear impact of parameters on the utility function. When specified, unconstrained parameters
and parameters from a RUM model can be added to the plot.
Parameters
----------
model : RUMBoost
A RUMBoost object.
X : pandas dataframe
Features used to train the model, in a pandas dataframe.
utility_name : dict
Dictionary mapping utilities indices to their names.
Betas : list, optional (default = None)
List of beta parameters value from a RUM. They should be listed in the same order as
in the RUMBoost model.
model_unconstrained : LightGBM model, optional (default = None)
The unconstrained model. Must be trained and compatible with dump_model().
with_pw : bool, optional (default = False)
If the piece-wise function should be included in the graph.
save_figure : bool, optional (default = False)
If True, save the plot as a png file.
asc_normalised : bool, optional (default = False)
If True, scale down utilities to be zero at the y axis.
with_asc : bool, optional (default = False)
If True, add the ASCs to all graphs (one is normalised, and asc_normalised must be True).
with_cat : bool, optional (default = True)
If False, categorical features are not plotted.
only_tt : bool, optional (default = False)
If True, plot only travel time and distance.
only_1d : bool, optional (default = False)
If True, plot only the features separately.
with_fit : bool, optional (default = False)
If True, fit the data with simple functions to approximate the step functions.
fit_all : bool, optional (default = True)
If False, plot only the best fitting function.
technique : str, optional (default = 'weighted_data')
The technique for data sampling in the function fitting.
data_sep : bool, optional (default = False)
If True, split the data to fit subsets of data.
sm_tt_cost : bool, optional (default = False)
If True, plot only the swissmetro travel time and cost on the same figure.
save_file : str, optional (default='')
The name to save the figure with.
"""
weights_arranged = weights_to_plot_v2(model)
if with_asc:
ASCs = get_asc(weights_arranged)
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
if sm_tt_cost:
#plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_rail = non_lin_function(weights_arranged['0']['TRAIN_TT'], 0, 600, 10000)
if asc_normalised:
non_lin_func_rail = [n - non_lin_func_rail[0] for n in non_lin_func_rail]
if with_asc:
non_lin_func_rail = [n + ASCs[0] for n in non_lin_func_rail]
x_c, non_lin_func_SM = non_lin_function(weights_arranged['1']['SM_TT'], 0, 600, 10000)
if asc_normalised:
non_lin_func_SM = [n - non_lin_func_SM[0] for n in non_lin_func_SM]
if with_asc:
non_lin_func_SM = [n + ASCs[1] for n in non_lin_func_SM]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['2']['CAR_TT'], 0, 600, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w/60, y=non_lin_func_rail, color='g', label='Rail')
sns.lineplot(x=x_c/60, y=non_lin_func_SM, color='#6b8ba4', label='Swissmetro')
sns.lineplot(x=x_d/60, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of alternative travel time on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Travel time [h]')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/SwissMetro/travel_time.png')
#plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_rail = non_lin_function(weights_arranged['0']['TRAIN_COST'], 0, 500, 10000)
if asc_normalised:
non_lin_func_rail = [n - non_lin_func_rail[0] for n in non_lin_func_rail]
if with_asc:
non_lin_func_rail = [n + ASCs[0] for n in non_lin_func_rail]
x_c, non_lin_func_SM = non_lin_function(weights_arranged['1']['SM_COST'], 0, 500, 10000)
if asc_normalised:
non_lin_func_SM = [n - non_lin_func_SM[0] for n in non_lin_func_SM]
if with_asc:
non_lin_func_SM = [n + ASCs[1] for n in non_lin_func_SM]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['2']['CAR_CO'], 0, 500, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_rail, color='g', label='Rail')
sns.lineplot(x=x_c, y=non_lin_func_SM, color='#6b8ba4', label='Swissmetro')
sns.lineplot(x=x_d, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of alternative cost on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Cost [chf]')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/SwissMetro/cost.png')
if not only_1d:
#plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(weights_arranged['0']['dur_walking'], 0, 2.5, 10000)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(weights_arranged['1']['dur_cycling'], 0, 2.5, 10000)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_ptb, non_lin_func_pt_bus = non_lin_function(weights_arranged['2']['dur_pt_bus'], 0, 2.5, 10000)
if asc_normalised:
non_lin_func_pt_bus = [n - non_lin_func_pt_bus[0] for n in non_lin_func_pt_bus]
if with_asc:
non_lin_func_pt_bus = [n + ASCs[2] for n in non_lin_func_pt_bus]
x_ptr, non_lin_func_pt_rail = non_lin_function(weights_arranged['2']['dur_pt_rail'], 0, 2.5, 10000)
if asc_normalised:
non_lin_func_pt_rail = [n - non_lin_func_pt_rail[0] for n in non_lin_func_pt_rail]
if with_asc:
non_lin_func_pt_rail = [n + ASCs[2] for n in non_lin_func_pt_rail]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['3']['dur_driving'], 0, 2.5, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color='b', label='Walking')
sns.lineplot(x=x_c, y=non_lin_func_cycle, color='r', label='Cycling')
sns.lineplot(x=x_ptb, y=non_lin_func_pt_bus, color='#02590f', label='PT Bus')
sns.lineplot(x=x_ptr, y=non_lin_func_pt_rail, color='g', label='PT Rail')
sns.lineplot(x=x_d, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of alternative travel time on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Travel time [h]')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/LPMC/travel_time.png')
#plot for distance on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_pt, non_lin_func_pt = non_lin_function(weights_arranged['2']['cost_transit'], 0, 10, 10000)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['3']['cost_driving_fuel'], 0, 10, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
# sns.lineplot(x=x_w, y=non_lin_func_walk, lw=2, color='#fab9a5', label='Walking')
# sns.lineplot(x=x_c, y=non_lin_func_cycle, lw=2, color='#B65FCF', label='Cycling')
sns.lineplot(x=x_pt, y=non_lin_func_pt, color='g', label='PT')
sns.lineplot(x=x_d, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Cost [£]')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/LPMC/cost.png')
plt.show()
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(weights_arranged['0']['age'], 0, 100, 10000)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(weights_arranged['1']['age'], 0, 100, 10000)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_pt, non_lin_func_pt = non_lin_function(weights_arranged['2']['age'], 0, 100, 10000)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['3']['age'], 0, 100, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color='b', label='Walking')
sns.lineplot(x=x_c, y=non_lin_func_cycle, color='r', label='Cycling')
sns.lineplot(x=x_pt, y=non_lin_func_pt, color='g', label='PT')
sns.lineplot(x=x_d, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Age')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/LPMC/age.png')
plt.show()
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(weights_arranged['0']['start_time_linear'], 0, 24, 10000)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(weights_arranged['1']['start_time_linear'], 0, 24, 10000)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_pt, non_lin_func_pt = non_lin_function(weights_arranged['2']['start_time_linear'], 0, 24, 10000)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(weights_arranged['3']['start_time_linear'], 0, 24, 10000)
if asc_normalised:
non_lin_func_driving = [n - non_lin_func_driving[0] for n in non_lin_func_driving]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color='b', label='Walking')
sns.lineplot(x=x_c, y=non_lin_func_cycle, color='r', label='Cycling')
sns.lineplot(x=x_pt, y=non_lin_func_pt, color='g', label='PT')
sns.lineplot(x=x_d, y=non_lin_func_driving, color='orange', label='Driving')
#plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel('Departure time')
plt.ylabel('Utility')
plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/LPMC/departure_time.png')
plt.show()
#for all features parameters
if not only_tt:
for u in weights_arranged:
for i, f in enumerate(weights_arranged[u]):
#create nonlinear plot
x, non_lin_func = non_lin_function(weights_arranged[u][f], 0, 1.05*max(X[f]), 10000)
if asc_normalised:
val_0 = non_lin_func[0]
non_lin_func = [n - val_0 for n in non_lin_func]
if with_asc:
non_lin_func = [n + ASCs[int(u)] for n in non_lin_func]
#plot parameters
plt.figure(figsize=(3.49, 2.09), dpi=1000)
#plt.title('Influence of {} on the predictive function ({} utility)'.format(f, utility_names[u]), fontdict={'fontsize': 16})
plt.ylabel('{} utility'.format(utility_names[u]))
if 'dur' in f:
plt.xlabel('{} [h]'.format(f))
elif 'TIME' in f:
plt.xlabel('{} [min]'.format(f))
elif 'cost' in f:
plt.xlabel('{} [£]'.format(f))
elif 'distance' in f:
plt.xlabel('{} [km]'.format(f))
elif 'CO' in f:
plt.xlabel('{} [chf]'.format(f))
else:
plt.xlabel('{}'.format(f))
sns.lineplot(x=x, y=non_lin_func, color='k', label='RUMBoost')
plt.xlim([0-0.05*np.max(X[f]), np.max(X[f])*1.05])
plt.ylim([np.min(non_lin_func) - 0.05*(np.max(non_lin_func)-np.min(non_lin_func)), np.max(non_lin_func) + 0.05*(np.max(non_lin_func)-np.min(non_lin_func))])
plt.tight_layout()
if save_figure:
if with_fit:
plt.savefig('Figures/{}{} utility, {} feature {} technique.png'.format(utility_names[u], f, technique))
else:
plt.savefig('Figures/{}{} utility, {} feature.png'.format(save_file, utility_names[u], f))
plt.show()
[docs]
def plot_market_segm(model, X, asc_normalised: bool = True, utility_names: list[str] = ['Walking', 'Cycling', 'Public Transport', 'Driving']):
'''
Plot the market segmentation.
Parameters
----------
model : RUMBoost
A RUMBoost object.
X : pandas DataFrame
Training data.
asc_normalised : bool, optional (default = False)
If True, scale down utilities to be zero at the y axis.
utility_names : list[str], optional (default = ['Walking', 'Cycling', 'Public Transport', 'Driving'])
Names of utilities.
'''
sns.set_theme()
weights_arranged = weights_to_plot_v2(model, market_segm=True)
label = {0:'Weekdays',1:'Weekends'}
color = ['r', 'b']
for u in weights_arranged:
plt.figure(figsize=(10, 6))
for i, f in enumerate(weights_arranged[u]):
#create nonlinear plot
x, non_lin_func = non_lin_function(weights_arranged[u][f], 0, 1.05*max(X[f]), 10000)
if asc_normalised:
val_0 = non_lin_func[0]
non_lin_func = [n - val_0 for n in non_lin_func]
sns.lineplot(x=x, y=non_lin_func, lw=2, color=color[i], label=label[i])
plt.title('Impact of travel time in weekdays and weekends on {} utility'.format(utility_names[u]), fontdict={'fontsize': 16})
plt.ylabel('{} utility'.format(utility_names[u]))
plt.xlabel('Travel time [h]')
plt.show()
[docs]
def plot_util(model, data_train, points=10000):
'''
Plot the raw utility functions of all features. This is done directly from the predict attribute of lightgbm.Boosters.
Parameters
----------
model : RUMBoost
A RUMBoost object.
data_train : pandas Dataframe
The full training dataset.
points : int, optional (default = 10000)
The number of points used to draw the line plot.
'''
sns.set_theme()
for j, struct in enumerate(model.rum_structure):
booster = model.boosters[j]
for i, f in enumerate(struct['columns']):
xin = np.zeros(shape = (points, len(struct['columns'])))
xin[:, i] = np.linspace(0,1.05*max(data_train[f]),points)
ypred = booster.predict(xin)
plt.figure()
plt.plot(np.linspace(0,1.05*max(data_train[f]),points), ypred)
plt.title(f)
[docs]
def plot_util_pw(model, data_train, points = 10000):
'''
Plot the piece-wise utility function
Parameters
----------
model : RUMBoost
A RUMBoost object.
data_train : pandas Dataframe
The full training dataset.
points : int, optional (default = 10000)
The number of points used to draw the line plot.
'''
features = data_train.columns
data_to_transform = {}
for f in features:
xi = np.linspace(0, 1.05*max(data_train[f]), points)
data_to_transform[f] = xi
data_to_transform = pd.DataFrame(data_to_transform)
pw_func = stairs_to_pw(model, data_train, data_to_transform, util_for_plot = True)
return pw_func
[docs]
def plot_spline(model, data_train, spline_collection, utility_names, mean_splines = False, x_knots_dict = None, save_fig = False, lpmc_tt_cost=False, sm_tt_cost=False, save_file=''):
'''
Plot the spline interpolation for all utilities interpolated.
Parameters
----------
model : RUMBoost
A RUMBoost object.
data_train : pandas Dataframe
The full training dataset.
spline_collection : dict
A dictionary containing the optimal number of splines for each feature interpolated of each utility
mean_splines : bool, optional (default = False)
Must be True if the splines are computed at the mean distribution of data for stairs.
x_knots_dict : dict
A dictionary in the form of {utility: {attribute: x_knots}} where x_knots are the spline knots for the corresponding
utility and attributes
'''
#get weights ordered by features
weights = weights_to_plot_v2(model)
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
'scatter.edgecolors': 'none'
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
if lpmc_tt_cost:
x_plot_w, y_plot_w = data_leaf_value(data_train['dur_walking'], weights['0']['dur_walking'], 'data_weighted')
y_plot_norm_w = [y - y_plot_w[0] for y in y_plot_w]
x_spline_w = np.linspace(np.min(data_train['dur_walking']), np.max(data_train['dur_walking']), num=10000)
x_knots_temp_w, y_knots_w = data_leaf_value(x_knots_dict['0']['dur_walking'], weights['0']['dur_walking'])
_, y_spline_w, _, x_knot_w, y_knot_w = monotone_spline(x_spline_w, weights['0']['dur_walking'], num_splines=spline_collection['0']['dur_walking'], x_knots=x_knots_temp_w, y_knots=y_knots_w)
y_spline_norm_w = [y - y_plot_w[0] for y in y_spline_w]
y_knot_norm_w = [y - y_plot_w[0] for y in y_knot_w]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
#data
plt.scatter(x_plot_w, y_plot_norm_w, color='b', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_w, y_spline_norm_w, color='b', label=f'Walking travel time ({spline_collection["0"]["dur_walking"]} splines)')
#knots position
plt.scatter(x_knot_w, y_knot_norm_w, color='k', s=1)
x_plot_c, y_plot_c = data_leaf_value(data_train['dur_cycling'], weights['1']['dur_cycling'], 'data_weighted')
y_plot_norm_c = [y - y_plot_c[0] for y in y_plot_c]
x_spline_c = np.linspace(np.min(data_train['dur_cycling']), np.max(data_train['dur_cycling']), num=10000)
x_knots_temp_c, y_knots_c = data_leaf_value(x_knots_dict['1']['dur_cycling'], weights['1']['dur_cycling'])
_, y_spline_c, _, x_knot_c, y_knot_c = monotone_spline(x_spline_c, weights['1']['dur_cycling'], num_splines=spline_collection['1']['dur_cycling'], x_knots=x_knots_temp_c, y_knots=y_knots_c)
y_spline_norm_c = [y - y_plot_c[0] for y in y_spline_c]
y_knot_norm_c = [y - y_plot_c[0] for y in y_knot_c]
#data
plt.scatter(x_plot_c, y_plot_norm_c, color='r', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_c, y_spline_norm_c, color='r', label=f'Cycling travel time ({spline_collection["1"]["dur_cycling"]} splines)')
#knots position
plt.scatter(x_knot_c, y_knot_norm_c, color='k', s=1)
x_plot_p, y_plot_p = data_leaf_value(data_train['dur_pt_rail'], weights['2']['dur_pt_rail'], 'data_weighted')
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(np.min(data_train['dur_pt_rail']), np.max(data_train['dur_pt_rail']), num=10000)
x_knots_temp_p, y_knots_p = data_leaf_value(x_knots_dict['2']['dur_pt_rail'], weights['2']['dur_pt_rail'])
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(x_spline_p, weights['2']['dur_pt_rail'], num_splines=spline_collection['2']['dur_pt_rail'], x_knots=x_knots_temp_p, y_knots=y_knots_p)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
#data
plt.scatter(x_plot_p, y_plot_norm_p, color='g', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_p, y_spline_norm_p, color='g', label=f'Rail travel time ({spline_collection["2"]["dur_pt_rail"]} splines)')
#knots position
plt.scatter(x_knot_p, y_knot_norm_p, color='k', s=1)
x_plot_d, y_plot_d = data_leaf_value(data_train['dur_driving'], weights['3']['dur_driving'], 'data_weighted')
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(np.min(data_train['dur_driving']), np.max(data_train['dur_driving']), num=10000)
x_knots_temp_d, y_knots_d = data_leaf_value(x_knots_dict['3']['dur_driving'], weights['3']['dur_driving'])
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(x_spline_d, weights['3']['dur_driving'], num_splines=spline_collection['3']['dur_driving'], x_knots=x_knots_temp_d, y_knots=y_knots_d)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
#data
plt.scatter(x_plot_d, y_plot_norm_d, color='orange', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_d, y_spline_norm_d, color='orange', label=f'Driving travel time ({spline_collection["3"]["dur_driving"]} splines)')
#knots position
plt.scatter(x_knot_d, y_knot_norm_d, color='k', s=1, label='Knots')
#plt.title('Spline interpolation of {}'.format(f))
plt.ylabel('Utility')
plt.xlim([0, 5])
plt.xlabel('Travel time [h]')
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/LPMC/splines_travel_time.png")
plt.show()
plt.figure(figsize=(3.49, 2.09), dpi=1000)
x_plot_p, y_plot_p = data_leaf_value(data_train['cost_transit'], weights['2']['cost_transit'], 'data_weighted')
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(np.min(data_train['cost_transit']), np.max(data_train['cost_transit']), num=10000)
x_knots_temp_p, y_knots_p = data_leaf_value(x_knots_dict['2']['cost_transit'], weights['2']['cost_transit'])
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(x_spline_p, weights['2']['cost_transit'], num_splines=spline_collection['2']['cost_transit'], x_knots=x_knots_temp_p, y_knots=y_knots_p)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
#data
plt.scatter(x_plot_p, y_plot_norm_p, color='g', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_p, y_spline_norm_p, color='g', label=f'PT cost ({spline_collection["2"]["cost_transit"]} splines)')
#knots position
plt.scatter(x_knot_p, y_knot_norm_p, color='k', s=1)
x_plot_d, y_plot_d = data_leaf_value(data_train['cost_driving_fuel'], weights['3']['cost_driving_fuel'], 'data_weighted')
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(np.min(data_train['cost_driving_fuel']), np.max(data_train['cost_driving_fuel']), num=10000)
x_knots_temp_d, y_knots_d = data_leaf_value(x_knots_dict['3']['cost_driving_fuel'], weights['3']['cost_driving_fuel'])
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(x_spline_d, weights['3']['cost_driving_fuel'], num_splines=spline_collection['3']['cost_driving_fuel'], x_knots=x_knots_temp_d, y_knots=y_knots_d)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
#data
plt.scatter(x_plot_d, y_plot_norm_d, color='orange', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_d, y_spline_norm_d, color='orange', label=f'Driving cost ({spline_collection["3"]["cost_driving_fuel"]} splines)')
#knots position
plt.scatter(x_knot_d, y_knot_norm_d, color='k', s=1, label='Knots')
#plt.title('Spline interpolation of {}'.format(f))
plt.ylabel('Utility')
plt.xlim([0, 10])
plt.xlabel('Cost [£]')
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/LPMC/splines_cost.png")
plt.show()
if sm_tt_cost:
x_plot_p, y_plot_p = data_leaf_value(data_train['TRAIN_TT'], weights['0']['TRAIN_TT'], 'data_weighted')
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(np.min(data_train['TRAIN_TT']), np.max(data_train['TRAIN_TT']), num=10000)
x_knots_temp_p, y_knots_p = data_leaf_value(x_knots_dict['0']['TRAIN_TT'], weights['0']['TRAIN_TT'])
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(x_spline_p, weights['0']['TRAIN_TT'], num_splines=spline_collection['0']['TRAIN_TT'], x_knots=x_knots_temp_p, y_knots=y_knots_p)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
#data
plt.scatter(x_plot_p/60, y_plot_norm_p, color='g', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_p/60, y_spline_norm_p, color='g', label=f'Rail travel time ({spline_collection["0"]["TRAIN_TT"]} splines)')
#knots position
plt.scatter(x_knot_p/60, y_knot_norm_p, color='k', s=1)
x_plot_s, y_plot_s = data_leaf_value(data_train['SM_TT'], weights['1']['SM_TT'], 'data_weighted')
y_plot_norm_s = [y - y_plot_s[0] for y in y_plot_s]
x_spline_s = np.linspace(np.min(data_train['SM_TT']), np.max(data_train['SM_TT']), num=10000)
x_knots_temp_s, y_knots_s = data_leaf_value(x_knots_dict['1']['SM_TT'], weights['1']['SM_TT'])
_, y_spline_s, _, x_knot_s, y_knot_s = monotone_spline(x_spline_s, weights['1']['SM_TT'], num_splines=spline_collection['1']['SM_TT'], x_knots=x_knots_temp_s, y_knots=y_knots_s)
y_spline_norm_s = [y - y_plot_s[0] for y in y_spline_s]
y_knot_norm_s = [y - y_plot_s[0] for y in y_knot_s]
#data
plt.scatter(x_plot_s/60, y_plot_norm_s, color='#6b8ba4', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_s/60, y_spline_norm_s, color='#6b8ba4', label=f'SwissMetro travel time ({spline_collection["1"]["SM_TT"]} splines)')
#knots position
plt.scatter(x_knot_s/60, y_knot_norm_s, color='k', s=1)
x_plot_d, y_plot_d = data_leaf_value(data_train['CAR_TT'], weights['2']['CAR_TT'], 'data_weighted')
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(np.min(data_train['CAR_TT']), np.max(data_train['CAR_TT']), num=10000)
x_knots_temp_d, y_knots_d = data_leaf_value(x_knots_dict['2']['CAR_TT'], weights['2']['CAR_TT'])
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(x_spline_d, weights['2']['CAR_TT'], num_splines=spline_collection['2']['CAR_TT'], x_knots=x_knots_temp_d, y_knots=y_knots_d)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
#data
plt.scatter(x_plot_d/60, y_plot_norm_d, color='orange', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_d/60, y_spline_norm_d, color='orange', label=f'Driving travel time ({spline_collection["2"]["CAR_TT"]} splines)')
#knots position
plt.scatter(x_knot_d/60, y_knot_norm_d, color='k', s=1, label='Knots')
#plt.title('Spline interpolation of {}'.format(f))
plt.ylabel('Utility')
plt.xlim([0, 10])
plt.xlabel('Travel time [h]')
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/SwissMetro/splines_travel_time.png")
plt.show()
plt.figure(figsize=(3.49, 2.09), dpi=1000)
x_plot_p, y_plot_p = data_leaf_value(data_train['TRAIN_COST'], weights['0']['TRAIN_COST'], 'data_weighted')
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(np.min(data_train['TRAIN_COST']), np.max(data_train['TRAIN_COST']), num=10000)
x_knots_temp_p, y_knots_p = data_leaf_value(x_knots_dict['0']['TRAIN_COST'], weights['0']['TRAIN_COST'])
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(x_spline_p, weights['0']['TRAIN_COST'], num_splines=spline_collection['0']['TRAIN_COST'], x_knots=x_knots_temp_p, y_knots=y_knots_p)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
#data
plt.scatter(x_plot_p, y_plot_norm_p, color='g', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_p, y_spline_norm_p, color='g', label=f'Rail cost ({spline_collection["0"]["TRAIN_COST"]} splines)')
#knots position
plt.scatter(x_knot_p, y_knot_norm_p, color='k', s=1)
x_plot_s, y_plot_s = data_leaf_value(data_train['SM_COST'], weights['1']['SM_COST'], 'data_weighted')
y_plot_norm_s = [y - y_plot_s[0] for y in y_plot_s]
x_spline_s = np.linspace(np.min(data_train['SM_COST']), np.max(data_train['SM_COST']), num=10000)
x_knots_temp_s, y_knots_s = data_leaf_value(x_knots_dict['1']['SM_COST'], weights['1']['SM_COST'])
_, y_spline_s, _, x_knot_s, y_knot_s = monotone_spline(x_spline_s, weights['1']['SM_COST'], num_splines=spline_collection['1']['SM_COST'], x_knots=x_knots_temp_s, y_knots=y_knots_s)
y_spline_norm_s = [y - y_plot_s[0] for y in y_spline_s]
y_knot_norm_s = [y - y_plot_s[0] for y in y_knot_s]
#data
plt.scatter(x_plot_s, y_plot_norm_s, color='#6b8ba4', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_s, y_spline_norm_s, color='#6b8ba4', label=f'SwissMetro cost ({spline_collection["1"]["SM_COST"]} splines)')
#knots position
plt.scatter(x_knot_s, y_knot_norm_s, color='k', s=1)
x_plot_d, y_plot_d = data_leaf_value(data_train['CAR_CO'], weights['2']['CAR_CO'], 'data_weighted')
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(np.min(data_train['CAR_CO']), np.max(data_train['CAR_CO']), num=10000)
x_knots_temp_d, y_knots_d = data_leaf_value(x_knots_dict['2']['CAR_CO'], weights['2']['CAR_CO'])
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(x_spline_d, weights['2']['CAR_CO'], num_splines=spline_collection['2']['CAR_CO'], x_knots=x_knots_temp_d, y_knots=y_knots_d)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
#data
plt.scatter(x_plot_d, y_plot_norm_d, color='orange', s=0.3, alpha=1, edgecolors='none')
#splines
plt.plot(x_spline_d, y_spline_norm_d, color='orange', label=f'Driving cost ({spline_collection["2"]["CAR_CO"]} splines)')
#knots position
plt.scatter(x_knot_d, y_knot_norm_d, color='k', s=1, label='Knots')
#plt.title('Spline interpolation of {}'.format(f))
plt.ylabel('Utility')
plt.xlim([0, 500])
plt.xlabel('Cost [chf]')
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/SwissMetro/splines_cost.png")
plt.show()
for u in spline_collection:
for f in spline_collection[u]:
#data points and their utilities
x_plot, y_plot = data_leaf_value(data_train[f], weights[u][f], 'data_weighted')
y_plot_norm = [y - y_plot[0] for y in y_plot]
x_spline = np.linspace(np.min(data_train[f]), np.max(data_train[f]), num=10000)
#if using splines
#if mean technique
if mean_splines:
x_mean, y_mean = data_leaf_value(data_train[f], weights[u][f], technique='mean_data')
x_spline, y_spline, _, x_knot, y_knot = mean_monotone_spline(x_plot, x_mean, y_plot, y_mean, num_splines=spline_collection[u][f])
#else, i.e. linearly sampled points
else:
if x_knots_dict is not None:
x_knots_temp, y_knots = data_leaf_value(x_knots_dict[u][f], weights[u][f])
_, y_spline, _, x_knot, y_knot = monotone_spline(x_spline, weights[u][f], num_splines=spline_collection[u][f], x_knots=x_knots_temp, y_knots=y_knots)
else:
x_spline, y_spline, _, x_knot, y_knot = monotone_spline(x_plot, y_plot, num_splines=spline_collection[u][f])
y_spline_norm = [y - y_plot[0] for y in y_spline]
y_knot_norm = [y - y_plot[0] for y in y_knot]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
#data
plt.scatter(x_plot, y_plot_norm, color='k', s=0.3)
#splines
plt.plot(x_spline, y_spline_norm, color='#5badc7')
#knots position
plt.scatter(x_knot, y_knot_norm, color='#CC5500', s=1)
plt.legend(['Data', 'Splines ({})'.format(spline_collection[u][f]), 'Knots'])
#plt.title('Spline interpolation of {}'.format(f))
plt.ylabel('{} utility'.format(utility_names[u]))
plt.tight_layout()
if 'dur' in f:
plt.xlabel('{} [h]'.format(f))
elif 'TIME' in f:
plt.xlabel('{} [h]'.format(f))
elif 'cost' in f:
plt.xlabel('{} [£]'.format(f))
elif 'CO' in f:
plt.xlabel('{} [chf]'.format(f))
elif 'distance' in f:
plt.xlabel('{} [km]'.format(f))
else:
plt.xlabel('{}'.format(f))
if save_fig:
plt.savefig(save_file + "{} utility, {} feature.png".format(u, f))
plt.show()
[docs]
def plot_VoT(data_train, util_collection, attribute_VoT, utility_names, draw_range, save_figure = False, num_points = 1000):
'''
The function plot the Value of Time of the attributes specified in attribute_VoT.
Parameters
----------
util_collection : dict
A dictionary containing the type of utility to use for all features in all utilities.
attribute_VoT : dict
A dictionary with keys being the utility number (as string) and values being a tuple of the attributes to compute the VoT on.
The structure follows this form: {utility: (attribute1, attribute2)}
'''
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.1,
"ytick.major.pad": 0.1,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in attribute_VoT:
f1, f2 = attribute_VoT[u]
x_vect = np.linspace(draw_range[u][f1][0], draw_range[u][f1][1], num_points)
y_vect = np.linspace(draw_range[u][f2][0], draw_range[u][f2][1], num_points)
d_f1 = util_collection[u][f1].derivative()
d_f2 = util_collection[u][f2].derivative()
VoT = lambda x1, x2, df1 = d_f1, df2 = d_f2: df1(x1) / df2(x2)
VoT_contour_plot = np.array(np.zeros((len(x_vect), len(y_vect))))
X, Y = np.meshgrid(x_vect, y_vect, indexing='ij')
for i in range(len(x_vect)):
for j in range(len(y_vect)):
if d_f2(Y[i, j]) == 0:
VoT_contour_plot[i, j] = 100
elif VoT(X[i, j], Y[i, j]) > 100:
VoT_contour_plot[i, j] = 100
elif VoT(X[i, j], Y[i, j]) < 0.1:
VoT_contour_plot[i, j] = 0.1
else:
VoT_contour_plot[i, j] = VoT(X[i, j], Y[i, j])
fig, axes = plt.subplots(figsize=(3.49,3.49), dpi=1000)
#fig.suptitle(f'VoT ({f1} and {f2}) of {utility_names[u]}')
res = 100
c_plot = axes.contourf(X, Y, np.log(VoT_contour_plot)/np.log(10), levels=res, linewidths=0, cmap=sns.color_palette("Blues", as_cmap=True), vmin = -1, vmax = 2)
#axes.set_title(f'{utility_names[u]}')
axes.set_xlabel(f'{f1} [h]')
axes.set_ylabel(f'{f2} [£]')
cbar = fig.colorbar(c_plot, ax = axes, ticks=[-1, 0, 1, 2])
cbar.set_ticklabels([0.1, 1, 10, 100])
cbar.ax.set_ylabel('VoT [£/h]')
cbar.ax.set_ylim([-1, 2])
#plt.tight_layout()
if save_figure:
plt.savefig('Figures/RUMBoost/LPMC/VoT_{}.png'.format(utility_names[u]))
plt.show()
[docs]
def plot_pop_VoT(data_test, util_collection, attribute_VoT, save_figure = False):
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in attribute_VoT:
f1, f2 = attribute_VoT[u]
d_f1 = util_collection[u][f1].derivative()
d_f2 = util_collection[u][f2].derivative()
VoT_pop = d_f1(data_test[f1])/d_f2(data_test[f2])
filtered_VoT_pop = VoT_pop[~np.isnan(VoT_pop)]
limited_VoT_pop = filtered_VoT_pop[(filtered_VoT_pop>0) & (filtered_VoT_pop < np.quantile(filtered_VoT_pop, 0.99))]
#fig, axes = plt.subplots(figsize=(10,8), layout='constrained')
plt.figure(figsize=(3.49, 2.09), dpi=1000)
sns.histplot(limited_VoT_pop, color='b', alpha = 0.5, kde=True, bins=50)
plt.xlabel("VoT [£/h]")
plt.tight_layout()
plt.show()
if save_figure:
plt.savefig('Figures/RUMBoost/SwissMetro/pop_VoT_{}.png'.format(u))
[docs]
def plot_ind_spec_constant(socec_model, dataset_train, alternatives: list[str]):
'''
Plot a histogram of all alternatives individual specific constant of a functional effect model.
Parameters
----------
socec_model:
The part of the functional effect model with full interactions of socio-economic characteristics.
dataset_train:
The dataset used to train the model. It must be a lightGBM Dataset object.
alternatives:
The list of alternatives name.
'''
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
#sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
ind_spec_constants = socec_model.predict(dataset_train, utilities=True)
bins=np.histogram(ind_spec_constants, bins=50)[1]
sns.set_theme()
f, axes = plt.subplots(2, 2, figsize=(12, 10), tight_layout=True)
colors = ['b', 'r', 'g', 'orange']
for i, axs in enumerate(axes.flatten()):
sns.histplot(ind_spec_constants[:, i], bins=bins, alpha=0.5, ax=axs, kde=True, color=colors[i])
axs.set_title(f'{alternatives[i]}')
# Defining custom 'xlim' and 'ylim' values.
xlim = (-3.5, 3.5)
ylim = (0, 5250)
# Setting the values for all axes.
plt.setp(axes, xlim=xlim, ylim=ylim)
plt.show()
[docs]
def plot_bootstrap(models: list, dataset: pd.DataFrame, features: dict[list[str]]):
'''
Plot the bootstrap sampling.
Parameters
----------
models: list
A list containing all the trained mdoels of the bootstrap sampling
dataset: pd.DataFrame
The full dataset used for training
features: dict[list[str]]
A dictionary of lists of strings contaning the number of alternatives, and the features for that alternative,
e.g. {'0':['feature_1', ...], '1': [], ...]
'''
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth":0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
'legend.borderaxespad': 0.4,
'legend.borderpad': 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8
}
sns.set_theme(font_scale=1, rc=tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
ufs_dict = {}
for u in features:
ufs_dict[u] = {}
for f in features[u]:
ufs_dict[u][f] = {'xplot': np.linspace(0, dataset[f].max(), 1000), 'yarr': np.array([]), 'yav': []}
yi = []
for model in models:
vals = weights_to_plot_v2(model)
_, y = non_lin_function(vals[u][f], 0, dataset[f].max(), 1000)
yi.append([yii-y[0] for yii in y])
ufs_dict[u][f]['yarr'] = np.array(yi)
ufs_dict[u][f]['yav'] = ufs_dict[u][f]['yarr'].mean(axis=0)
g = sns.JointGrid(xlim=(0, np.max(dataset[f])), height=3.89)
g.figure.set_dpi(1000)
x, y = ufs_dict[u][f]['xplot'],ufs_dict[u][f]['yav']
sns.lineplot(x=x, y=y, ax=g.ax_joint,color='orange', linewidth=1, label='Average')
sns.histplot(x=dataset[f], ax=g.ax_marg_x, bins=100, color = 'orange', alpha=0.5)
for i in range(len(models)):
sns.lineplot(x=x, y=ufs_dict[u][f]['yarr'][i, :].T, color = 'orange', alpha = 0.1, ax=g.ax_joint, linewidth=0.5)
g.ax_joint.set(xlabel=f'{f}', ylabel='Utility')