Source code for explainable_rl.evaluation.evaluator

from explainable_rl.foundation.library import *

# Import functions
from explainable_rl.foundation.engine import Engine


[docs]class Evaluator: """Evaluator class which evaluates a list of trained agents and produces RL evaluation graphs."""
[docs] def __init__(self, engines): """Initialise the Evaluator. Args: engines (Engine or List[Engine]): one or list of trained engine """ self.engines = engines self.eval_results = [] # get evaluation data self._get_evaluation_results()
[docs] def hist_cum_rewards(self): """Calculate the cumulative historical rewards on test set. Returns: hist_cum_rewards (float): total reward on test set using historical policy. """ rewards = self.hist_array_rewards() return [np.sum(r) for r in rewards]
[docs] def agent_cum_rewards(self): """Calculate the cumulative agent rewards on test set. Returns: agent_cum_rewards (float): total reward on test set using historical policy. """ rewards = self.agent_array_rewards() return [np.sum(r) for r in rewards]
[docs] def hist_array_rewards(self): """Calculate the individual historical rewards for each test set sample. Returns: hist_array_rewards (List[float]): array of historical rewards on test set. """ rewards = [ np.array([r[0] for r in eval_dict["rewards_hist"]]) for eval_dict in self.eval_results ] return rewards
[docs] def agent_array_rewards(self): """Calculate the individual agent rewards for each test set sample. Returns: agent_array_rewards (List[float]): array of agent rewards on test set. """ rewards = [ np.array([r[0] for r in eval_dict["rewards_agent"]]) for eval_dict in self.eval_results ] return rewards
[docs] def _get_evaluation_results(self): """Evaluate the engines on the test set. This method fills in self.eval_results, which is a list of dictionaries containing all the relevant evaluation metrics. """ for engine in self.engines: eval_dict = {} # Save training results eval_dict["agent_cumrewards"] = engine.eval_agent_rewards eval_dict["hist_cumrewards"] = engine.eval_hist_rewards # Get test data from data handler states = engine.dh.get_states(split="test").to_numpy().tolist() actions = engine.dh.get_actions(split="test").to_numpy().tolist() rewards = engine.dh.get_rewards(split="test").to_numpy().tolist() # Get state and action indexes state_dims = list(range(engine.env.state_dim)) action_dims = list( range( engine.env.state_dim, engine.env.state_dim + engine.env.action_dim ) ) # Get the binned states b_states = engine.env.bin_states(states, idxs=state_dims) # Inverse scaling states = engine.inverse_scale_feature(states, engine.dh.state_labels) # Get the binned actions b_actions = engine.env.bin_states(actions, idxs=action_dims) # Get actions corresponding to agent's learned policy b_actions_agent = engine.agent.predict_actions(b_states) # De-bin the recommended actions actions_agent = engine.env.debin_states(b_actions_agent, idxs=action_dims) # Get reward based on agent policy rewards_agent = engine.agent.predict_rewards(b_states, b_actions_agent) # Get reward based on historical policy rewards_hist = engine.agent.predict_rewards(b_states, b_actions) # Apply inverse scaling to actions, states, and rewards eval_dict["states"] = engine.inverse_scale_feature( states, engine.dh.state_labels ) eval_dict["actions_hist"] = engine.inverse_scale_feature( actions, engine.dh.action_labels ) eval_dict["actions_agent"] = engine.inverse_scale_feature( actions_agent, engine.dh.action_labels ) eval_dict["rewards_hist"] = engine.inverse_scale_feature( rewards_hist, engine.dh.reward_labels ) eval_dict["rewards_agent"] = engine.inverse_scale_feature( rewards_agent, engine.dh.reward_labels ) # Save additional arrays eval_dict["b_actions"] = b_actions eval_dict["b_actions_agent"] = b_actions_agent eval_dict["agent_type"] = engine.dh.hyperparam_dict["agent"]["agent_type"] eval_dict["num_eval_steps"] = engine.dh.hyperparam_dict["training"][ "num_eval_steps" ] self.eval_results.append(eval_dict)
[docs] def plot_training_curve(self): """Plot the training reward for a list of runs. """ n_eval_steps = self.eval_results[0]["num_eval_steps"] train_agent_reward = [] train_hist_reward = [] for eval_dict in self.eval_results: agent = eval_dict["agent_type"] train_agent_reward.extend( [ [ agent, episode * n_eval_steps, eval_dict["agent_cumrewards"][episode], ] for episode in range(len(eval_dict["agent_cumrewards"])) ] ) train_hist_reward.extend( [ ["historical", episode * n_eval_steps, eval_dict["hist_cumrewards"]] for episode in range(len(eval_dict["agent_cumrewards"])) ] ) train_agent_reward_df = pd.DataFrame( train_agent_reward, columns=["agent", "episode", "cumulative reward"] ) train_hist_reward_df = pd.DataFrame( train_hist_reward, columns=["agent", "episode", "cumulative reward"] ) palette = { "historical": "C0", "q_learner": "C1", "double_q_learner": "C2", "sarsa": "C3", "sarsa_lambda": "C4", } sns.lineplot( data=train_agent_reward_df, x="episode", y="cumulative reward", hue="agent", palette=palette, ) sns.lineplot( data=train_hist_reward_df, x="episode", y="cumulative reward", hue="agent", palette=palette, ) plt.title("Cumulative Reward (Evaluation Set)") plt.grid() plt.savefig("cumulative.png")
[docs] def plot_reward_distribution(self): """Plot the distribution of rewards on the evaluation set. """ percentiles = np.linspace(0, 100, 101) rewards_agent, rewards_hist = [], [] for eval_dict in self.eval_results: agent = eval_dict["agent_type"] rewards_agent_array = [r[0] for r in eval_dict["rewards_agent"]] agent_percentiles = np.percentile(rewards_agent_array, q=percentiles) rewards_hist_array = [r[0] for r in eval_dict["rewards_hist"]] hist_percentiles = np.percentile(rewards_hist_array, q=percentiles) rewards_agent.extend( [ [agent, percentiles[p], agent_percentiles[p]] for p in range(percentiles.shape[0]) ] ) rewards_hist.extend( [ ["historical", percentiles[p], hist_percentiles[p]] for p in range(percentiles.shape[0]) ] ) rewards_agent_df = pd.DataFrame( rewards_agent, columns=["agent", "percentile", "reward"] ) rewards_hist_df = pd.DataFrame( rewards_hist, columns=["agent", "percentile", "reward"] ) palette = { "historical": "C0", "q_learner": "C1", "double_q_learner": "C2", "sarsa": "C3", "sarsa_lambda": "C4", } sns.lineplot( data=rewards_agent_df, x="percentile", y="reward", hue="agent", palette=palette, ) sns.lineplot( data=rewards_hist_df, x="percentile", y="reward", hue="agent", palette=palette, ) plt.title("Reward Percentiles (Evaluation Set)") plt.grid() plt.savefig("percentiles.png")