from explainable_rl.foundation.library import *
# Import functions
from explainable_rl.foundation.environment import MDP
[docs]class StrategicPricing(MDP):
"""Environment for Strategic Pricing."""
[docs] def __init__(self, dh, bins=None):
"""Initialise the Strategic Pricing MDP class.
Args:
dh (DataHandler): Data handler object.
"""
super().__init__(dh)
if bins is None:
bins = [10]
self.state_to_action = {}
self._state_mdp_data = None
self._action_mdp_data = None
self._reward_mdp_data = None
self._average_rewards = None
self.bins_dict = None
self.state_dim = self.dh.get_states().shape[1]
self.action_dim = len(self.dh.get_action_labels())
if len(bins) != self.state_dim + 1:
print(
"Warning: bins not equal to state_dim + 1. "
"Setting bins to [10] * (state_dim + 1)"
)
self.bins = [10] * (self.state_dim + 1)
else:
self.bins = bins
[docs] def initialise_env(self):
"""Create the environment given the MDP information."""
self._average_rewards = self._make_rewards_from_data()
[docs] def _join_state_action(self):
"""Join the state and action pairs together.
Returns:
list: Group of states and actions per datapoint.
"""
zipped = []
for i in range(len(self._reward_mdp_data)):
state_array = self._state_mdp_data[i].tolist()
action_array = self._action_mdp_data[i].tolist()
zipped.append(state_array + action_array)
return zipped
[docs] def _bin_state_action_space(self, zipped):
"""Bin the state-action pairs.
Args:
zipped (list): Group of states and actions per datapoint.
Returns:
np.array: Binned state-action pairs.
"""
return np.array(self.bin_states(zipped))
[docs] def bin_states(self, states, idxs=None):
""" Bin a list of states.
Args:
states (list[list]): State to bin.
idxs (list): indexes of the state dimensions. This argument can be used if the state list contains only
certain features (e.g. only actions).
Returns:
b_states (list): Binned state.
"""
b_states = []
for state in states:
b_states.append(self.bin_state(state, idxs=idxs))
return b_states
[docs] def debin_states(self, b_states, idxs=None):
""" Debin a list of binned states.
Args:
b_states (list[list]): Binned states to debin.
idxs (list): indexes of the state dimensions. This argument can be used
if the state list contains only certain features (e.g. only actions)
Returns:
states (list): Binned state.
"""
states = []
for b_state in b_states:
states.append(self._debin_state(b_state, idxs=idxs))
return states
[docs] def bin_state(self, state, idxs=None):
"""Bin a singular state.
The states are binned according to the number of bins
of each feature.
Args:
state (list): State to bin.
idxs (list): indexes of the state dimensions.
This argument can be used if the state list contains
only certain features (e.g. only actions).
Returns:
binned (list): Binned state.
"""
if idxs == None:
idxs = range(len(state))
binned = []
for i, value in zip(idxs, state):
binned.append(
np.digitize(
value,
[
n / self.bins[i] if n < self.bins[i] else 1.01
for n in range(1, self.bins[i] + 1)
],
)
)
return binned
[docs] def _debin_state(self, b_state, idxs=None):
"""Debin a singular states.
Args:
b_state (list): Binned state to de-bin.
Returns:
list: Debinned state.
"""
if idxs == None:
idxs = range(len(b_state))
state = []
for i, value in zip(idxs, b_state):
# Append middle point of the state bin
try:
state.append((value + 0.5) / self.bins[i])
except:
ipdb.set_trace()
return state
[docs] def _get_counts_and_rewards_per_bin(self, binned):
"""Create a dictionary of counts of datapoints per bin and sum the associated rewards.
Args:
binned (np.array): Binned state-action pairs.
Returns:
dict: Counts of datapoints per bin and sums the associated rewards.
"""
raise NotImplementedError
[docs] def _create_average_reward_matrix(self, bins_dict):
"""Create a sparse matrix of the state-action pairs and associated rewards from the inputted dataset.
Args:
bins_dict (dict): Dictionary of counts of datapoints per bin and sum of the associated rewards.
Returns:
sparse.COO: Sparse matrix of binned state-action pairs and their associated average reward.
"""
raise NotImplementedError
[docs] def _make_rewards_from_data(self):
"""Create sparse matrix of the state-action pairs and associated rewards from the inputted dataset.
Returns:
sparse.COO: Sparse matrix of binned state-action pairs and their associate average reward.
"""
raise NotImplementedError
[docs] def reset(self):
"""Reset environment.
Returns:
list: Randomised initial state.
"""
sample_ix_point = np.random.choice(np.arange(len(self._state_mdp_data)))
state = self._state_mdp_data[sample_ix_point].tolist()
binned_state = self.bin_state(state)
return binned_state
[docs] def _get_state_to_action(self, binned):
"""Create a dictionary of states and their associated actions.
Args:
binned (np.array): Binned state-action pairs.
Returns:
state_to_action (dict): States and their associated actions.
"""
state_to_action = {}
final_dim = binned.shape[1] - 1
binned_df = pd.DataFrame(binned)
binned_df[final_dim] = binned_df[final_dim].apply(lambda x: [x])
group_by_inds = [i for i in range(final_dim)]
binned_df = (
binned_df.groupby(group_by_inds).sum(numeric_only=False).reset_index()
)
binned_df[final_dim] = binned_df[final_dim].apply(lambda x: set(x))
binned = np.array(binned_df)
for ix, bin in enumerate(binned):
state = ",".join(str(e) for e in bin[:-1])
state_to_action[state] = bin[-1]
return state_to_action
[docs] def step(self, state, action):
"""Take a step in the environment.
Args:
state (list): Current state values of the agent.
action (int): Action for agent to take.
"""
raise NotImplementedError