Source code for explainable_rl.environments.strategic_pricing

from explainable_rl.foundation.library import *

# Import functions
from explainable_rl.foundation.environment import MDP


[docs]class StrategicPricing(MDP):
    """Environment for Strategic Pricing."""

[docs]    def __init__(self, dh, bins=None):
        """Initialise the Strategic Pricing MDP class.

        Args:
            dh (DataHandler): Data handler object.
        """
        super().__init__(dh)

        if bins is None:
            bins = [10]
        self.state_to_action = {}

        self._state_mdp_data = None
        self._action_mdp_data = None
        self._reward_mdp_data = None
        self._average_rewards = None
        self.bins_dict = None

        self.state_dim = self.dh.get_states().shape[1]
        self.action_dim = len(self.dh.get_action_labels())

        if len(bins) != self.state_dim + 1:
            print(
                "Warning: bins not equal to state_dim + 1. "
                "Setting bins to [10] * (state_dim + 1)"
            )
            self.bins = [10] * (self.state_dim + 1)
        else:
            self.bins = bins

[docs]    def initialise_env(self):
        """Create the environment given the MDP information."""
        self._average_rewards = self._make_rewards_from_data()

[docs]    def _transform_df_to_numpy(self):
        """Transform the MDP data from a dataframe to a numpy array."""
        raise NotImplementedError

[docs]    def _join_state_action(self):
        """Join the state and action pairs together.

        Returns:
            list: Group of states and actions per datapoint.
        """
        zipped = []
        for i in range(len(self._reward_mdp_data)):
            state_array = self._state_mdp_data[i].tolist()
            action_array = self._action_mdp_data[i].tolist()
            zipped.append(state_array + action_array)

        return zipped

[docs]    def _bin_state_action_space(self, zipped):
        """Bin the state-action pairs.

        Args:
            zipped (list): Group of states and actions per datapoint.

        Returns:
            np.array: Binned state-action pairs.
        """
        return np.array(self.bin_states(zipped))

[docs]    def bin_states(self, states, idxs=None):
        """ Bin a list of states.

        Args:
            states (list[list]): State to bin.
            idxs (list): indexes of the state dimensions. This argument can be used if the state list contains only
                certain features (e.g. only actions).

        Returns:
            b_states (list): Binned state.
        """
        b_states = []
        for state in states:
            b_states.append(self.bin_state(state, idxs=idxs))
        return b_states

[docs]    def debin_states(self, b_states, idxs=None):
        """ Debin a list of binned states.

        Args:
            b_states (list[list]): Binned states to debin.
            idxs (list): indexes of the state dimensions. This argument can be used
                if the state list contains only certain features (e.g. only actions)

        Returns:
            states (list): Binned state.
        """
        states = []
        for b_state in b_states:
            states.append(self._debin_state(b_state, idxs=idxs))
        return states

[docs]    def bin_state(self, state, idxs=None):
        """Bin a singular state.

        The states are binned according to the number of bins
        of each feature.

        Args:
            state (list): State to bin.
            idxs (list): indexes of the state dimensions.
                This argument can be used if the state list contains
                only certain features (e.g. only actions).

        Returns:
            binned (list): Binned state.

        """
        if idxs == None:
            idxs = range(len(state))

        binned = []
        for i, value in zip(idxs, state):
            binned.append(
                np.digitize(
                    value,
                    [
                        n / self.bins[i] if n < self.bins[i] else 1.01
                        for n in range(1, self.bins[i] + 1)
                    ],
                )
            )
        return binned

[docs]    def _debin_state(self, b_state, idxs=None):
        """Debin a singular states.

        Args:
            b_state (list): Binned state to de-bin.

        Returns:
            list: Debinned state.
        """
        if idxs == None:
            idxs = range(len(b_state))

        state = []
        for i, value in zip(idxs, b_state):
            # Append middle point of the state bin
            try:
                state.append((value + 0.5) / self.bins[i])
            except:
                ipdb.set_trace()
        return state

[docs]    def _get_counts_and_rewards_per_bin(self, binned):
        """Create a dictionary of counts of datapoints per bin and sum the associated rewards.

        Args:
            binned (np.array): Binned state-action pairs.
        Returns:
            dict: Counts of datapoints per bin and sums the associated rewards.
        """
        raise NotImplementedError

[docs]    def _create_average_reward_matrix(self, bins_dict):
        """Create a sparse matrix of the state-action pairs and associated rewards from the inputted dataset.

        Args:
            bins_dict (dict): Dictionary of counts of datapoints per bin and sum of the associated rewards.

        Returns:
            sparse.COO: Sparse matrix of binned state-action pairs and their associated average reward.
        """

        raise NotImplementedError

[docs]    def _make_rewards_from_data(self):
        """Create sparse matrix of the state-action pairs and associated rewards from the inputted dataset.

        Returns:
            sparse.COO: Sparse matrix of binned state-action pairs and their associate average reward.
        """
        raise NotImplementedError

[docs]    def reset(self):
        """Reset environment.

        Returns:
            list: Randomised initial state.
        """
        sample_ix_point = np.random.choice(np.arange(len(self._state_mdp_data)))
        state = self._state_mdp_data[sample_ix_point].tolist()
        binned_state = self.bin_state(state)
        return binned_state

[docs]    def _get_state_to_action(self, binned):
        """Create a dictionary of states and their associated actions.

        Args:
            binned (np.array): Binned state-action pairs.
        Returns:
            state_to_action (dict): States and their associated actions.
        """
        state_to_action = {}
        final_dim = binned.shape[1] - 1
        binned_df = pd.DataFrame(binned)
        binned_df[final_dim] = binned_df[final_dim].apply(lambda x: [x])
        group_by_inds = [i for i in range(final_dim)]
        binned_df = (
            binned_df.groupby(group_by_inds).sum(numeric_only=False).reset_index()
        )
        binned_df[final_dim] = binned_df[final_dim].apply(lambda x: set(x))
        binned = np.array(binned_df)
        for ix, bin in enumerate(binned):
            state = ",".join(str(e) for e in bin[:-1])
            state_to_action[state] = bin[-1]

        return state_to_action

[docs]    def step(self, state, action):
        """Take a step in the environment.

        Args:
            state (list): Current state values of the agent.
            action (int): Action for agent to take.
        """
        raise NotImplementedError