Source code for smartgrid.rewards.reward_collection

"""
The RewardCollection is responsible for computing rewards from reward functions.
"""

from typing import List, Dict

from smartgrid.agents import Agent
from .reward import Reward



[docs]
class RewardCollection:
    """
    The RewardCollection holds all desired reward functions, and computes the rewards.

    This class allows for multi-objective reinforcement learning, by holding
    several reward functions, and returning dicts of rewards (names -> values),
    instead of using a single reward function.

    The multiple reward functions can be aggregated (scalarized) to adapt to
    single-objective learning algorithms, by using a
    :py:class:`~smartgrid.wrappers.reward_aggregator.RewardAggregator` wrapper
    over the environment.
    """

    def __init__(self, rewards: List[Reward]):
        """
        Create a RewardCollection based on a list of "reward functions".

        :param rewards: The list of "reward functions" (actually instances of
            the :py:class:`~smartgrid.rewards.reward.Reward` class). This
            list must contain at least 1 element. The list is stored as-is
            (no copy is made).

        :raises AssertionError: If ``rewards`` is empty.
        """
        # Raise explicitly instead of using an ``assert`` statement: asserts
        # are removed when Python runs with ``-O``, which would silently
        # accept an empty collection. The exception type and message are kept
        # unchanged so that existing callers are not affected.
        if len(rewards) == 0:
            raise AssertionError("You need to specify at least one Reward.")
        self.rewards = rewards

    def compute(self, world: 'World', agent: Agent) -> Dict[str, float]:
        """
        Compute the value of each reward function for the given Agent.

        :param world: reference on the world for global information.
        :param agent: reference on the agent for local information.

        :return: A dictionary mapping the rewards' name to their value, for
            each reward function in this Reward Collection. If several reward
            functions share the same name, only the value of the last one is
            kept.
        """
        return {
            reward.name: reward.calculate(world, agent)
            for reward in self.rewards
        }

    def reset(self) -> None:
        """
        Reset the reward functions, by calling ``reset()`` on each of them.
        """
        for reward in self.rewards:
            reward.reset()

    def __repr__(self) -> str:
        """
        Return a textual representation listing the held reward functions.

        :return: The string form of each reward function, separated by
            ``' ; '`` and enclosed in ``RewardCollection{...}``.
        """
        rewards = ' ; '.join(map(str, self.rewards))
        # Doubled braces are literal ``{`` / ``}`` in an f-string.
        return f'RewardCollection{{{rewards}}}'