Source code for smartgrid.agents.agent

"""
This module contains the Agent class, its state (AgentState) and Action.
"""

import math
from collections import namedtuple

import numpy as np

from .profile import AgentProfile
from smartgrid.util.bounded import (increase_bounded, decrease_bounded)



[docs]
class AgentState(object):
    """
    The (mutable) state of an Agent.
    """

    comfort: float
    """
    The agent's current comfort, a float that *should* be in [0,1].
    """

    payoff: float
    """
    The agent's current payoff, i.e., the cumulated sum of benefits and losses.
    
    .. note::
        The payoff should be within the 
        :py:attr:`smartgrid.agents.agent.Agent.payoff_range` .
    """

    storage: float
    """
    The agent's current amount of energy stored in its personal battery.
    """

    need: float
    """
    The agent's current need, i.e., energy that it would like to consume.
    """

    production: float
    """
    The agent's energy produced at the current step, and put in its storage.
    
    .. note::
        As the storage is limited, it may happen that the difference between
        the new storage and the storage at the previous step is smaller than
        the `production`.
    """

    previous_storage: float
    """
    The previous amount of energy stored, at the previous time step.
    """


[docs]
    def __init__(self):
        self.comfort = 0
        self.payoff = 0
        self.storage = 0
        self.need = 0
        self.production = 0
        self.previous_storage = 0


    def __repr__(self):
        return '<AgentState comfort={} payoff={} storage={} need={} production={} previous_storage={}' \
            .format(self.comfort, self.payoff, self.storage, self.need, self.production, self.previous_storage)

    def reset(self):
        self.__init__()



Action = namedtuple('Action', [
    'grid_consumption',
    'storage_consumption',
    'store_energy',
    'give_energy',
    'buy_energy',
    'sell_energy'
])
Action.__doc__ = """
    An immutable (named) tuple containing action parameters.
    
    Actions may be either *intended* (i.e., what the learning algorithm or
    agent's policy would like to do) or *enacted* (i.e., what truly happened
    considering the physical constraints of the world).
"""



[docs]
class Agent(object):
    """
    An agent represents a physical entity in the world.

    It contains:
     - a (unique) name for identifying the agent;
     - a (current) state;
     - an intended action for a step, what the Agent wanted to do;
     - an enacted action for a step, what the Agent really did;
     - an agent profile, the common characteristics shared by multiple agents.
    """

    name: str
    state: AgentState
    intended_action: Action
    enacted_action: Action
    profile: AgentProfile

    # The range in which the 'payoff' can be.
    payoff_range = (-10_000, +10_000)


[docs]
    def __init__(self,
                 name: str,
                 profile: AgentProfile,
                 ):
        # Constant attributes
        self.name = name

        # Agent profile (contains "callbacks" to compute needs, productions, ...)
        self.profile = profile
        # The agent's (current) state, updated every time step in `update`.
        self.state = AgentState()

        # Note: the (intended/enacted) actions are initialized in `reset` to
        # avoid setting them twice.


[docs]
    def increase_storage(self, amount: float) -> (float, float, float):
        """
        Function for adding some energy in the storage.

        :param amount: energy for charging the battery.

        :returns: a tuple of float with the quantity in the battery,
            the energy added and the energy that cannot be stocked.
        """
        new, added, overhead = increase_bounded(self.state.storage,
                                                amount,
                                                self.profile.max_storage)
        self.state.storage = new
        return new, added, overhead



[docs]
    def decrease_storage(self, amount: float) -> (float, float, float):
        """
        Function for adding some energy in the storage.

        :param amount: energy for charging the battery.

        :returns: a tuple of float with the quantity in the battery, the energy
            took and the energy that was missing.
        """
        new, subtracted, missing = decrease_bounded(self.state.storage,
                                                    amount,
                                                    0)
        self.state.storage = new
        return new, subtracted, missing



[docs]
    def update(self, step: int) -> None:
        """
        Update the agent's current state (production, need, storage, comfort).

        :param step: The current time step.
        """
        # Compute comfort (using the previous need)
        consumption = self.enacted_action.grid_consumption + self.enacted_action.storage_consumption
        self.state.comfort = self.profile.comfort_fn(consumption, self.state.need)

        # Compute a new need
        self.state.need = self.profile.need(step)

        # Compute a new production and increase the storage accordingly
        self.state.production = self.profile.production(step)
        self.increase_storage(self.state.production)


    def reset(self):
        # Reset all state values to 0
        self.state.reset()

        # Create a fake action (0 for all parameters)
        self.intended_action = Action(*[0.0] * len(Action._fields))
        self.enacted_action = self.intended_action

        # Update state for the 1st step (need, production, storage, ...)
        # Note that comfort will most likely remain at 0 since action does nothing
        self.update(0)

    @property
    def need(self):
        return self.state.need

    @property
    def production(self):
        return self.state.production

    @property
    def comfort(self):
        return self.state.comfort

    @property
    def storage_ratio(self) -> float:
        """Return the current storage quantity over its capacity (in [0,1])."""
        return self.state.storage / (self.profile.max_storage + 10E-300)

    @property
    def payoff_ratio(self) -> float:
        """Return the current payoff scaled to [0,1]."""
        return np.interp(self.state.payoff, self.payoff_range, (0, 1))

    def __str__(self):
        return '<Agent {}>'.format(self.name)

    __repr__ = __str__


[docs]
    def handle_action(self) -> Action:
        """
        Perform the intended action and transform it into the enacted action.

        The *intended* action represents the action the agent intends to do,
        if possible, but it may happen that, due to some constraint, e.g.,
        battery capacity, it is not possible as-is.
        This method thus transforms the *intended* action into an *enacted*
        action, taking into account these constraints, and updates the Agent's
        state according to the *enacted* action.

        :return: The *enacted* action that truly happened, after the Agent's
            state was updated.
        """
        # Remember the storage before we go on to the next time step
        self.state.previous_storage = self.state.storage
        # Temporary storage (without upper limit, but still a lower ;
        # we consider that energy is exchanged more or less at the
        # same instant). This allows, e.g., to buy more energy than the capacity
        # allows, and to instantly consume this energy.
        # For example, assuming that the max storage is 500Wh, we can buy 1000Wh,
        # immediately consume 500Wh, give 150Wh, and store the remaining 350Wh.
        action = self.intended_action
        new_storage = self.state.storage

        # 1. Agent buys energy
        # (can be limited by the current payoff)
        rate = 0.1
        price = math.ceil(rate * action.buy_energy)
        # limit price by current payoff
        self.state.payoff, price, _ = decrease_bounded(self.state.payoff,
                                                       price,
                                                       self.payoff_range[0])
        # actually bought quantity
        bought = int(math.floor(price / rate))
        new_storage += bought

        # 2. Agent stores energy
        # (agent can store as much as desired)
        new_storage += action.store_energy

        # 3. Agent sells energy
        # (can be limited by the current storage, including bought and stored)
        # Note: agent could sell for more than it can really gain, because the
        # payoff is bounded. In this case, the money is "lost".
        rate = 0.1
        new_storage, sold, _ = decrease_bounded(new_storage,
                                                action.sell_energy,
                                                0)
        price = math.floor(rate * sold)
        self.state.payoff, _, _ = increase_bounded(self.state.payoff,
                                                   price,
                                                   self.payoff_range[1])

        # 4. Agent consumes from storage
        # (can be limited by the storage)
        new_storage, storage_consumed, _ = decrease_bounded(new_storage,
                                                            action.storage_consumption,
                                                            0)

        # 5. Agent gives to the grid
        # (can be limited by the storage)
        new_storage, given, _ = decrease_bounded(new_storage,
                                                 action.give_energy,
                                                 0)

        # 6. Agent consumes from the grid
        # (we assume that agent can consume as much as wanted)
        grid_consumed = action.grid_consumption

        # At this point, the new storage can be greater than the capacity
        # (overflow). This is by design (see comment at the top of the method),
        # but we now need to fix this. Possible ways include:
        # - Overflow energy could be wasted ("disappear") => not realistic.
        # - Reduce the quantity of stored energy, and/or bought energy => which
        #   should we reduce first? Is it possible that we cannot reduce them
        #   enough? What happens in this case?
        # - Increase the storage consumption => seems more realistic (energy
        #   *must* be consumed), and easier to implement than reducing other
        #   parameters. => This is the chosen way to deal with overflow.
        if new_storage > self.profile.max_storage:
            storage_consumed += (new_storage - self.profile.max_storage)
            new_storage = self.profile.max_storage

        # Set the new storage
        self.state.storage = new_storage

        # We return the actually performed action (after application of
        # constraints), so we can log it
        action_enacted = Action(
            grid_consumption=float(grid_consumed),
            storage_consumption=float(storage_consumed),
            store_energy=float(action.store_energy),
            give_energy=float(given),
            buy_energy=float(bought),
            sell_energy=float(sold)
        )
        return action_enacted