Source code for ethicalgardeners.gardenersenv

"""
The GardenersEnv module provides the main simulation environment for the
Ethical Gardeners reinforcement learning platform.

This module implements the PettingZoo AECEnv interface, serving as the primary
entry point of the simulation. It coordinates all simulation components:

1. World representation and state management (:py:mod:`.gridworld`)
2. Agent actions and interactions (:py:class:`.ActionHandler`)
3. Observation generation (:py:mod:`.observation`)
4. Reward calculation (:py:class:`.RewardFunctions`)
5. Metrics tracking (:py:class:`.MetricsCollector`)
6. Visualization rendering (:py:mod:`.renderer`)

The environment is highly configurable through Hydra configuration files.
"""
import numpy as np
from pettingzoo import AECEnv
# import agent_selector or AgentSelector depending on python version
try:
    # Python 3.13+
    from pettingzoo.utils.agent_selector import (
        AgentSelector as agent_selector)
except ImportError:
    # Python 3.12 and below
    from pettingzoo.utils import agent_selector
from gymnasium.spaces import Discrete



[docs]
class GardenersEnv(AECEnv):
    """
        Main environment class implementing the PettingZoo AECEnv interface.

        This class orchestrates the entire Ethical Gardeners simulation.

        The environment is configured through a Hydra configuration object that
        specifies grid initialization parameters, agent settings, observation
        type, rendering options, and more.

        Attributes:
            metadata (dict): Environment metadata for PettingZoo compatibility.
            random_generator (:py:class:`numpy.random.RandomState`): Random
                number generator for reproducible experiments.
            grid_world (:py:class:`.GridWorld`): The simulated 2D grid world
                environment.
            prev_grid_world (:py:class:`.GridWorld`): Copy of the previous grid
                world state.
            action_enum (:py:class:`._ActionEnum`): Enumeration of possible
                actions in the environment.
            possible_agents (list): List of all agent IDs in the environment.
            agents (dict): Mapping from agent IDs to Agent objects.
            action_handler (:py:class:`.ActionHandler`): Handler for processing
                agent actions.
            observation_strategy (:py:class:`.ObservationStrategy`): Strategy
                for generating agent observations.
            reward_functions (:py:class:`.RewardFunctions`): Functions for
                calculating agent rewards.
            metrics_collector (:py:class:`.MetricsCollector`): Collector for
                simulation metrics.
            renderers (list): List of renderer objects for visualization.
            num_iter (int): Maximum number of iterations for the simulation.
            render_mode (str): Current rendering mode ('human' or 'none').
            observations (dict): Current observations for all agents.
            rewards (dict): Current rewards for all agents.
            terminations (dict): Terminal state flags for all agents.
            truncations (dict): Truncation flags for all agents.
            infos (dict): Additional information for all agents.
            num_moves (int): Current number of moves executed in the
                simulation.
            actions_in_current_turn (int): Number of actions taken in the
                current turn.
        """
    metadata = {
        'render_modes': ['human', 'none'],
        'name': "ethical_gardeners"
    }


[docs]
    def __init__(self, random_generator, grid_world, action_enum, num_iter,
                 render_mode, action_handler, observation_strategy,
                 reward_functions, metrics_collector, renderers):
        """
        Create the Ethical Gardeners environment.

        This method sets up the entire simulation environment based on the
        provided configuration.

        Args:
            random_generator (:py:class:`.numpy.random.RandomState`): Random
                number generator for reproducibility.
            grid_world (:py:class:`.GridWorld`): The grid world representing
                the simulation environment.
            action_enum (:py:class:`._ActionEnum`): Enumeration of possible
                actions in the environment.
            num_iter (int): Maximum number of iterations for the simulation.
            render_mode (str): Rendering mode for the environment ('human' or
                'none').
            action_handler (:py:class:`.ActionHandler`): Handler for processing
                agent actions.
            observation_strategy (:py:class:`.ObservationStrategy`): Strategy
                for generating agent observations.
            reward_functions (:py:class:`.RewardFunctions`): Functions for
                calculating agent rewards.
            metrics_collector (:py:class:`.MetricsCollector`): Collector for
                simulation metrics.
            renderers (list): List of renderer objects for visualization.
        """
        super().__init__()

        # Set random generator
        self.random_generator = random_generator

        # Set the grid world
        self.grid_world = grid_world

        # Set PettingZoo parameters
        self.num_iter = num_iter
        self.render_mode = render_mode
        self.possible_agents = [f"agent_{i}" for i in
                                range(len(self.grid_world.agents))]
        self.agents = {self.possible_agents[i]: self.grid_world.agents[i] for i
                       in range(len(self.grid_world.agents))}

        # Set environment components
        self.action_enum = action_enum
        self.action_handler = action_handler
        self.observation_strategy = observation_strategy
        self.reward_functions = reward_functions
        self.metrics_collector = metrics_collector
        self.renderers = renderers

        # Initialize renderers
        for renderer in self.renderers:
            renderer.init(self.grid_world)



[docs]
    def action_space(self, agent_id):
        """
        Return the action space for a specific agent.

        This method returns a Discrete space representing all possible actions
        the agent can take in the environment.

        Args:
            agent_id (str): The ID of the agent to get the action space for.

        Returns:
            gymnasium.spaces.Discrete: The action space for the specified
            agent.
        """
        return Discrete(len(self.action_enum))



[docs]
    def observation_space(self, agent_id):
        """
        Return the observation space for a specific agent.

        This method delegates to the observation strategy to return the
        appropriate observation space based on the configured observation type.

        Args:
            agent_id (str): The ID of the agent to get the observation space
                for.

        Returns:
            gymnasium.spaces.Space: The observation space for the specified
            agent.
        """
        agent = self.agents[agent_id]
        return self.observation_strategy.observation_space(agent)



[docs]
    def reset(self, seed=None, options=None):
        """
        Reset the environment to its initial state.

        This method resets the agent selector, metrics collector, move counter,
        and initializes the observations, rewards, terminations, truncations,
        and info dictionaries for all agents.

        Args:
            seed (int, optional): Random seed for environment initialization.
            options (dict, optional): Additional options for reset
                customization.

        Returns:
            tuple: A tuple containing:
                - observations (dict): Initial observations for all agents.
                - infos (dict): Additional information for all agents.
        """
        # Initialise the agent selector
        self._agent_selector = agent_selector(self.possible_agents)
        self.agent_selection = self._agent_selector.next()

        # Set the random generator if a seed is provided
        if seed is not None:
            self.random_generator = np.random.RandomState(seed)

        # Reset the grid world
        self.grid_world.reset(self.random_generator)

        # Reset the agents mapping with the new agents from the reset grid
        self.agents = {self.possible_agents[i]: self.grid_world.agents[i] for i
                       in range(len(self.grid_world.agents))}

        # Initialize renderers
        for renderer in self.renderers:
            renderer.init(self.grid_world)

        # Reset metrics
        self.metrics_collector.reset_metrics()

        # Reset move counter
        self.num_moves = 0

        # Reset the counter of actions in the turn
        self.actions_in_current_turn = 0

        # Save the previous grid world state for rewards calculation
        self.prev_grid_world = self.grid_world.copy()

        # Initialise needed data structures for all agents
        self.observations = {agent_id: None for agent_id in
                             self.possible_agents}
        self.rewards = {agent_id: 0 for agent_id in self.possible_agents}
        self.terminations = {agent_id: False for agent_id in
                             self.possible_agents}
        self.truncations = {agent_id: False for agent_id in
                            self.possible_agents}
        self.infos = {agent_id: {} for agent_id in self.possible_agents}

        # Update action masks for all agents
        for agent_id in self.possible_agents:
            self.action_handler.update_action_mask(self.agents[agent_id])

        # Initialise the observations for all agents
        for agent_id in self.possible_agents:
            self.observations[agent_id] = {
                "observation": self._get_observations(agent_id),
                "action_mask": self.agents[agent_id].action_mask
            }

        return self.observations, self.infos



[docs]
    def step(self, action: int):
        """
        Execute a step in the environment for the current agent.

        This method processes the action for the current agent, updates the
        environment state, calculates rewards, generates new observations,
        updates metrics, and selects the next agent to act.

        If all agents have taken an action in the current turn, it updates the
        environmental conditions (pollution, flower growth).

        Args:
            action (int): The action to take for the current agent.

        Returns:
            dict: The observation for the next agent to act.
        """
        if (self.truncations[self.agent_selection] or
                self.terminations[self.agent_selection]):
            self._was_dead_step(action)
            return

        agent_id = self.agent_selection
        agent = self.agents[agent_id]

        # Handle the action for the agent
        action_enum_value = list(self.action_enum)[action]
        self.action_handler.handle_action(agent, action_enum_value)

        # Increment action counter for the current turn
        self.actions_in_current_turn += 1

        # Count active agents (those that are not terminated or truncated)
        active_agents = sum(1 for a in self.possible_agents if
                            not (self.terminations[a] or self.truncations[a]))

        # Update pollution once all active agents have acted
        if self.actions_in_current_turn >= active_agents:
            self.grid_world.update_cell()
            self.actions_in_current_turn = 0

        # Update observation and action mask for all agents
        for ag_id in self.possible_agents:
            ag = self.agents[ag_id]
            self.action_handler.update_action_mask(ag)

            self.observations[ag_id] = {
                "observation": self._get_observations(ag_id),
                "action_mask": ag.action_mask
            }

        # Update the rewards, and info for the agent
        rewards = self._get_rewards(agent_id, action_enum_value)
        self.rewards[agent_id] = rewards['total']
        self.infos[agent_id] = self._get_info(agent_id, rewards)

        # Update metrics
        self.metrics_collector.update_metrics(
            self.grid_world,
            self.rewards,
            self.agent_selection
        )

        # Export and send metrics if configured
        self.metrics_collector.export_metrics()
        self.metrics_collector.send_metrics()

        # Save the current grid world state for the next step
        self.prev_grid_world = self.grid_world.copy()

        self.num_moves += 1

        # Check if the agent has reached a terminal state
        self.truncations = {agent: self.num_moves >= self.num_iter
                            for agent in self.possible_agents}

        # Check if the episode is done for all agents
        if all(self.terminations[agent] or self.truncations[agent]
               for agent in self.possible_agents):
            # Finalize metrics for the episode
            self.metrics_collector.finish_episode()

        # Selects the next agent
        self.agent_selection = self._agent_selector.next()

        self.render()

        return self.observe(self.agent_selection)



[docs]
    def observe(self, agent_id):
        """
        Return the current observation for a specific agent.

        Args:
            agent_id (str): The ID of the agent to get the observation for.

        Returns:
            dict: The observation for the specified agent, containing:
                - observation: The agent's view of the environment.
                - action_mask: Binary mask indicating valid actions.
        """
        return self.observations[agent_id]



[docs]
    def render(self):
        """
        Render the current state of the environment.

        This method uses all configured renderers to visualize the current
        state of the grid world and agents.
        """
        for renderer in self.renderers:
            renderer.render(self.grid_world, self.agents)

            if self.render_mode == "human":
                renderer.display_render()



[docs]
    def close(self):
        """
        Close the environment and clean up resources.

        This method finalizes all renderers and closes the metrics_collector.
        """
        for renderer in self.renderers:
            renderer.end_render()

        self.metrics_collector.close()



[docs]
    def _get_observations(self, agent_id):
        """
        Generate the observation for a specific agent.

        This method delegates to the observation strategy to generate the
        appropriate observation based on the agent's configured observation
        type.

        Args:
            agent_id (str): The ID of the agent to generate the observation
                for.

        Returns:
            object: The observation for the specified agent.
        """
        agent = self.agents[agent_id]
        return self.observation_strategy.get_observation(self.grid_world,
                                                         agent)



[docs]
    def _get_rewards(self, agent_id, action):
        """
        Calculate the rewards for a specific agent.

        This method delegates to the reward functions to calculate the
        appropriate rewards based on the agent's actions and changes in the
        environment.

        Args:
            agent_id (str): The ID of the agent to calculate rewards for.
            action (:py:class:`._ActionEnum`): The action taken by the agent.

        Returns:
            dict: Dictionary of reward components and total reward with the
            following keys:
                - 'total': The mono-objective reward for the agent. Computed
                  as the average of all reward components.
                - 'ecology': The ecological reward component.
                - 'wellbeing': The wellbeing reward component.
                - 'biodiversity': The biodiversity reward component.
        """
        # get the agent from its ID
        agent = self.agents[agent_id]

        # Compute the rewards
        rewards = self.reward_functions.compute_reward(
            self.prev_grid_world,
            self.grid_world,
            agent,
            action
        )

        return rewards



[docs]
    def _get_info(self, agent_id, rewards):
        """
        Generate additional information for a specific agent.

        This method creates a dictionary of additional information that is
        provided alongside the observation and reward.

        Args:
            agent_id (str): The ID of the agent to generate info for.
            rewards (dict): The reward components for the agent.

        Returns:
            dict: Additional information for the specified agent with the
            following keys:
                - 'rewards': The reward dict for the agent containing each
                  reward component and the total reward.
        """
        return {
            'rewards': rewards,
        }



[docs]
    def last(self):
        """
        Return the most recent environment step information.

        This method returns all relevant information about the most recent
        step taken by the current agent.

        Returns:
            tuple: A tuple containing:
                - observation (dict): The current observation. The dictionary
                  contains:

                  - observation (:py:class:`numpy.ndarray`): The agent's view
                    of the environment.
                  - action_mask (:py:class:`numpy.ndarray`): Binary mask
                    indicating valid actions.
                - reward (float): The most recent reward.
                - termination (bool): Whether the agent is in a terminal state.
                - truncation (bool): Whether the episode was truncated.
                - info (dict): Additional information about the agent. Refer to
                  :py:meth:`_get_info` for details on the returned value.
        """
        agent_id = self.agent_selection
        observation = self.observations[agent_id]
        reward = self.rewards[agent_id]
        termination = self.terminations[agent_id]
        truncation = self.truncations[agent_id]
        info = self.infos[agent_id]

        return observation, reward, termination, truncation, info



[docs]
    def _was_dead_step(self, action=None):
        """
        Handle a step for an agent that is already terminated or truncated.

        This method is called when an agent attempts to take an action after
        it has already reached a terminal state or the episode has been
        truncated. It assigns zero reward and selects the next agent.

        Args:
            action (int, optional): The action that was attempted.
        """
        agent_id = self.agent_selection
        self.rewards[agent_id] = 0
        self._agent_selector.next()