Source code for ethicalgardeners.gardenersenv

"""
The GardenersEnv module provides the main simulation environment for the
Ethical Gardeners reinforcement learning platform.

This module implements the PettingZoo AECEnv interface, serving as the primary
entry point of the simulation. It coordinates all simulation components:

1. World representation and state management (:py:mod:`.gridworld`)
2. Agent actions and interactions (:py:class:`.ActionHandler`)
3. Observation generation (:py:mod:`.observation`)
4. Reward calculation (:py:class:`.RewardFunctions`)
5. Metrics tracking (:py:class:`.MetricsCollector`)
6. Visualization rendering (:py:mod:`.renderer`)

The environment is highly configurable through Hydra configuration files.
"""
import numpy as np
from pettingzoo import AECEnv
# import agent_selector or AgentSelector depending on python version
try:
    # Python 3.13+
    from pettingzoo.utils.agent_selector import (
        AgentSelector as agent_selector)
except ImportError:
    # Python 3.12 and below
    from pettingzoo.utils import agent_selector
from gymnasium.spaces import Discrete


[docs] class GardenersEnv(AECEnv): """ Main environment class implementing the PettingZoo AECEnv interface. This class orchestrates the entire Ethical Gardeners simulation. The environment is configured through a Hydra configuration object that specifies grid initialization parameters, agent settings, observation type, rendering options, and more. Attributes: metadata (dict): Environment metadata for PettingZoo compatibility. random_generator (:py:class:`numpy.random.RandomState`): Random number generator for reproducible experiments. grid_world (:py:class:`.GridWorld`): The simulated 2D grid world environment. prev_grid_world (:py:class:`.GridWorld`): Copy of the previous grid world state. action_enum (:py:class:`._ActionEnum`): Enumeration of possible actions in the environment. possible_agents (list): List of all agent IDs in the environment. agents (dict): Mapping from agent IDs to Agent objects. action_handler (:py:class:`.ActionHandler`): Handler for processing agent actions. observation_strategy (:py:class:`.ObservationStrategy`): Strategy for generating agent observations. reward_functions (:py:class:`.RewardFunctions`): Functions for calculating agent rewards. metrics_collector (:py:class:`.MetricsCollector`): Collector for simulation metrics. renderers (list): List of renderer objects for visualization. num_iter (int): Maximum number of iterations for the simulation. render_mode (str): Current rendering mode ('human' or 'none'). observations (dict): Current observations for all agents. rewards (dict): Current rewards for all agents. terminations (dict): Terminal state flags for all agents. truncations (dict): Truncation flags for all agents. infos (dict): Additional information for all agents. num_moves (int): Current number of moves executed in the simulation. actions_in_current_turn (int): Number of actions taken in the current turn. """ metadata = { 'render_modes': ['human', 'none'], 'name': "ethical_gardeners" }
[docs] def __init__(self, random_generator, grid_world, action_enum, num_iter, render_mode, action_handler, observation_strategy, reward_functions, metrics_collector, renderers): """ Create the Ethical Gardeners environment. This method sets up the entire simulation environment based on the provided configuration. Args: random_generator (:py:class:`.numpy.random.RandomState`): Random number generator for reproducibility. grid_world (:py:class:`.GridWorld`): The grid world representing the simulation environment. action_enum (:py:class:`._ActionEnum`): Enumeration of possible actions in the environment. num_iter (int): Maximum number of iterations for the simulation. render_mode (str): Rendering mode for the environment ('human' or 'none'). action_handler (:py:class:`.ActionHandler`): Handler for processing agent actions. observation_strategy (:py:class:`.ObservationStrategy`): Strategy for generating agent observations. reward_functions (:py:class:`.RewardFunctions`): Functions for calculating agent rewards. metrics_collector (:py:class:`.MetricsCollector`): Collector for simulation metrics. renderers (list): List of renderer objects for visualization. """ super().__init__() # Set random generator self.random_generator = random_generator # Set the grid world self.grid_world = grid_world # Set PettingZoo parameters self.num_iter = num_iter self.render_mode = render_mode self.possible_agents = [f"agent_{i}" for i in range(len(self.grid_world.agents))] self.agents = {self.possible_agents[i]: self.grid_world.agents[i] for i in range(len(self.grid_world.agents))} # Set environment components self.action_enum = action_enum self.action_handler = action_handler self.observation_strategy = observation_strategy self.reward_functions = reward_functions self.metrics_collector = metrics_collector self.renderers = renderers # Initialize renderers for renderer in self.renderers: renderer.init(self.grid_world)
[docs] def action_space(self, agent_id): """ Return the action space for a specific agent. This method returns a Discrete space representing all possible actions the agent can take in the environment. Args: agent_id (str): The ID of the agent to get the action space for. Returns: gymnasium.spaces.Discrete: The action space for the specified agent. """ return Discrete(len(self.action_enum))
[docs] def observation_space(self, agent_id): """ Return the observation space for a specific agent. This method delegates to the observation strategy to return the appropriate observation space based on the configured observation type. Args: agent_id (str): The ID of the agent to get the observation space for. Returns: gymnasium.spaces.Space: The observation space for the specified agent. """ agent = self.agents[agent_id] return self.observation_strategy.observation_space(agent)
[docs] def reset(self, seed=None, options=None): """ Reset the environment to its initial state. This method resets the agent selector, metrics collector, move counter, and initializes the observations, rewards, terminations, truncations, and info dictionaries for all agents. Args: seed (int, optional): Random seed for environment initialization. options (dict, optional): Additional options for reset customization. Returns: tuple: A tuple containing: - observations (dict): Initial observations for all agents. - infos (dict): Additional information for all agents. """ # Initialise the agent selector self._agent_selector = agent_selector(self.possible_agents) self.agent_selection = self._agent_selector.next() # Set the random generator if a seed is provided if seed is not None: self.random_generator = np.random.RandomState(seed) # Reset the grid world self.grid_world.reset(self.random_generator) # Reset the agents mapping with the new agents from the reset grid self.agents = {self.possible_agents[i]: self.grid_world.agents[i] for i in range(len(self.grid_world.agents))} # Initialize renderers for renderer in self.renderers: renderer.init(self.grid_world) # Reset metrics self.metrics_collector.reset_metrics() # Reset move counter self.num_moves = 0 # Reset the counter of actions in the turn self.actions_in_current_turn = 0 # Save the previous grid world state for rewards calculation self.prev_grid_world = self.grid_world.copy() # Initialise needed data structures for all agents self.observations = {agent_id: None for agent_id in self.possible_agents} self.rewards = {agent_id: 0 for agent_id in self.possible_agents} self.terminations = {agent_id: False for agent_id in self.possible_agents} self.truncations = {agent_id: False for agent_id in self.possible_agents} self.infos = {agent_id: {} for agent_id in self.possible_agents} # Update action masks for all agents for agent_id in self.possible_agents: self.action_handler.update_action_mask(self.agents[agent_id]) # Initialise the observations for all agents for agent_id in self.possible_agents: self.observations[agent_id] = { "observation": self._get_observations(agent_id), "action_mask": self.agents[agent_id].action_mask } return self.observations, self.infos
[docs] def step(self, action: int): """ Execute a step in the environment for the current agent. This method processes the action for the current agent, updates the environment state, calculates rewards, generates new observations, updates metrics, and selects the next agent to act. If all agents have taken an action in the current turn, it updates the environmental conditions (pollution, flower growth). Args: action (int): The action to take for the current agent. Returns: dict: The observation for the next agent to act. """ if (self.truncations[self.agent_selection] or self.terminations[self.agent_selection]): self._was_dead_step(action) return agent_id = self.agent_selection agent = self.agents[agent_id] # Handle the action for the agent action_enum_value = list(self.action_enum)[action] self.action_handler.handle_action(agent, action_enum_value) # Increment action counter for the current turn self.actions_in_current_turn += 1 # Count active agents (those that are not terminated or truncated) active_agents = sum(1 for a in self.possible_agents if not (self.terminations[a] or self.truncations[a])) # Update pollution once all active agents have acted if self.actions_in_current_turn >= active_agents: self.grid_world.update_cell() self.actions_in_current_turn = 0 # Update observation and action mask for all agents for ag_id in self.possible_agents: ag = self.agents[ag_id] self.action_handler.update_action_mask(ag) self.observations[ag_id] = { "observation": self._get_observations(ag_id), "action_mask": ag.action_mask } # Update the rewards, and info for the agent rewards = self._get_rewards(agent_id, action_enum_value) self.rewards[agent_id] = rewards['total'] self.infos[agent_id] = self._get_info(agent_id, rewards) # Update metrics self.metrics_collector.update_metrics( self.grid_world, self.rewards, self.agent_selection ) # Export and send metrics if configured self.metrics_collector.export_metrics() self.metrics_collector.send_metrics() # Save the current grid world state for the next step self.prev_grid_world = self.grid_world.copy() self.num_moves += 1 # Check if the agent has reached a terminal state self.truncations = {agent: self.num_moves >= self.num_iter for agent in self.possible_agents} # Check if the episode is done for all agents if all(self.terminations[agent] or self.truncations[agent] for agent in self.possible_agents): # Finalize metrics for the episode self.metrics_collector.finish_episode() # Selects the next agent self.agent_selection = self._agent_selector.next() self.render() return self.observe(self.agent_selection)
[docs] def observe(self, agent_id): """ Return the current observation for a specific agent. Args: agent_id (str): The ID of the agent to get the observation for. Returns: dict: The observation for the specified agent, containing: - observation: The agent's view of the environment. - action_mask: Binary mask indicating valid actions. """ return self.observations[agent_id]
[docs] def render(self): """ Render the current state of the environment. This method uses all configured renderers to visualize the current state of the grid world and agents. """ for renderer in self.renderers: renderer.render(self.grid_world, self.agents) if self.render_mode == "human": renderer.display_render()
[docs] def close(self): """ Close the environment and clean up resources. This method finalizes all renderers and closes the metrics_collector. """ for renderer in self.renderers: renderer.end_render() self.metrics_collector.close()
[docs] def _get_observations(self, agent_id): """ Generate the observation for a specific agent. This method delegates to the observation strategy to generate the appropriate observation based on the agent's configured observation type. Args: agent_id (str): The ID of the agent to generate the observation for. Returns: object: The observation for the specified agent. """ agent = self.agents[agent_id] return self.observation_strategy.get_observation(self.grid_world, agent)
[docs] def _get_rewards(self, agent_id, action): """ Calculate the rewards for a specific agent. This method delegates to the reward functions to calculate the appropriate rewards based on the agent's actions and changes in the environment. Args: agent_id (str): The ID of the agent to calculate rewards for. action (:py:class:`._ActionEnum`): The action taken by the agent. Returns: dict: Dictionary of reward components and total reward with the following keys: - 'total': The mono-objective reward for the agent. Computed as the average of all reward components. - 'ecology': The ecological reward component. - 'wellbeing': The wellbeing reward component. - 'biodiversity': The biodiversity reward component. """ # get the agent from its ID agent = self.agents[agent_id] # Compute the rewards rewards = self.reward_functions.compute_reward( self.prev_grid_world, self.grid_world, agent, action ) return rewards
[docs] def _get_info(self, agent_id, rewards): """ Generate additional information for a specific agent. This method creates a dictionary of additional information that is provided alongside the observation and reward. Args: agent_id (str): The ID of the agent to generate info for. rewards (dict): The reward components for the agent. Returns: dict: Additional information for the specified agent with the following keys: - 'rewards': The reward dict for the agent containing each reward component and the total reward. """ return { 'rewards': rewards, }
[docs] def last(self): """ Return the most recent environment step information. This method returns all relevant information about the most recent step taken by the current agent. Returns: tuple: A tuple containing: - observation (dict): The current observation. The dictionary contains: - observation (:py:class:`numpy.ndarray`): The agent's view of the environment. - action_mask (:py:class:`numpy.ndarray`): Binary mask indicating valid actions. - reward (float): The most recent reward. - termination (bool): Whether the agent is in a terminal state. - truncation (bool): Whether the episode was truncated. - info (dict): Additional information about the agent. Refer to :py:meth:`_get_info` for details on the returned value. """ agent_id = self.agent_selection observation = self.observations[agent_id] reward = self.rewards[agent_id] termination = self.terminations[agent_id] truncation = self.truncations[agent_id] info = self.infos[agent_id] return observation, reward, termination, truncation, info
[docs] def _was_dead_step(self, action=None): """ Handle a step for an agent that is already terminated or truncated. This method is called when an agent attempts to take an action after it has already reached a terminal state or the episode has been truncated. It assigns zero reward and selects the next agent. Args: action (int, optional): The action that was attempted. """ agent_id = self.agent_selection self.rewards[agent_id] = 0 self._agent_selector.next()