# Source code for ethicalgardeners.rewardfunctions

"""
Module containing reward functions for the Ethical Gardeners environment.

This module defines the reward functions used to compute rewards for agents:

* :py:meth:`~RewardFunctions.compute_ecology_reward`: Computes the ecological
  reward based on the agent's action, specifically for planting and
  harvesting flowers.
* :py:meth:`~RewardFunctions.compute_wellbeing_reward`: Computes the well-being
  reward based on the agent's action, specifically for selling flowers and
  giving a penalty for not earning money.
* :py:meth:`~RewardFunctions.compute_biodiversity_reward`: Computes the
  biodiversity reward based on the number of different flower types planted
  by all the agents and how much the agent helps increase diversity.
"""
from math import log

from ethicalgardeners.agent import Agent
from ethicalgardeners.constants import MAX_PENALTY_TURNS


class RewardFunctions:
    """Class for computing rewards in the Ethical Gardeners environment.

    This class is responsible for calculating different types of rewards
    for agents based on their actions in the environment. The rewards are
    designed to encourage ecologically beneficial behaviors, well-being,
    and biodiversity. Each reward component is normalized to a range
    between -1 and 1.

    Attributes:
        action_enum (enum): An enumeration of possible actions (UP, DOWN,
            LEFT, RIGHT, HARVEST, WAIT, PLANT_TYPE_i). Created dynamically
            based on the number of flower types available.
    """

    def __init__(self, action_enum):
        """Create the RewardFunctions object.

        Args:
            action_enum (enum): An enumeration of possible actions (UP,
                DOWN, LEFT, RIGHT, HARVEST, WAIT, PLANT_TYPE_i). Created
                dynamically based on the number of flower types available.
        """
        self.action_enum = action_enum

    def compute_reward(self, grid_world_prev, grid_world, agent: Agent,
                       action):
        """Compute the mono-objective reward for an agent's action.

        The reward is a combination of ecological, well-being, and
        biodiversity rewards, normalized to a range between -1 and 1.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            dict: A dictionary containing the ecological, well-being, and
            biodiversity rewards, as well as the total reward averaged
            across these components.
        """
        ecology_reward = self.compute_ecology_reward(
            grid_world_prev, grid_world, agent, action)
        wellbeing_reward = self.compute_wellbeing_reward(
            grid_world_prev, grid_world, agent, action)
        biodiversity_reward = self.compute_biodiversity_reward(
            grid_world_prev, grid_world, agent, action)

        return {
            'ecology': ecology_reward,
            'wellbeing': wellbeing_reward,
            'biodiversity': biodiversity_reward,
            # Equal-weight average of the three components.
            'total': (ecology_reward + wellbeing_reward
                      + biodiversity_reward) / 3,
        }

    def compute_ecology_reward(self, grid_world_prev, grid_world,
                               agent: Agent, action):
        """Compute the ecological reward for an agent's action.

        For planting actions, calculates the expected future impact of
        pollution reduction, normalized against the maximum theoretical
        impact. For harvesting actions, multiplies the impact the flower
        had on the environment before harvesting with the pollution of
        the cell, also normalized against the maximum. Penalizes
        harvesting actions only if the pollution level is above the
        minimum pollution level.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The normalized ecological reward (between -1 and 1)
            for planting and harvesting actions, 0 for other actions.
        """
        # Reward computed only for planting and harvesting actions.
        # Any action outside the "non planting" set is a PLANT_TYPE_i
        # action; HARVEST is handled by the elif branch below.
        if action not in self.action_enum.get_non_planting_actions():
            p_max = grid_world.max_pollution
            p_min = grid_world.min_pollution

            position = agent.position
            cell = grid_world.get_cell(position)

            # Check if a flower has actually been planted in the cell
            # (the action may have failed, e.g. cell occupied).
            if not cell.has_flower():
                return 0.0

            flower = cell.flower
            flower_type = flower.flower_type
            flower_pollution_reduction = (
                grid_world.flowers_data[flower_type]['pollution_reduction'])

            # Current pollution level in the cell
            cell_pollution = cell.pollution

            # Maximum possible impact: full pollution range scaled by the
            # same 1/0.01 factor used in r_plant's denominator offset.
            r_max = (p_max - p_min) * 1 / 0.01

            # Expected future impact of pollution reduction. The +0.01
            # offset avoids zero-division when cell_pollution == p_max.
            # NOTE(review): cell_pollution - p_max is <= 0 for any cell
            # below maximum pollution, so this denominator can be
            # negative and flip the reward's sign — confirm intended
            # (p_max - cell_pollution may have been meant).
            r_plant = (sum(flower_pollution_reduction) * 1 / (
                cell_pollution - p_max + 0.01))  # Avoid zero-division

            # Normalize the reward against the maximum possible impact
            if r_max > 0:
                return r_plant / r_max
            else:
                return 0.0

        elif action == self.action_enum.HARVEST:
            p_max = grid_world.max_pollution
            p_min = grid_world.min_pollution

            position = agent.position
            prev_cell = grid_world_prev.get_cell(position)
            cell = grid_world.get_cell(position)

            # If the cell still has a flower, nothing was harvested.
            if cell.has_flower():
                return 0.0

            # If the previous cell had no flower, there was nothing to
            # harvest in the first place.
            if not prev_cell.has_flower():
                return 0.0

            flower = prev_cell.flower
            flower_type = flower.flower_type

            # A flower type with no pollution-reduction data has no
            # measurable ecological impact.
            if len(grid_world.flowers_data[
                    flower_type]['pollution_reduction']) == 0:
                return 0.0

            # Pollution reduction of the fully grown flower (last stage).
            flower_pollution_grown_reduction = (
                grid_world.flowers_data[flower_type]['pollution_reduction'][-1]
            )

            # Current pollution level in the cell
            cell_pollution = cell.pollution

            # Compute the maximum possible impact
            r_max = p_max - p_min

            # Impact lost by harvesting: scaled by how far the cell's
            # pollution is above the minimum (no penalty at p_min).
            r_harvest = (flower_pollution_grown_reduction
                         * (cell_pollution - p_min))

            # Normalize the reward against the maximum possible impact
            if r_max > 0:
                return r_harvest / r_max
            else:
                return 0.0
        else:
            return 0.0

    def compute_wellbeing_reward(self, grid_world_prev, grid_world,
                                 agent: Agent, action):
        """Compute the well-being reward for an agent's action.

        Well-being rewards are calculated based on the price of the
        harvested flowers compared to the most expensive flower type.
        Penalises the agent for not earning money by giving a penalty
        based on the number of turns without income, normalized to a
        maximum penalty.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The normalized well-being reward (between -1 and 1)
            for harvesting actions, a penalty for other actions.
        """
        # Reward computed only for harvesting actions
        if action == self.action_enum.HARVEST:
            position = agent.position
            prev_cell = grid_world_prev.get_cell(position)
            cell = grid_world.get_cell(position)

            # If the cell still has a flower, nothing was harvested.
            if cell.has_flower():
                return 0.0

            # If the previous cell had no flower, the harvest was a no-op.
            if not prev_cell.has_flower():
                return 0.0

            flower = prev_cell.flower
            flower_type = flower.flower_type

            # Get the monetary value of the flower
            flower_value = grid_world.flowers_data[flower_type]['price']

            # Normalize the reward based on the maximum possible value
            highest_flower_value = max(
                grid_world.flowers_data[ft]['price']
                for ft in grid_world.flowers_data)

            return flower_value / highest_flower_value
        else:
            # Penalty grows with turns without income and is capped at -1.
            return -min(agent.turns_without_income / MAX_PENALTY_TURNS, 1.0)

    def compute_biodiversity_reward(self, grid_world_prev, grid_world,
                                    agent: Agent, action):
        """Compute the biodiversity reward for an agent's action.

        Biodiversity rewards are calculated based on the number of
        different flower types planted by the agents using the
        Shannon-Wiener index. Compares the index before and after the
        planting action to determine the impact.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The normalized biodiversity reward (between -1 and 1)
            for planting actions, 0 for other actions.
        """
        if action in self.action_enum.get_non_planting_actions():
            return 0.0

        position = agent.position
        cell = grid_world.get_cell(position)

        # Check if a flower has actually been planted in the cell
        if not cell.has_flower():
            return 0.0

        # Get the flower type that has been planted
        planted_flower_type = cell.flower.flower_type

        # Count the number of each flower type planted by all agents.
        # (Loop variable renamed from `agent` to avoid shadowing the
        # `agent` parameter.)
        flowers = {flower_type: 0
                   for flower_type in grid_world.flowers_data.keys()}
        total_flowers = 0
        for other_agent in grid_world.agents:
            for flower_type, count in other_agent.flowers_planted.items():
                flowers[flower_type] += count
                total_flowers += count

        # Reconstruct the flower counts before this planting action.
        prev_flowers = dict(flowers)  # Copy current flower counts
        prev_flowers[planted_flower_type] -= 1  # Remove the planted flower
        prev_total = total_flowers - 1

        # Compute the ratio of each flower type and use the
        # Shannon-Wiener index H' = -sum(p_i * ln(p_i)) for biodiversity.
        # before
        prev_biodiversity = 0
        for flower_type in prev_flowers:
            if prev_flowers[flower_type] > 0:
                ratio = prev_flowers[flower_type] / prev_total
                prev_biodiversity -= ratio * log(ratio)

        # after
        biodiversity = 0
        for flower_type in flowers:
            if flowers[flower_type] > 0:
                ratio = flowers[flower_type] / total_flowers
                biodiversity -= ratio * log(ratio)

        # Maximum Shannon index: all flower types equally represented.
        max_biodiversity = log(len(grid_world.flowers_data))

        # Compute the impact of the agent's planting action and normalize
        if max_biodiversity > 0:
            return (biodiversity - prev_biodiversity) / max_biodiversity
        else:
            return 0.0