"""
Module containing reward functions for the Ethical Gardeners environment.
This module defines the reward functions used to compute rewards for agents:
* :py:meth:`~RewardFunctions.compute_ecology_reward`: Computes the ecological
reward based on the agent's action, specifically for planting and
harvesting flowers.
* :py:meth:`~RewardFunctions.compute_wellbeing_reward`: Computes the well-being
reward based on the agent's action, specifically for selling flowers and
giving a penalty for not earning money.
* :py:meth:`~RewardFunctions.compute_biodiversity_reward`: Computes the
biodiversity reward based on the number of different flower types planted
by all the agents and how much the agent helps increase diversity.
"""
from math import log
from ethicalgardeners.agent import Agent
from ethicalgardeners.constants import MAX_PENALTY_TURNS
class RewardFunctions:
    """
    Class for computing rewards in the Ethical Gardeners environment.

    This class is responsible for calculating different types of rewards for
    agents based on their actions in the environment. The rewards are designed
    to encourage ecologically beneficial behaviors, well-being, and
    biodiversity.

    Each reward component is designed to be normalized to a range between
    -1 and 1.

    Attributes:
        action_enum (enum): An enumeration of possible actions (UP, DOWN,
            LEFT, RIGHT, HARVEST, WAIT, PLANT_TYPE_i). Created dynamically
            based on the number of flower types available. The methods of
            this class only rely on its ``HARVEST`` member and its
            ``get_non_planting_actions()`` method.
    """

    def __init__(self, action_enum):
        """Create the RewardFunctions object.

        Args:
            action_enum (enum): An enumeration of possible actions (UP, DOWN,
                LEFT, RIGHT, HARVEST, WAIT, PLANT_TYPE_i). Created
                dynamically based on the number of flower types available.
        """
        self.action_enum = action_enum

    def compute_reward(self, grid_world_prev, grid_world, agent: Agent,
                       action):
        """
        Compute the mono-objective reward for an agent based on its action in
        the environment.

        The three components (ecology, well-being, biodiversity) are computed
        independently and the total is their unweighted arithmetic mean.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment
                after the action.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            dict: A dictionary with the keys ``'ecology'``, ``'wellbeing'``,
            ``'biodiversity'`` and ``'total'``, where ``'total'`` is the
            average of the three components.
        """
        ecology_reward = self.compute_ecology_reward(
            grid_world_prev, grid_world, agent, action)
        wellbeing_reward = self.compute_wellbeing_reward(
            grid_world_prev, grid_world, agent, action)
        biodiversity_reward = self.compute_biodiversity_reward(
            grid_world_prev, grid_world, agent, action)

        total_reward = (ecology_reward + wellbeing_reward
                        + biodiversity_reward) / 3

        return {'ecology': ecology_reward,
                'wellbeing': wellbeing_reward,
                'biodiversity': biodiversity_reward,
                'total': total_reward}

    def compute_ecology_reward(self, grid_world_prev, grid_world,
                               agent: Agent, action):
        """
        Compute the ecological reward for an agent based on its action in the
        environment.

        For planting actions (any action that is not listed by
        ``action_enum.get_non_planting_actions()``), the summed pollution
        reduction of the planted flower type is scaled by the inverse of
        ``cell_pollution - max_pollution + 0.01`` and normalized by
        ``(max_pollution - min_pollution) / 0.01``.

        For harvesting actions, the last pollution-reduction value of the
        harvested flower type is multiplied by how far the cell pollution is
        above the minimum pollution, and normalized by
        ``max_pollution - min_pollution``.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment
                after the action.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The normalized ecological reward for planting and
            harvesting actions, 0 for other actions or when the action had
            no effect on the agent's cell.
        """
        # Planting is tested first, exactly as "not a non-planting action".
        if action not in self.action_enum.get_non_planting_actions():
            return self._planting_ecology_reward(grid_world, agent)

        if action == self.action_enum.HARVEST:
            return self._harvest_ecology_reward(grid_world_prev, grid_world,
                                                agent)

        # Moving or waiting has no ecological impact.
        return 0.0

    @staticmethod
    def _planting_ecology_reward(grid_world, agent):
        """Return the normalized ecology reward of a planting action."""
        cell = grid_world.get_cell(agent.position)

        # Nothing to reward if the planting did not produce a flower.
        if not cell.has_flower():
            return 0.0

        flower_type = cell.flower.flower_type
        pollution_reductions = (
            grid_world.flowers_data[flower_type]['pollution_reduction'])

        p_max = grid_world.max_pollution
        p_min = grid_world.min_pollution

        # Maximum possible impact, reached when the denominator below is at
        # its smallest positive value (0.01).
        r_max = (p_max - p_min) * 1 / 0.01

        # NOTE(review): the 0.01 offset is meant to avoid a zero division,
        # but this denominator is zero when cell.pollution == p_max - 0.01
        # and negative below that. Confirm whether
        # ``p_max - cell.pollution + 0.01`` was intended; the arithmetic is
        # kept unchanged here.
        r_plant = sum(pollution_reductions) * 1 / (
            cell.pollution - p_max + 0.01)

        if r_max > 0:
            return r_plant / r_max
        return 0.0

    def _harvest_ecology_reward(self, grid_world_prev, grid_world, agent):
        """Return the normalized ecology reward of a harvesting action."""
        position = agent.position
        prev_cell = grid_world_prev.get_cell(position)
        cell = grid_world.get_cell(position)

        if not self._flower_was_harvested(prev_cell, cell):
            return 0.0

        flower_type = prev_cell.flower.flower_type
        pollution_reductions = (
            grid_world.flowers_data[flower_type]['pollution_reduction'])
        # ``len`` (not truthiness) so that array-like values keep working.
        if len(pollution_reductions) == 0:
            return 0.0

        # Reduction provided by the flower at its last growth stage.
        grown_reduction = pollution_reductions[-1]

        p_max = grid_world.max_pollution
        p_min = grid_world.min_pollution
        r_max = p_max - p_min

        # The impact vanishes when the cell is already at minimum pollution.
        # NOTE(review): the result has the sign of ``grown_reduction``; it is
        # a penalty only if that value is negative -- confirm the sign
        # convention used in ``flowers_data``.
        r_harvest = grown_reduction * (cell.pollution - p_min)

        if r_max > 0:
            return r_harvest / r_max
        return 0.0

    @staticmethod
    def _flower_was_harvested(prev_cell, cell):
        """Return True if the cell held a flower before and none now."""
        return not cell.has_flower() and prev_cell.has_flower()

    def compute_wellbeing_reward(self, grid_world_prev, grid_world,
                                 agent: Agent, action):
        """
        Compute the well-being reward for an agent based on its action in the
        environment.

        For a harvesting action that actually removed a flower, the reward is
        the price of the harvested flower type divided by the highest price
        among all flower types. For every other action, the agent receives a
        penalty that grows with ``agent.turns_without_income`` and is capped
        at ``MAX_PENALTY_TURNS`` turns.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action.
            grid_world (:py:class:`.GridWorld`): The grid world environment
                after the action.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The price ratio for an effective harvest, 0 for a harvest
            that removed no flower (or when no flower type has a positive
            price), and a penalty in ``[-1, 0]`` for other actions.
        """
        if action == self.action_enum.HARVEST:
            position = agent.position
            prev_cell = grid_world_prev.get_cell(position)
            cell = grid_world.get_cell(position)

            if not self._flower_was_harvested(prev_cell, cell):
                return 0.0

            flowers_data = grid_world.flowers_data
            flower_value = flowers_data[prev_cell.flower.flower_type]['price']
            highest_flower_value = max(
                data['price'] for data in flowers_data.values())

            # Guard the normalisation like the other reward components do:
            # without it, all-zero prices raise ZeroDivisionError.
            if highest_flower_value > 0:
                return flower_value / highest_flower_value
            return 0.0

        # Penalty for not earning money, saturating at -1.
        return -min(agent.turns_without_income / MAX_PENALTY_TURNS, 1.0)

    def compute_biodiversity_reward(self, grid_world_prev, grid_world,
                                    agent: Agent, action):
        """
        Compute the biodiversity reward for an agent based on its action in
        the environment.

        The flowers planted by all agents (``flowers_planted`` of every agent
        in ``grid_world.agents``) are counted per flower type, and the
        Shannon-Wiener index of that distribution is compared with the index
        of the same distribution minus the flower that was just planted. The
        difference is normalized by the maximum index,
        ``log(number of flower types)``.

        Args:
            grid_world_prev (:py:class:`.GridWorld`): The grid world
                environment before the action (not used by this component).
            grid_world (:py:class:`.GridWorld`): The grid world environment
                after the action.
            agent (:py:class:`.Agent`): The agent performing the action.
            action (:py:attr:`action_enum`): The action performed.

        Returns:
            float: The normalized change of the biodiversity index for
            planting actions, 0 for other actions or when no flower is
            present in the agent's cell.
        """
        if action in self.action_enum.get_non_planting_actions():
            return 0.0

        cell = grid_world.get_cell(agent.position)
        # Nothing to reward if the planting did not produce a flower.
        if not cell.has_flower():
            return 0.0
        planted_flower_type = cell.flower.flower_type

        # Count the flowers planted by all agents, per flower type.
        flower_counts = {flower_type: 0
                         for flower_type in grid_world.flowers_data}
        total_flowers = 0
        # The loop variable must not be called ``agent``: that would shadow
        # the parameter of this method.
        for gardener in grid_world.agents:
            for flower_type, count in gardener.flowers_planted.items():
                flower_counts[flower_type] += count
                total_flowers += count

        # State before planting: same counts minus the flower just planted.
        prev_flower_counts = dict(flower_counts)
        prev_flower_counts[planted_flower_type] -= 1
        prev_total = total_flowers - 1

        prev_biodiversity = self._shannon_index(prev_flower_counts,
                                                prev_total)
        biodiversity = self._shannon_index(flower_counts, total_flowers)

        # The index is maximal when all flower types are equally represented.
        max_biodiversity = log(len(grid_world.flowers_data))

        if max_biodiversity > 0:
            return (biodiversity - prev_biodiversity) / max_biodiversity
        return 0.0

    @staticmethod
    def _shannon_index(counts, total):
        """Return the Shannon-Wiener index of ``counts`` summing to total."""
        index = 0
        for count in counts.values():
            # Types with no flower contribute nothing (and log(0) is
            # undefined), so they are skipped.
            if count > 0:
                ratio = count / total
                index -= ratio * log(ratio)
        return index