Source code for algorithms.util.action_perturbator

"""
This module defines several classes to perturb (explore) actions.

An ActionPerturbator takes a continuous action, i.e., a vector of parameters,
and returns a vector with the same shape but different values.
The difference between the original vector and the resulting one is called
the perturbation. It is often implemented as random noise, and allows
exploring the action space.

We have implemented 4 perturbators:

- Epsilon applies a uniform random noise to all dimensions.
- Gaussian applies a gaussian random noise to all dimensions.
- MultiDim applies a uniform random noise to each dimension independently,
  with a given probability.
- Identity performs no perturbation.
"""

import numpy as np


class ActionPerturbator:
    """
    Common interface of the action perturbators.

    Subclasses are expected to override :py:meth:`perturb`. An instance can
    also be called directly like a function, in which case the call is
    forwarded to :py:meth:`perturb`.
    """

    def perturb(self, action, clip=True):
        """
        Add a noise to the proposed action.

        :param action: A 1D vector of values representing the action.
        :type action: np.ndarray

        :param clip: Controls whether to clip the noised values in [0,1].

        :return: A vector of noised values, with the same shape as `action`.
        :rtype: np.ndarray

        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def __call__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to `perturb`."""
        perturbed = self.perturb(*args, **kwargs)
        return perturbed
class EpsilonActionPerturbator(ActionPerturbator):
    """Implements a simple ε noise."""

    def __init__(self, noise: float):
        """
        :param noise: The ε bound of the noise: each dimension receives a
            value drawn uniformly between `-noise` and `noise`.
        """
        self.noise = noise

    def perturb(self, action, clip=True):
        """
        Add a uniform random noise to every dimension of the action.

        The input is left untouched: a new array is always returned.

        :param action: A 1D vector of values representing the action.
        :type action: np.ndarray

        :param clip: Controls whether to clip the noised values in [0,1].

        :return: A new vector of noised values, with the same shape as
            `action`.
        :rtype: np.ndarray
        """
        # Epsilon-based perturbation
        noise = np.random.uniform(-self.noise, self.noise, len(action))
        # Build a new array instead of `action += noise`: the in-place form
        # overwrote the caller's vector, failed on integer arrays, and
        # extended (rather than summed) a plain list.
        perturbed = np.asarray(action) + noise
        if clip:
            perturbed = np.clip(perturbed, 0.0, 1.0)
        return perturbed
class GaussianActionPerturbator(ActionPerturbator):
    """Implements a Gaussian (σ) noise."""

    def __init__(self, noise: float):
        """
        :param noise: The σ (standard deviation) of the zero-centered
            Gaussian noise.
        """
        self.noise = noise

    def perturb(self, action, clip=True):
        """
        Add a zero-centered Gaussian noise to every dimension of the action.

        :param action: A 1D vector of values representing the action.
        :type action: np.ndarray

        :param clip: Controls whether to clip the noised values in [0,1].

        :return: A vector of noised values, with the same shape as `action`.
        :rtype: np.ndarray
        """
        # One independent sample per dimension, centered on 0.
        perturbation = np.random.normal(0, self.noise, len(action))
        noised = action + perturbation
        if not clip:
            return noised
        return np.clip(noised, 0.0, 1.0)
class MultiDimActionPerturbator(ActionPerturbator):
    """Custom algorithm, with a probability to noise each dimension."""

    def __init__(self, noise: float, probability: float):
        """
        :param noise: Bound of the uniform noise: a noised dimension
            receives a value drawn between `-noise` and `noise`.

        :param probability: Probability, for each dimension independently,
            of receiving noise.
        """
        self.noise = noise
        self.proba = probability

    def perturb(self, action, clip=True):
        """
        Add a uniform random noise to a random subset of the dimensions.

        The input is left untouched: a new floating-point array is always
        returned.

        :param action: A 1D vector of values representing the action.
        :type action: np.ndarray

        :param clip: Controls whether to clip the noised values in [0,1].
            Only the dimensions that actually received noise are clipped;
            the others are returned as they were given.

        :return: A new vector of noised values, with the same shape as
            `action`.
        :rtype: np.ndarray
        """
        # Work on a copy so the caller's vector is never overwritten, and
        # make sure it can hold fractional values (an integer array would
        # silently truncate the noise).
        perturbed = np.array(action)
        if not np.issubdtype(perturbed.dtype, np.floating):
            perturbed = perturbed.astype(float)

        # Draw a die for each dimension to apply (or not) a random noise.
        # The draws are deliberately kept one dimension at a time (die first,
        # then the noise only on success) so that seeded runs consume the
        # random generator in the same order as before.
        for k, value in enumerate(perturbed):
            if np.random.random() < self.proba:
                # Apply noise to this dimension
                noised = value + np.random.uniform(-self.noise, self.noise)
                perturbed[k] = np.clip(noised, 0.0, 1.0) if clip else noised
        return perturbed
class IdentityAction(ActionPerturbator):
    """Returns the same action (no perturbation)."""

    def perturb(self, action, clip=True):
        """
        Return the action without adding any noise.

        :param action: A 1D vector of values representing the action.
        :type action: np.ndarray

        :param clip: Controls whether to clip the values in [0,1]. When
            disabled, `action` itself is returned.

        :return: The (optionally clipped) action.
        :rtype: np.ndarray
        """
        if not clip:
            return action
        return np.clip(action, 0.0, 1.0)