using System;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits {
  public class TruncatedNormalBandit : IBandit {
    public int NumArms { get; private set; }
    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
    public int OptimalExpectedRewardArm { get; private set; }
    // the arm with the highest expected reward also has the highest probability of returning a reward of 1.0
    public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }

    private readonly Random random;
    private readonly double[] expReward;

    public TruncatedNormalBandit(Random random, int nArms) {
      this.random = random;
      this.NumArms = nArms;
      // expected rewards of the arms are i.i.d. and uniformly distributed in [0, 0.7)
      expReward = new double[nArms];
      OptimalExpectedReward = double.NegativeInfinity;
      for (int i = 0; i < nArms; i++) {
        expReward[i] = random.NextDouble() * 0.7;
        if (expReward[i] > OptimalExpectedReward) {
          OptimalExpectedReward = expReward[i];
          OptimalExpectedRewardArm = i;
        }
      }
    }

    // pulling an arm yields a reward drawn from a normal distribution with
    // mean expReward[arm] and std.dev. 0.1, truncated to [0, 1] by rejection sampling
    public double Pull(int arm) {
      double x;
      do {
        var z = Rand.RandNormal(random);
        x = z * 0.1 + expReward[arm];
      } while (x < 0 || x > 1);
      return x;
    }
  }
}
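
// --- Usage sketch (added for illustration; not part of the original file) ---
// A minimal example of driving TruncatedNormalBandit with a uniform-random policy
// and measuring cumulative regret against OptimalExpectedReward. Assumptions:
// IBandit exposes NumArms and Pull(int) as implemented above; the namespace
// "HeuristicLab.Algorithms.Bandits.Demo" and the class "RandomPolicyDemo" are
// hypothetical names introduced only for this sketch.
namespace HeuristicLab.Algorithms.Bandits.Demo {
  using System;
  using HeuristicLab.Algorithms.Bandits;

  public static class RandomPolicyDemo {
    public static void Main() {
      var random = new Random(31415);
      var bandit = new TruncatedNormalBandit(random, nArms: 10);

      const int nPulls = 1000;
      double totalReward = 0.0;
      for (int i = 0; i < nPulls; i++) {
        int arm = random.Next(bandit.NumArms); // choose an arm uniformly at random
        totalReward += bandit.Pull(arm);
      }

      // regret = expected reward of always playing the best arm minus the reward actually collected
      double regret = nPulls * bandit.OptimalExpectedReward - totalReward;
      Console.WriteLine("avg reward: {0:F3}, regret: {1:F1}", totalReward / nPulls, regret);
    }
  }
}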