using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// A multi-armed bandit whose arms pay Bernoulli rewards (0 or 1).
  /// The expected reward of every arm is drawn once, at construction time,
  /// from the supplied random number generator.
  /// </summary>
  public class BernoulliBandit {
    /// <summary>Number of arms of this bandit.</summary>
    public int NumArms { get; private set; }

    /// <summary>
    /// Expected reward of the best arm (the maximum over all arms);
    /// intended for calculating regret.
    /// </summary>
    public double OptimalExpectedReward { get; private set; }

    private readonly Random random;
    private readonly double[] expReward;

    /// <summary>
    /// Creates a bandit with <paramref name="nArms"/> arms. The expected reward
    /// of each arm is an independent draw of <c>random.NextDouble()</c>.
    /// </summary>
    /// <param name="random">
    /// Random number generator used both to initialize the arms and, later,
    /// to sample rewards in <see cref="Pull"/>.
    /// </param>
    /// <param name="nArms">Number of arms; must be at least 1.</param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="nArms"/> is less than 1.</exception>
    public BernoulliBandit(Random random, int nArms) {
      if (random == null) {
        throw new ArgumentNullException("random");
      }
      if (nArms < 1) {
        // without at least one arm there is nothing to pull and no optimal reward
        throw new ArgumentOutOfRangeException("nArms", nArms, "A bandit needs at least one arm.");
      }

      this.random = random;
      this.NumArms = nArms;

      // expected reward of arms is iid and uniformly distributed
      expReward = new double[nArms];
      OptimalExpectedReward = double.NegativeInfinity;
      for (int i = 0; i < nArms; i++) {
        expReward[i] = random.NextDouble();
        if (expReward[i] > OptimalExpectedReward) {
          OptimalExpectedReward = expReward[i];
        }
      }
    }

    /// <summary>
    /// Pulls an arm and returns a Bernoulli distributed reward whose mean is
    /// the expected reward of that arm.
    /// </summary>
    /// <param name="arm">Zero-based index of the arm, in the range [0, NumArms).</param>
    /// <returns>1 on success, 0 otherwise.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="arm"/> is not a valid arm index.</exception>
    public double Pull(int arm) {
      if (arm < 0 || arm >= expReward.Length) {
        throw new ArgumentOutOfRangeException("arm", arm, "Arm index must be in the range [0, NumArms).");
      }

      // NextDouble() yields values in [0, 1); the strict comparison makes the
      // success probability exactly expReward[arm], so an arm with mean 0 never pays.
      return random.NextDouble() < expReward[arm] ? 1 : 0;
    }
  }
}