using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits {
  // custom testcase for extreme hunter policy
  /// <summary>
  /// A bandit whose arms pay out from a zero-inflated Gaussian mixture:
  /// with probability pZero[arm] the reward is exactly 0, otherwise it is
  /// drawn from N(mu[arm], sigma[arm]^2) and clamped to [0, MaxReward].
  /// </summary>
  public class MixtureBandit : IBandit {
    private readonly double[] mu;     // Gaussian mean, one entry per arm
    private readonly double[] sigma;  // Gaussian standard deviation, one entry per arm
    private readonly double[] pZero;  // probability of a zero reward, one entry per arm

    public int NumArms { get { return mu.Length; } }
    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }
    public double MaxReward { get; private set; }
    public double MinReward { get; private set; }

    private readonly Random random;

    /// <summary>
    /// Convenience constructor: unit variance, no zero-inflation, no upper reward bound.
    /// </summary>
    public MixtureBandit(Random random, IEnumerable<double> mu)
      : this(random, mu, mu.Select(_ => 1.0), mu.Select(_ => 0.0), double.PositiveInfinity) { }

    /// <summary>
    /// Creates a mixture bandit from per-arm parameters.
    /// </summary>
    /// <param name="random">Source of randomness shared by all pulls.</param>
    /// <param name="mu">Per-arm Gaussian means.</param>
    /// <param name="sigma">Per-arm Gaussian standard deviations (same length as mu).</param>
    /// <param name="pZero">Per-arm probability of a zero reward (same length as mu).</param>
    /// <param name="maxReward">Upper clamp applied to every sampled reward.</param>
    /// <exception cref="ArgumentException">If sigma or pZero do not match mu in length.</exception>
    public MixtureBandit(Random random, IEnumerable<double> mu, IEnumerable<double> sigma, IEnumerable<double> pZero, double maxReward) {
      this.mu = mu.ToArray();
      this.sigma = sigma.ToArray();
      this.pZero = pZero.ToArray(); // probability of a zero reward
      if (this.sigma.Length != this.mu.Length || this.pZero.Length != this.mu.Length)
        throw new ArgumentException("mu, sigma and pZero must have the same number of elements");
      this.MaxReward = maxReward;
      this.MinReward = 0.0; // Pull() clamps every reward to [0, MaxReward]
      this.random = random;

      // Determine the optimal arm empirically via Monte-Carlo sampling; the
      // expectation of the clamped zero-inflated Gaussian is found by simulation here.
      double optimalExpectedReward;
      int bestArmForMaxReward, bestArmForExpReward;
      BanditHelper.SampleArms(random, this, 100000, out optimalExpectedReward, out bestArmForExpReward, out bestArmForMaxReward);
      OptimalExpectedReward = optimalExpectedReward;
      OptimalExpectedRewardArm = bestArmForExpReward;
      OptimalMaximalRewardArm = bestArmForMaxReward;
    }

    /// <summary>
    /// Samples one reward from the given arm: 0 with probability pZero[arm],
    /// otherwise a Gaussian draw clamped to [0, MaxReward].
    /// </summary>
    public double Pull(int arm) {
      if (random.NextDouble() < pZero[arm]) return 0.0;
      var z = Rand.RandNormal(random); // standard normal sample (project helper)
      var x = z * sigma[arm] + mu[arm];
      return Math.Max(0, Math.Min(MaxReward, x));
    }
  }
}