using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using HeuristicLab.Common; namespace HeuristicLab.Algorithms.Bandits { // uses a gaussian mixture reward distribution for each arm public class GaussianMixtureBandit : IBandit { public int NumArms { get; private set; } public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret public int OptimalExpectedRewardArm { get; private set; } public int OptimalMaximalRewardArm { get; private set; } private readonly Random random; private readonly double[] expReward; // for each component components private readonly double[][] componentProb; // arms x components public GaussianMixtureBandit(Random random, int nArms) { this.random = random; this.NumArms = nArms; var numComponents = 0; expReward = new double[] { 0.1, 0.3, 0.5, 0.7, 0.9 }; componentProb = new double[nArms][]; OptimalExpectedReward = double.NegativeInfinity; // decide on optimal arm OptimalMaximalRewardArm = random.Next(NumArms); OptimalExpectedRewardArm = OptimalMaximalRewardArm; for (int i = 0; i < nArms; i++) { componentProb[i] = new double[numComponents]; if (i == OptimalMaximalRewardArm) { componentProb[i] = new double[] { 0.24, 0.24, 0.24, 0.24, 0.04 }; } else { componentProb[i] = new double[] { 0.25, 0.25, 0.25, 0.25, 0 }; } } OptimalExpectedReward = Enumerable.Range(0, 100000).Select(_ => Pull(OptimalExpectedRewardArm)).Average(); } // std.dev = 0.1 // and truncation to the interval [0..1] public double Pull(int arm) { double x = 0; do { var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]); var z = Rand.RandNormal(random); x = z * 0.1 + expReward[k]; } while (x < 0 || x > 1); return x; } } }