Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.Bandits/GaussianMixtureBandit.cs @ 13325

Visit:

Last change on this file since 13325 was 11849, checked in by gkronber, 10 years ago
#2283: solution reorganization
File size: 2.0 KB

Rev	Line
[11731]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Linq;
	4	using System.Text;
	5	using System.Threading.Tasks;
	6	using HeuristicLab.Common;
	7
	8	namespace HeuristicLab.Algorithms.Bandits {
	9	// uses a gaussian mixture reward distribution for each arm
	// Multi-armed bandit whose arms each draw rewards from a mixture of Gaussians.
	// All arms share the same component means (expReward) and differ only in their
	// component weights (componentProb). Exactly one randomly chosen arm puts a small
	// weight (0.04) on the highest-mean component (0.9); every other arm gives that
	// component weight 0. Before truncation the mixture means are therefore
	// 0.42 for the chosen arm and 0.40 for all others.
	public class GaussianMixtureBandit : IBandit {
	  // standard deviation shared by all mixture components
	  private const double ComponentStdDev = 0.1;
	  // number of pulls used to estimate OptimalExpectedReward empirically
	  private const int ExpectedRewardSampleSize = 100000;

	  public int NumArms { get; private set; }
	  public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
	  public int OptimalExpectedRewardArm { get; private set; }
	  public int OptimalMaximalRewardArm { get; private set; }

	  private readonly Random random;
	  private readonly double[] expReward; // mean reward of each mixture component
	  private readonly double[][] componentProb; // arms x components

	  // Creates a bandit with nArms arms and picks one of them uniformly at random as the optimal arm.
	  //   random: pseudo random number generator used for the setup and for every later Pull
	  //   nArms:  number of arms, must be positive
	  // Throws ArgumentNullException when random is null and
	  // ArgumentOutOfRangeException when nArms is not positive.
	  // Note: the constructor consumes values from random (one for the choice of the optimal arm
	  // and those needed for ExpectedRewardSampleSize pulls of that arm).
	  public GaussianMixtureBandit(Random random, int nArms) {
	    if (random == null) throw new ArgumentNullException("random");
	    // without at least one arm there is nothing to pull; fail here instead of
	    // with an IndexOutOfRangeException inside Pull
	    if (nArms <= 0) throw new ArgumentOutOfRangeException("nArms", "The number of arms must be positive.");

	    this.random = random;
	    this.NumArms = nArms;
	    expReward = new double[] { 0.1, 0.3, 0.5, 0.7, 0.9 };
	    componentProb = new double[nArms][];

	    // decide on optimal arm
	    OptimalMaximalRewardArm = random.Next(NumArms);
	    OptimalExpectedRewardArm = OptimalMaximalRewardArm;

	    for (int i = 0; i < nArms; i++) {
	      if (i == OptimalMaximalRewardArm) {
	        // only the optimal arm can reach the component with the highest mean
	        componentProb[i] = new double[] { 0.24, 0.24, 0.24, 0.24, 0.04 };
	      } else {
	        componentProb[i] = new double[] { 0.25, 0.25, 0.25, 0.25, 0 };
	      }
	    }

	    // the truncation in Pull changes the mean of the mixture,
	    // so the expected reward of the optimal arm is estimated by sampling
	    OptimalExpectedReward = Enumerable.Range(0, ExpectedRewardSampleSize)
	      .Select(_ => Pull(OptimalExpectedRewardArm))
	      .Average();
	  }

	  // Draws a reward for the given arm (zero-based index, 0 <= arm < NumArms).
	  // A mixture component is chosen via SampleProportional using the weights of the arm
	  // (presumably with probability proportional to the weights - verify against the helper)
	  // and the reward is the component mean plus ComponentStdDev times a value from
	  // Rand.RandNormal (presumably a standard normal sample - verify against the helper).
	  // Values outside the interval [0..1] are rejected; on rejection the component is drawn
	  // again as well, so the returned reward always lies in [0..1].
	  public double Pull(int arm) {
	    var weights = componentProb[arm];
	    double x = 0;
	    do {
	      var k = Enumerable.Range(0, weights.Length).SampleProportional(random, weights);

	      var z = Rand.RandNormal(random);
	      x = z * ComponentStdDev + expReward[k];
	    }
	    while (x < 0 || x > 1);
	    return x;
	  }
	}
	55	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences