Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/GaussianMixtureBandit.cs @ 13236

Visit:

Last change on this file since 13236 was 11849, checked in by gkronber, 10 years ago
#2283: solution reorganization
File size: 2.0 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7
8	namespace HeuristicLab.Algorithms.Bandits {
9	// uses a gaussian mixture reward distribution for each arm
/// <summary>
/// Multi-armed bandit in which every arm draws its reward from a mixture of
/// Gaussian components (means 0.1, 0.3, 0.5, 0.7 and 0.9), truncated to [0, 1].
/// One randomly chosen arm puts a small probability (0.04) on the component
/// with the highest mean; all other arms never sample that component.
/// </summary>
public class GaussianMixtureBandit : IBandit {
  // scale applied to the normal sample of every mixture component (std.dev = 0.1)
  private const double ComponentStdDev = 0.1;
  // number of pulls averaged to estimate OptimalExpectedReward
  private const int NumSamplesForExpectedReward = 100000;

  /// <summary>Number of arms of this bandit.</summary>
  public int NumArms { get; private set; }
  /// <summary>Estimated expected reward of the best arm, for calculating regret.</summary>
  public double OptimalExpectedReward { get; private set; }
  /// <summary>Index of the arm whose expected reward is used as the optimum.</summary>
  public int OptimalExpectedRewardArm { get; private set; }
  /// <summary>Index of the arm that can sample the highest-mean component.</summary>
  public int OptimalMaximalRewardArm { get; private set; }

  private readonly Random random;
  private readonly double[] expReward; // mean reward of each mixture component
  private readonly double[][] componentProb; // arms x components

  /// <summary>
  /// Creates a bandit with <paramref name="nArms"/> arms and picks the optimal arm at random.
  /// </summary>
  /// <param name="random">Random number generator; it is stored and used for all later pulls.</param>
  /// <param name="nArms">Number of arms; must be positive.</param>
  /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="nArms"/> is not positive.</exception>
  public GaussianMixtureBandit(Random random, int nArms) {
    if (random == null) throw new ArgumentNullException("random");
    if (nArms <= 0) throw new ArgumentOutOfRangeException("nArms", "The bandit needs at least one arm.");

    this.random = random;
    this.NumArms = nArms;
    expReward = new double[] { 0.1, 0.3, 0.5, 0.7, 0.9 };
    componentProb = new double[nArms][];

    // decide on optimal arm
    OptimalMaximalRewardArm = random.Next(NumArms);
    OptimalExpectedRewardArm = OptimalMaximalRewardArm;

    for (int i = 0; i < nArms; i++) {
      if (i == OptimalMaximalRewardArm) {
        // only the optimal arm can reach the component with mean 0.9
        componentProb[i] = new double[] { 0.24, 0.24, 0.24, 0.24, 0.04 };
      } else {
        componentProb[i] = new double[] { 0.25, 0.25, 0.25, 0.25, 0 };
      }
    }

    // The expected reward of the optimal arm is estimated empirically by
    // averaging repeated pulls; note that this consumes numbers from 'random'.
    OptimalExpectedReward = Enumerable.Range(0, NumSamplesForExpectedReward)
      .Select(_ => Pull(OptimalExpectedRewardArm))
      .Average();
  }

  /// <summary>
  /// Samples a reward for the given arm: a mixture component is chosen
  /// proportionally to the arm's component probabilities, a normal sample
  /// (scaled by 0.1) is added to the component mean, and the draw is repeated
  /// until the value lies within [0, 1].
  /// </summary>
  /// <param name="arm">Index of the arm to pull.</param>
  /// <returns>A reward in the interval [0, 1].</returns>
  public double Pull(int arm) {
    var probs = componentProb[arm];
    double x;
    do {
      var k = Enumerable.Range(0, probs.Length).SampleProportional(random, probs);

      // presumably Rand.RandNormal returns a standard normal sample - TODO confirm
      var z = Rand.RandNormal(random);
      x = z * ComponentStdDev + expReward[k];
    }
    while (x < 0 || x > 1); // truncation to the interval [0..1] by rejection
    return x;
  }
}
55	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences