Context Navigation

TruncatedNormalBandit.cs @ 11730

Visit:

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 1.6 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7
8	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Multi-armed bandit whose arms return truncated, normally distributed rewards in [0, 1].
/// Each arm has its own mean, drawn once at construction time; all arms share the same
/// standard deviation.
/// </summary>
public class TruncatedNormalBandit : IBandit {
  // Arm means are drawn uniformly from [0, MaxArmMean).
  private const double MaxArmMean = 0.7;
  // Standard deviation of the normal distribution sampled on every pull (same for all arms).
  private const double RewardStdDev = 0.1;

  /// <summary>Number of arms of this bandit.</summary>
  public int NumArms { get; private set; }

  /// <summary>
  /// Mean parameter of the best arm, used for calculating regret.
  /// Note: this is the mean of the underlying normal distribution before truncation to [0, 1].
  /// </summary>
  public double OptimalExpectedReward { get; private set; }

  /// <summary>Index of the arm with the highest mean parameter.</summary>
  public int OptimalExpectedRewardArm { get; private set; }

  /// <summary>
  /// Index of the arm most likely to produce the maximal reward.
  /// Because all arms share the same standard deviation, the arm with the highest expected
  /// reward also has the highest probability of returning a reward of 1.0.
  /// </summary>
  public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }

  private readonly Random random;
  private readonly double[] expReward;

  /// <summary>
  /// Creates a bandit with <paramref name="nArms"/> arms whose mean rewards are i.i.d. and
  /// uniformly distributed in [0, 0.7).
  /// </summary>
  /// <param name="random">Random number generator used for drawing arm means and rewards.</param>
  /// <param name="nArms">Number of arms; must be at least 1.</param>
  /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="nArms"/> is less than 1.</exception>
  public TruncatedNormalBandit(Random random, int nArms) {
    if (random == null) throw new ArgumentNullException("random");
    // without at least one arm there is no optimal arm and OptimalExpectedReward would stay -Infinity
    if (nArms < 1) throw new ArgumentOutOfRangeException("nArms", nArms, "The bandit needs at least one arm.");

    this.random = random;
    this.NumArms = nArms;

    // expected reward of arms is iid and uniformly distributed
    expReward = new double[nArms];
    OptimalExpectedReward = double.NegativeInfinity;
    for (int i = 0; i < nArms; i++) {
      // exactly one NextDouble() call per arm, in arm order, so seeded runs stay reproducible
      expReward[i] = random.NextDouble() * MaxArmMean;
      // strict comparison: on ties the arm with the lowest index is kept
      if (expReward[i] > OptimalExpectedReward) {
        OptimalExpectedReward = expReward[i];
        OptimalExpectedRewardArm = i;
      }
    }
  }

  /// <summary>
  /// Pulls an arm and returns a truncated normally distributed reward with
  /// mean expReward[arm] and std. dev. 0.1, restricted to [0, 1].
  /// </summary>
  /// <param name="arm">Zero-based index of the arm to pull; an index outside the arm array
  /// fails on the array access.</param>
  /// <returns>A reward in the closed interval [0, 1].</returns>
  public double Pull(int arm) {
    double x;
    // rejection sampling: redraw until the sample lies within [0, 1]
    do {
      var z = Rand.RandNormal(random);
      x = z * RewardStdDev + expReward[arm];
    }
    while (x < 0 || x > 1);
    return x;
  }
}
45	}

Note: See TracBrowser for help on using the repository browser.