Context Navigation

TruncatedNormalBandit.cs @ 11710

Visit:

Last change on this file since 11710 was 11710, checked in by gkronber, 9 years ago
#2283: more bandit policies and tests
File size: 1.4 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6
7	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Multi-armed bandit whose arms pay rewards drawn from a normal distribution
/// that is truncated to the interval [0, 1].
/// </summary>
/// <remarks>
/// The location parameter of every arm is drawn once in the constructor from the
/// supplied random number generator; each pull samples a normal distribution with
/// that location and a standard deviation of 0.1 and re-draws until the sample
/// lies within [0, 1].
/// </remarks>
public class TruncatedNormalBandit {
  // standard deviation of the (untruncated) reward distribution of every arm
  private const double RewardStdDev = 0.1;

  /// <summary>Number of arms of this bandit.</summary>
  public int NumArms { get; private set; }

  /// <summary>
  /// Largest per-arm location parameter, i.e. the reward of the best arm, used for
  /// calculating regret.
  /// </summary>
  /// <remarks>
  /// This is the mean of the untruncated normal distribution; truncation to [0, 1]
  /// shifts the actual mean slightly for arms that lie close to 0 or 1.
  /// </remarks>
  public double OptimalExpectedReward { get; private set; }

  private readonly Random random;
  private readonly double[] expReward;

  /// <summary>
  /// Creates a bandit with <paramref name="nArms"/> arms.
  /// </summary>
  /// <param name="random">Random number generator used for the arm means and for all pulls.</param>
  /// <param name="nArms">Number of arms; must be at least 1.</param>
  /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="nArms"/> is smaller than 1.</exception>
  public TruncatedNormalBandit(Random random, int nArms) {
    if (random == null) throw new ArgumentNullException("random");
    // without at least one arm there is no best arm and OptimalExpectedReward would stay at -infinity
    if (nArms < 1) throw new ArgumentOutOfRangeException("nArms", nArms, "A bandit needs at least one arm.");

    this.random = random;
    this.NumArms = nArms;

    // expected reward of arms is iid and uniformly distributed
    expReward = new double[nArms];
    OptimalExpectedReward = double.NegativeInfinity;
    for (int i = 0; i < nArms; i++) {
      expReward[i] = random.NextDouble();
      if (expReward[i] > OptimalExpectedReward) OptimalExpectedReward = expReward[i];
    }
  }

  /// <summary>
  /// Pulls an arm, which results in a truncated normally distributed reward
  /// with location expReward[arm] and std.dev 0.1.
  /// </summary>
  /// <param name="arm">Zero-based index of the arm to pull.</param>
  /// <returns>A reward within [0, 1].</returns>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="arm"/> is negative or not smaller than <see cref="NumArms"/>.
  /// </exception>
  public double Pull(int arm) {
    if (arm < 0 || arm >= expReward.Length) {
      throw new ArgumentOutOfRangeException("arm", arm, "The arm index must be within [0, NumArms).");
    }

    double x;
    do {
      // consumes two uniform numbers per attempt (first u1, then u2)
      var z = Transform(random.NextDouble(), random.NextDouble());
      x = z * RewardStdDev + expReward[arm];
      // rejection sampling; the negated form also rejects NaN and +/-infinity,
      // which x < 0 || x > 1 would let through in the NaN case
    } while (!(x >= 0 && x <= 1));
    return x;
  }

  // Box-Muller transform: maps two uniform numbers to one standard normal number.
  // For u1 == 0 the logarithm is -infinity and the result is not finite;
  // Pull() discards such samples in its rejection loop.
  private double Transform(double u1, double u2) {
    return Math.Sqrt(-2 * Math.Log(u1)) * Math.Cos(2 * Math.PI * u2);
  }
}
42	}

Note: See TracBrowser for help on using the repository browser.