Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/GaussianBandit.cs @ 13234

Visit:

Last change on this file since 13234 was 12876, checked in by gkronber, 9 years ago
#2283: implemented first crude version of extreme hunter algorithm in branch
File size: 2.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Multi-armed bandit problem in which pulling an arm yields a normally
  /// distributed reward. The mean and the standard deviation of every arm are
  /// sampled once in the constructor; rewards can optionally be truncated to
  /// the interval (MinReward, MaxReward] by rejection sampling.
  /// </summary>
  public class GaussianBandit : IBandit {
    // Probability level of the upper quantile used to decide which arm is
    // expected to produce the largest (extreme) rewards.
    private const double ExtremeQuantileLevel = 0.999;

    /// <summary>Number of arms of this bandit.</summary>
    public int NumArms { get; private set; }

    /// <summary>Largest mean reward over all arms (reward of the best arm, for calculating regret).</summary>
    public double OptimalExpectedReward { get; private set; }

    /// <summary>Index of the arm with the largest mean reward.</summary>
    public int OptimalExpectedRewardArm { get; private set; }

    /// <summary>Index of the arm with the largest upper quantile (mean + z * std.dev.).</summary>
    public int OptimalMaximalRewardArm { get; private set; }

    /// <summary>Inclusive upper bound for rewards returned by <see cref="Pull"/>.</summary>
    public double MaxReward { get; private set; }

    /// <summary>Exclusive lower bound for rewards returned by <see cref="Pull"/>.</summary>
    public double MinReward { get; private set; }

    private readonly Random random;
    private readonly double[] exp;     // mean reward of each arm
    private readonly double[] stdDev;  // standard deviation of the reward of each arm

    /// <summary>
    /// Creates a bandit with <paramref name="nArms"/> arms and samples the
    /// reward distribution parameters of every arm from <paramref name="random"/>.
    /// </summary>
    /// <param name="random">Random number generator used for sampling arm parameters and rewards.</param>
    /// <param name="nArms">Number of arms; must be at least 1.</param>
    /// <param name="minReward">Exclusive lower bound for rewards (default: unbounded).</param>
    /// <param name="maxReward">Inclusive upper bound for rewards (default: unbounded).</param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="nArms"/> is smaller than 1.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="minReward"/> is not smaller than <paramref name="maxReward"/>.
    /// </exception>
    public GaussianBandit(Random random, int nArms, double minReward = double.NegativeInfinity, double maxReward = double.PositiveInfinity) {
      if (random == null) throw new ArgumentNullException("random");
      if (nArms < 1) throw new ArgumentOutOfRangeException("nArms", nArms, "The bandit needs at least one arm.");
      // Pull() only accepts rewards in (minReward, maxReward]; an empty interval
      // would make its rejection loop run forever, so it is rejected up front.
      if (minReward >= maxReward) throw new ArgumentException("minReward must be smaller than maxReward.", "minReward");

      this.MaxReward = maxReward;
      this.MinReward = minReward;
      this.random = random;
      this.NumArms = nArms;

      exp = new double[nArms];
      stdDev = new double[nArms];
      OptimalExpectedReward = double.NegativeInfinity;
      var bestQ = double.NegativeInfinity;

      // The standard normal quantile does not depend on the arm, so compute it once.
      var z = alglib.invnormaldistribution(ExtremeQuantileLevel);

      // Arm parameters are drawn independently for every arm.
      for (int i = 0; i < nArms; i++) {
        exp[i] = Rand.RandNormal(random); // exp values for arms is N(0,1) distributed
        // Reciprocal of a gamma draw, stored and used as the standard deviation.
        // NOTE(review): the previous comment called this the variance; confirm
        // whether a square root was intended here.
        stdDev[i] = 1.0 / Rand.GammaRand(random, 1);

        if (exp[i] > OptimalExpectedReward) {
          OptimalExpectedReward = exp[i];
          OptimalExpectedRewardArm = i;
        }

        // Upper quantile of the (untruncated) reward distribution of arm i.
        var q = z * stdDev[i] + exp[i];
        if (q > bestQ) {
          bestQ = q;
          OptimalMaximalRewardArm = i;
        }
      }
    }

    /// <summary>
    /// Pulls the given arm. The reward is normally distributed with mean
    /// exp[arm] and standard deviation stdDev[arm]; samples outside of
    /// (MinReward, MaxReward] are discarded and redrawn.
    /// </summary>
    /// <param name="arm">Zero-based index of the arm to pull.</param>
    /// <returns>The sampled reward.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="arm"/> is negative or not smaller than <see cref="NumArms"/>.
    /// </exception>
    public double Pull(int arm) {
      if (arm < 0 || arm >= NumArms) throw new ArgumentOutOfRangeException("arm", arm, "The arm index must be in the range [0, NumArms).");

      double x;
      do {
        var z = Rand.RandNormal(random);
        x = z * stdDev[arm] + exp[arm];
      } while (x <= MinReward || x > MaxReward); // reject samples outside (MinReward, MaxReward]
      return x;
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences