Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/MixtureBandit.cs @ 13231

Visit:

Last change on this file since 13231 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.9 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7
8	namespace HeuristicLab.Algorithms.Bandits {
9	// custom testcase for extreme hunter policy
/// <summary>
/// Multi-armed bandit in which every arm pays either exactly zero or a normally
/// distributed reward that is clamped to the interval [0, MaxReward].
/// </summary>
/// <remarks>
/// Arm <c>i</c> returns 0 with probability <c>pZero[i]</c>; otherwise it returns
/// <c>z * sigma[i] + mu[i]</c> for a draw <c>z</c> from <c>Rand.RandNormal</c>,
/// clamped to [0, MaxReward].
/// </remarks>
public class MixtureBandit : IBandit {
  // Per-arm parameters; all three arrays are indexed by arm number.
  private readonly double[] mu;     // location of the unclamped normal reward
  private readonly double[] sigma;  // scale of the unclamped normal reward
  private readonly double[] pZero;  // probability of a zero reward
  private readonly Random random;

  /// <summary>Number of arms, i.e. the number of entries supplied for <c>mu</c>.</summary>
  public int NumArms { get { return mu.Length; } }

  /// <summary>Reward of the best arm, for calculating regret.</summary>
  public double OptimalExpectedReward { get; private set; }

  /// <summary>Arm reported by <c>BanditHelper.SampleArms</c> as best with respect to expected reward.</summary>
  public int OptimalExpectedRewardArm { get; private set; }

  /// <summary>Arm reported by <c>BanditHelper.SampleArms</c> as best with respect to maximal reward.</summary>
  public int OptimalMaximalRewardArm { get; private set; }

  /// <summary>Upper clamp applied to every reward returned by <see cref="Pull"/>.</summary>
  public double MaxReward { get; private set; }

  /// <summary>Lower bound of every reward returned by <see cref="Pull"/>; always 0.</summary>
  public double MinReward { get; private set; }

  /// <summary>
  /// Creates a bandit with unit <c>sigma</c>, zero <c>pZero</c> and no upper reward limit for every arm.
  /// </summary>
  /// <param name="random">Random number generator used for all draws.</param>
  /// <param name="mu">
  /// One location parameter per arm. The sequence is enumerated more than once,
  /// so it should be repeatable (e.g. an array or list).
  /// </param>
  public MixtureBandit(Random random, IEnumerable<double> mu)
    : this(random, mu, mu.Select(_ => 1.0), mu.Select(_ => 0.0), double.PositiveInfinity) { }

  /// <summary>
  /// Creates a bandit from explicit per-arm parameters and determines the optimal arms
  /// via <c>BanditHelper.SampleArms</c>.
  /// </summary>
  /// <param name="random">Random number generator used for all draws.</param>
  /// <param name="mu">One location parameter per arm; its length defines <see cref="NumArms"/>.</param>
  /// <param name="sigma">Scale parameter per arm; needs at least one entry per arm (surplus entries are ignored).</param>
  /// <param name="pZero">Probability of a zero reward per arm; needs at least one entry per arm (surplus entries are ignored).</param>
  /// <param name="maxReward">Upper clamp for rewards.</param>
  /// <exception cref="ArgumentNullException">If any reference argument is null.</exception>
  /// <exception cref="ArgumentException">If <paramref name="sigma"/> or <paramref name="pZero"/> has fewer entries than <paramref name="mu"/>.</exception>
  public MixtureBandit(Random random, IEnumerable<double> mu, IEnumerable<double> sigma, IEnumerable<double> pZero, double maxReward) {
    if (random == null) throw new ArgumentNullException("random");
    if (mu == null) throw new ArgumentNullException("mu");
    if (sigma == null) throw new ArgumentNullException("sigma");
    if (pZero == null) throw new ArgumentNullException("pZero");

    this.mu = mu.ToArray();
    this.sigma = sigma.ToArray();
    this.pZero = pZero.ToArray();

    // Fail fast with a descriptive error instead of an IndexOutOfRangeException inside Pull.
    if (this.sigma.Length < this.mu.Length)
      throw new ArgumentException("sigma must contain at least one entry per arm (i.e. per entry of mu).", "sigma");
    if (this.pZero.Length < this.mu.Length)
      throw new ArgumentException("pZero must contain at least one entry per arm (i.e. per entry of mu).", "pZero");

    this.MaxReward = maxReward;
    this.MinReward = 0.0; // Pull never returns less than zero
    this.random = random;

    double optimalExpectedReward;
    int bestArmForMaxReward, bestArmForExpReward;

    // All fields must be initialized before this call because this instance is handed to the helper.
    // NOTE(review): 100000 is presumably the number of samples used by SampleArms - confirm against BanditHelper.
    BanditHelper.SampleArms(random, this, 100000, out optimalExpectedReward, out bestArmForExpReward, out bestArmForMaxReward);
    OptimalExpectedReward = optimalExpectedReward;
    OptimalExpectedRewardArm = bestArmForExpReward;
    OptimalMaximalRewardArm = bestArmForMaxReward;
  }

  /// <summary>Draws one reward from the given arm.</summary>
  /// <param name="arm">Zero-based index of the arm to pull.</param>
  /// <returns>
  /// 0 with probability <c>pZero[arm]</c>; otherwise <c>z * sigma[arm] + mu[arm]</c>
  /// clamped to [0, <see cref="MaxReward"/>].
  /// </returns>
  public double Pull(int arm) {
    // The zero-reward draw is made first and short-circuits the normal draw.
    if (random.NextDouble() < pZero[arm]) return 0.0;
    var z = Rand.RandNormal(random);
    var x = z * sigma[arm] + mu[arm];
    return Math.Max(0, Math.Min(MaxReward, x));
  }
}
45	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences