Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/MixtureBandit.cs @ 13231

Last change on this file since 13231 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 1.9 KB
RevLine 
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits {
  // Custom test case for the extreme hunter policy: each arm's reward is a
  // mixture of a point mass at zero (drawn with probability pZero[arm]) and a
  // Gaussian N(mu[arm], sigma[arm]^2) clipped to the interval [0, MaxReward].
  public class MixtureBandit : IBandit {
    private readonly double[] mu;     // mean of the Gaussian component, one entry per arm
    private readonly double[] sigma;  // standard deviation of the Gaussian component, one entry per arm
    private readonly double[] pZero;  // probability of drawing a zero reward, one entry per arm
    public int NumArms { get { return mu.Length; } }
    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }
    public double MaxReward { get; private set; }
    public double MinReward { get; private set; } // NOTE(review): never assigned, so it stays 0.0 — confirm this is intended
    private readonly Random random;

    // Convenience constructor: unit variance, no zero-inflation, unbounded rewards.
    // mu is materialized once here so a non-repeatable enumerable is enumerated only once
    // (the original chained mu.Select(...) twice plus ToArray, i.e. three enumerations).
    public MixtureBandit(Random random, IEnumerable<double> mu) : this(random, mu.ToArray(), null, null, double.PositiveInfinity) { }

    // Main constructor. sigma/pZero may be null, which selects the defaults
    // (sigma = 1.0 for every arm, pZero = 0.0 for every arm); this is backward
    // compatible since passing null previously threw NullReferenceException.
    // Side effect: samples every arm 100000 times via BanditHelper.SampleArms
    // to estimate the optimal expected reward and the best arms empirically.
    public MixtureBandit(Random random, IEnumerable<double> mu, IEnumerable<double> sigma, IEnumerable<double> pZero, double maxReward) {
      this.mu = mu.ToArray();
      this.sigma = sigma != null ? sigma.ToArray() : Enumerable.Repeat(1.0, this.mu.Length).ToArray();
      this.pZero = pZero != null ? pZero.ToArray() : new double[this.mu.Length]; // double[] defaults to all zeros
      this.MaxReward = maxReward;
      this.random = random;

      double optimalExpectedReward;
      int bestArmForMaxReward, bestArmForExpReward;

      BanditHelper.SampleArms(random, this, 100000, out optimalExpectedReward, out bestArmForExpReward, out bestArmForMaxReward);
      OptimalExpectedReward = optimalExpectedReward;
      OptimalExpectedRewardArm = bestArmForExpReward;
      OptimalMaximalRewardArm = bestArmForMaxReward;
    }

    // Draws one reward from the given arm: zero with probability pZero[arm],
    // otherwise a N(mu, sigma^2) sample clamped to [0, MaxReward].
    public double Pull(int arm) {
      if (random.NextDouble() < pZero[arm]) return 0.0;
      var z = Rand.RandNormal(random); // standard normal sample; scaled/shifted below
      var x = z * sigma[arm] + mu[arm];
      return Math.Max(0, Math.Min(MaxReward, x));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.