Free cookie consent management tool by TermsFeed Policy Generator

# source:branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/TruncatedNormalBandit.cs@11730

Last change on this file since 11730 was 11730, checked in by gkronber, 8 years ago

#2283: several major extensions for grammatical optimization

File size: 1.6 KB
Line
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
6using HeuristicLab.Common;
7
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Multi-armed bandit in which every arm yields a normally distributed reward
  /// that is truncated to the interval [0, 1] by rejection sampling.
  /// </summary>
  public class TruncatedNormalBandit : IBandit {
    // exclusive upper bound of the uniformly sampled location parameter of each arm
    private const double MaxExpectedReward = 0.7;
    // standard deviation of the normal distribution before truncation (same for all arms)
    private const double RewardStdDev = 0.1;

    /// <summary>Number of arms of this bandit.</summary>
    public int NumArms { get; private set; }

    /// <summary>Largest value in the per-arm expected rewards (reward of the best arm, for calculating regret).</summary>
    public double OptimalExpectedReward { get; private set; }

    /// <summary>Index of the arm whose expected reward equals <see cref="OptimalExpectedReward"/>.</summary>
    public int OptimalExpectedRewardArm { get; private set; }

    // the arm with the highest expected reward also has the highest probability of returning a reward of 1.0
    /// <summary>Index of the arm most likely to return the maximal reward; identical to <see cref="OptimalExpectedRewardArm"/>.</summary>
    public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }

    // source of randomness, used both for drawing the arm parameters and for pulling arms
    private readonly Random random;
    // expReward[i] is the mean of the untruncated normal distribution of arm i
    private readonly double[] expReward;

    /// <summary>
    /// Creates a bandit with <paramref name="nArms"/> arms. The expected reward of each arm is
    /// drawn independently and uniformly from [0, 0.7).
    /// </summary>
    /// <param name="random">Random number generator; it is stored and reused by <see cref="Pull"/>.</param>
    /// <param name="nArms">Number of arms.</param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
    public TruncatedNormalBandit(Random random, int nArms) {
      if (random == null) {
        throw new ArgumentNullException("random");
      }

      this.random = random;
      this.NumArms = nArms;
      // expected reward of arms is iid and uniformly distributed
      expReward = new double[nArms];
      // start below every possible value so that the first arm always becomes the initial optimum
      OptimalExpectedReward = double.NegativeInfinity;
      for (int i = 0; i < nArms; i++) {
        expReward[i] = random.NextDouble() * MaxExpectedReward;
        // strict comparison: on ties the arm with the lowest index stays the optimum
        if (expReward[i] > OptimalExpectedReward) {
          OptimalExpectedReward = expReward[i];
          OptimalExpectedRewardArm = i;
        }
      }
    }

    /// <summary>
    /// Pulls the given arm. The reward is drawn from a normal distribution centered at the arm's
    /// expected reward with std.dev 0.1; draws outside [0, 1] are rejected and redrawn.
    /// </summary>
    /// <param name="arm">Zero-based index of the arm to pull.</param>
    /// <returns>A reward in the interval [0, 1].</returns>
    public double Pull(int arm) {
      double x = 0;
      do {
        var z = Rand.RandNormal(random);
        x = z * RewardStdDev + expReward[arm];
      }
      while (x < 0 || x > 1); // rejection sampling: repeat until the sample lies within [0, 1]
      return x;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.