Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/17/15 19:13:19 (9 years ago)
Author:
gkronber
Message:

#2283: Implemented a first crude version of the Extreme Hunter algorithm in the branch.

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/GaussianBandit.cs

    r11849 r12876  
    1212    public int OptimalExpectedRewardArm { get; private set; }
    1313    public int OptimalMaximalRewardArm { get; private set; }
    14 
     14    public double MaxReward { get; private set; }
     15    public double MinReward { get; private set; }
    1516    private readonly Random random;
    1617    private readonly double[] exp;
    1718    private readonly double[] stdDev;
    18     public GaussianBandit(Random random, int nArms) {
     19    public GaussianBandit(Random random, int nArms, double minReward = double.NegativeInfinity, double maxReward = double.PositiveInfinity) {
     20      this.MaxReward = maxReward;
     21      this.MinReward = minReward;
    1922      this.random = random;
    2023      this.NumArms = nArms;
     
    3134          OptimalExpectedRewardArm = i;
    3235        }
    33         var q = alglib.invnormaldistribution(0.99) * stdDev[i] + exp[i];
     36        var q = alglib.invnormaldistribution(0.999) * stdDev[i] + exp[i];
    3437        if (q > bestQ) {
    3538          bestQ = q;
     
    3942    }
    4043
    41     // pulling an arm results in a truncated normally distributed reward
    42     // with mean expReward[i] and std.dev 0.1
     44    // pulling an arm results in a normally distributed reward
     45    // with mean exp[arm] and std.dev stdDev[arm], resampled until it lies in (MinReward, MaxReward]
    4346    public double Pull(int arm) {
    44       var z = Rand.RandNormal(random);
    45       var x = z * stdDev[arm] + exp[arm];
     47      double x;
     48      do {
     49        var z = Rand.RandNormal(random);
     50        x = z * stdDev[arm] + exp[arm];
     51      } while (x <= MinReward || x > MaxReward);
    4652      return x;
    4753    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/HeuristicLab.Problems.Bandits.csproj

    r11981 r12876  
    4141    <Compile Include="BanditHelper.cs" />
    4242    <Compile Include="BernoulliBandit.cs" />
     43    <Compile Include="ParetoBandit.cs" />
    4344    <Compile Include="GaussianBandit.cs" />
    4445    <Compile Include="GaussianMixtureBandit.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/IBandit.cs

    r11849 r12876  
    1212    int OptimalMaximalRewardArm { get; } // arm which is optimal for optimization of maximal reward
    1313
    14     double Pull(int arm); // pulling an arm returns a regret
     14    double Pull(int arm); // pulling an arm returns a reward
    1515  }
    1616}
Note: See TracChangeset for help on using the changeset viewer.