
Timestamp:
08/17/15 19:13:19
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies
Files:
3 added
4 edited

  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs

    r11806 → r12876

      namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
        public class ActiveLearningPolicy : IBanditPolicy {
    +     public double MaxReward { get; private set; }
    +     public ActiveLearningPolicy(double maxReward = 1.0) {
    +       this.MaxReward = maxReward;
    +     }
          public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
            var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      …
                q = aInfo.SumReward / aInfo.Tries;
                var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
    -           u = q + 0.5 * b;
    -           l = q - 0.5 * b;
    +           u = q + MaxReward * b;
    +           l = q - MaxReward * b;
              }
              bestActions.Add(aIdx);
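
    The edit above replaces the hard-coded 0.5 half-width factor with the new MaxReward property, so the confidence interval is scaled to rewards bounded by MaxReward rather than assuming unit-scaled rewards. Below is a minimal standalone sketch of the scaled bound computation; the names ScaledBound and Compute are illustrative and not part of the changeset.

    using System;

    // Minimal sketch (not the changeset code): a Hoeffding-style confidence
    // interval whose half-width is scaled by the maximum possible reward,
    // mirroring the MaxReward * b bound used in ActiveLearningPolicy above.
    public static class ScaledBound {
      // Returns lower and upper bounds for the mean reward of a single arm.
      public static Tuple<double, double> Compute(
          double sumReward, int tries, int totalTries, int k,
          double delta, double maxReward) {
        var q = sumReward / tries;  // empirical mean reward of this arm
        var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * tries));
        // The factor maxReward widens or narrows the interval to match the
        // known reward range; maxReward = 1.0 is the constructor default.
        return Tuple.Create(q - maxReward * b, q + maxReward * b);
      }
    }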
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs

    r11806 → r12876

                var avgReward = aInfo.SumReward / aInfo.Tries;

    -           // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
    +           // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
                // var alpha = Math.Log(2 * totalTries * k / delta);
                double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
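
    Aside from the comment typo fix, the surrounding context shows alpha computed as a sum of logarithms rather than as the logarithm of the product in the commented-out line. The two forms are algebraically identical, but the sum-of-logs form never materializes the possibly huge intermediate product 2 * totalTries * k / delta. A small self-contained check, with arbitrary example values:

    using System;

    // Sketch: Math.Log(2 * n * k / delta) equals
    // Math.Log(2) + Math.Log(n) + Math.Log(k) - Math.Log(delta).
    // The example values below are arbitrary.
    public static class AlphaDemo {
      public static void Main() {
        double totalTries = 1e8, k = 50, delta = 1e-9;
        double direct = Math.Log(2.0 * totalTries * k / delta);
        double summed = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
        Console.WriteLine("{0} vs {1}", direct, summed);  // the two agree up to rounding
      }
    }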
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11806 → r12876

        // policy for k-armed bandit (see Auer et al. 2002)
        public class UCB1Policy : IBanditPolicy {
    +     public double MaxReward { get; private set; }
    +     public UCB1Policy(double maxReward = 1.0) {
    +       this.MaxReward = maxReward;
    +     }
          public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
            var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      …
              } else {

    -           q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    +           q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
              }
              if (q > bestQ) {
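
    The UCB1 index from Auer et al. 2002, mean reward plus sqrt(2 ln n / n_i), is derived for rewards in [0, 1]; the change multiplies the exploration term by MaxReward so the policy can be applied when a larger reward bound is known. A minimal sketch of the scaled selection rule, using plain arrays and an illustrative class name (Ucb1Sketch) instead of the changeset's IBanditPolicyActionInfo interface:

    using System;
    using System.Linq;

    // Minimal sketch of the MaxReward-scaled UCB1 selection rule shown above.
    // Arm statistics are passed as plain arrays; the class name is illustrative.
    public static class Ucb1Sketch {
      public static int SelectAction(double[] sumRewards, int[] tries, double maxReward) {
        int totalTries = tries.Sum();
        int best = -1;
        double bestQ = double.NegativeInfinity;
        for (int i = 0; i < tries.Length; i++) {
          if (tries[i] == 0) return i;  // always try an untested arm first
          double q = sumRewards[i] / tries[i]
                   + maxReward * Math.Sqrt(2.0 * Math.Log(totalTries) / tries[i]);
          if (q > bestQ) { bestQ = q; best = i; }
        }
        return best;
      }
    }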
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11832 → r12876

      namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
        // policy for k-armed bandit (see Auer et al. 2002)
    +   // specific to Bernoulli distributed rewards
        public class UCB1TunedPolicy : IBanditPolicy {

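
    The added comment notes that UCB1-Tuned assumes Bernoulli (or at least [0, 1]-bounded) rewards: in the UCB1-Tuned index of Auer et al. 2002 the per-arm variance estimate is capped at 1/4, the largest variance a Bernoulli variable can have. A hedged sketch of that published index, not the code in this changeset:

    using System;

    // Sketch of the UCB1-Tuned index (Auer et al. 2002), not the changeset code:
    // the variance estimate is capped at 0.25, the maximum variance of a
    // Bernoulli variable, which is why the policy assumes rewards in [0, 1].
    public static class Ucb1TunedSketch {
      public static double Index(double sumReward, double sumSqrReward,
                                 int tries, int totalTries) {
        double avg = sumReward / tries;
        // Sample variance plus an exploration term of its own.
        double v = sumSqrReward / tries - avg * avg
                 + Math.Sqrt(2.0 * Math.Log(totalTries) / tries);
        return avg + Math.Sqrt(Math.Log(totalTries) / tries * Math.Min(0.25, v));
      }
    }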