Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/17/15 19:13:19 (8 years ago)
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11806 r12876  
    10 10  // policy for k-armed bandit (see Auer et al. 2002)
    11 11  public class UCB1Policy : IBanditPolicy {
       12    public double MaxReward { get; private set; }
       13    public UCB1Policy(double maxReward = 1.0) {
       14      this.MaxReward = maxReward;
       15    }
    12 16    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    13 17      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     …  …
    24 28        } else {
    25 29
    26             q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
       30          q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    27 31        }
    28 32        if (q > bestQ) {
Note: See TracChangeset for help on using the changeset viewer.