Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/15 14:57:28 (9 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
Files:
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

    r11732 r11742
         5      5  using System.Text;
         6      6  using System.Threading.Tasks;
                7  using HeuristicLab.Common;
         7      8
         8         namespace HeuristicLab.Algorithms.Bandits {
         9           public class EpsGreedyPolicy : IPolicy {
                9  namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
               10    public class EpsGreedyPolicy : IBanditPolicy {
        10     11      private readonly double eps;
        11     12      private readonly RandomPolicy randomPolicy;
               13      private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
               14      private readonly string desc;
        12     15
        13             public EpsGreedyPolicy(double eps) {
               16
               17      public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
               18
               19      public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
        14     20        this.eps = eps;
        15     21        this.randomPolicy = new RandomPolicy();
               22        this.valueFunction = valueFunction;
               23        this.desc = desc;
        16     24      }
        17             public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
               25
               26      public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        18     27        Debug.Assert(actionInfos.Any());
        19     28        if (random.NextDouble() > eps) {
        20     29          // select best
        21     30          var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
        22                 int bestAction = -1;
               31          var bestActions = new List<int>();
        23     32          double bestQ = double.NegativeInfinity;
               33
        24     34          int aIdx = -1;
        25     35          foreach (var aInfo in myActionInfos) {
        26
        27     36            aIdx++;
        28     37            if (aInfo.Disabled) continue;
        29                   if (aInfo.Tries == 0) return aIdx;
        30     38
               39            var q = valueFunction(aInfo);
        31     40
        32                   var avgReward = aInfo.SumReward / aInfo.Tries;
        33                   //var q = avgReward;
        34                   var q = aInfo.MaxReward;
        35     41            if (q > bestQ) {
               42              bestActions.Clear();
               43              bestActions.Add(aIdx);
        36     44              bestQ = q;
        37                     bestAction = aIdx;
               45            } else if (q.IsAlmost(bestQ)) {
               46              bestActions.Add(aIdx);
        38     47            }
        39     48          }
        40                 Debug.Assert(bestAction >= 0);
        41                 return bestAction;
               49          Debug.Assert(bestActions.Any());
               50          return bestActions.SelectRandom(random);
        42     51        } else {
        43     52          // select random
    …
        46     55      }
        47     56
        48             public IPolicyActionInfo CreateActionInfo() {
               57      public IBanditPolicyActionInfo CreateActionInfo() {
        49     58        return new DefaultPolicyActionInfo();
        50     59      }
    …
        52     61
        53     62      public override string ToString() {
        54               return string.Format("EpsGreedyPolicy({0:F2})", eps);
               63        return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
        55     64      }
        56     65    }
Note: See TracChangeset for help on using the changeset viewer.