Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/07/15 14:31:06 (10 years ago)
Author:
gkronber
Message:

#2283: Created a new branch to separate development from aballeit.

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr
Files:
4 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs

    r11849 r12290  
    12 12      public int Tries { get; private set; }
    13 13      public double MaxReward { get; private set; }
       14 +    private double avgValue = 0.0;
    14 15      public double Value {
    15 16        get {
    16    -        return Tries > 0 ? SumReward / Tries : 0.0;
       17 +        return Tries > 0 ? avgValue : double.PositiveInfinity;
    17 18        }
    18 19      }
     
    25 26        SumReward += reward;
    26 27        MaxReward = Math.Max(MaxReward, reward);
       28 +      var delta = reward - avgValue;
       29 +      //var alpha = 0.01;
       30 +      var alpha = Math.Max(1.0/Tries, 0.01);
       31 +      avgValue = avgValue + alpha * delta;
    27 32      }
    28 33
     
    31 36        Tries = 0;
    32 37        MaxReward = 0.0;
       38 +      avgValue = 0.0;
    33 39      }
    34 40
     
    36 42        return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
    37 43      }
    38    -
    39    -    public static Func<DefaultPolicyActionInfo, double> AverageReward {
    40    -      get {
    41    -        return (aInfo) =>
    42    -          aInfo.Tries == 0 ?
    43    -          double.PositiveInfinity :
    44    -          aInfo.SumReward / (double)aInfo.Tries;
    45    -      }
    46    -    }
    47 44    }
    48 45  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/MeanAndVariancePolicyActionInfo.cs

    r11849 r12290  
    26 26        estimator.Reset();
    27 27      }
       28 +
       29 +    public override string ToString() {
       30 +      return string.Format("{0:N3} {1,3}", AvgReward, Tries);
       31 +    }
    28 32    }
    29 33  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs

    r11806 r12290  
    11 11    public class BoltzmannExplorationPolicy : IBanditPolicy {
    12 12      private readonly double beta;
    13    -    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    14 13
    15    -    public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
    16    -
    17    -    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
       14 +    public BoltzmannExplorationPolicy(double beta)  {
    18 15        if (beta < 0) throw new ArgumentException();
    19 16        this.beta = beta;
    20    -      this.valueFunction = valueFunction;
    21 17      }
    22 18      public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     
    37 33
    38 34        var w = from aInfo in myActionInfos
    39    -              select Math.Exp(beta * valueFunction(aInfo));
       35 +              select Math.Exp(beta * aInfo.Value);
    40 36
    41 37        var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

    r11806 r12290  
    11 11      private readonly double eps;
    12 12      private readonly RandomPolicy randomPolicy;
    13    -    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    14 13      private readonly string desc;
    15 14
    16 15
    17    -    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
       16 +    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }
    18 17
    19    -    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
       18 +    public EpsGreedyPolicy(double eps, string desc) {
    20 19        this.eps = eps;
    21 20        this.randomPolicy = new RandomPolicy();
    22    -      this.valueFunction = valueFunction;
    23 21        this.desc = desc;
    24 22      }
     
    36 34            aIdx++;
    37 35
    38    -          var q = valueFunction(aInfo);
       36 +          var q = aInfo.Value;
    39 37
    40 38            if (q > bestQ) {
Note: See TracChangeset for help on using the changeset viewer.