Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp: 01/27/15 16:34:34 (10 years ago)
Author: gkronber
Message: linear value function approximation and good results for poly-10 benchmark

Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
Files: 2 added, 8 edited

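Legend: Unmodified, Added, Removed. (The colour coding of the original view is not preserved here. In the listings below, a line that carries both an old and a new line number is unmodified; a line with only the old number was removed; a line with only the new number was added.)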
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliPolicyActionInfo.cs

    r11747 r11832  
    99namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    1010  public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
    11     private double knownValue;
    12     public bool Disabled { get { return NumSuccess == -1; } }
    1311    public int NumSuccess { get; private set; }
    1412    public int NumFailure { get; private set; }
     
    1614    public double Value {
    1715      get {
    18         if (Disabled) return knownValue;
    19         else
    20           return NumSuccess / (double)(Tries);
     16        return NumSuccess / (double)(Tries);
    2117      }
    2218    }
    2319    public void UpdateReward(double reward) {
    24       Debug.Assert(!Disabled);
    2520      //Debug.Assert(reward.IsAlmost(0.0) || reward.IsAlmost(1.0));
    2621
     
    2924      else NumFailure++;
    3025    }
    31     public void Disable(double reward) {
    32       this.NumSuccess = -1;
    33       this.NumFailure = -1;
    34       this.knownValue = reward;
    35     }
    3626    public void Reset() {
    3727      NumSuccess = 0;
    3828      NumFailure = 0;
    39       knownValue = 0.0;
    4029    }
    4130    public void PrintStats() {
    42       Console.WriteLine("expected value {0,5:F2} disabled {1}", Value, Disabled);
     31      Console.WriteLine("expected value {0,5:F2}", Value);
    4332    }
    4433  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliThompsonSamplingPolicy.cs

    r11742 r11832  
    2121      foreach (var aInfo in myActionInfos) {
    2222        aIdx++;
    23         if (aInfo.Disabled) continue;
    2423        var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
    2524        if (theta > maxTheta) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs

    r11806 r11832  
    1414    public double Value {
    1515      get {
    16           return Tries > 0 ? SumReward / Tries : 0.0;
     16        return Tries > 0 ? SumReward / Tries : 0.0;
    1717      }
    1818    }
    1919    public DefaultPolicyActionInfo() {
    20       MaxReward = double.MinValue;
     20      MaxReward = 0.0;
    2121    }
    2222
     
    3333    }
    3434
     35    public override string ToString() {
     36      return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
     37    }
     38
    3539    public static Func<DefaultPolicyActionInfo, double> AverageReward {
    3640      get {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs

    r11742 r11832  
    3131      foreach (var aInfo in myActionInfos) {
    3232        aIdx++;
    33         if (aInfo.Disabled) continue;
    3433
    3534        var tries = aInfo.Tries;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs

    r11806 r11832  
    88namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo {
    10     private bool disabled;
    11     public bool Disabled { get { return disabled; } }
    1210    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
    13     private double knownValue;
    1411    public int Tries { get { return estimator.N; } }
    1512    public double SumReward { get { return estimator.Sum; } }
     
    1815    public double Value {
    1916      get {
    20         if (disabled) return knownValue;
    21         else
    22           return AvgReward;
     17        return AvgReward;
    2318      }
    2419    }
    2520
    2621    public void UpdateReward(double reward) {
    27       Debug.Assert(!Disabled);
    2822      estimator.UpdateReward(reward);
    2923    }
    3024
    31     public void Disable(double reward) {
    32       disabled = true;
    33       this.knownValue = reward;
    34     }
    35 
    3625    public void Reset() {
    37       disabled = false;
    38       knownValue = 0.0;
    3926      estimator.Reset();
    4027    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11806 r11832  
    1414      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    1515
    16       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16      int totalTries = myActionInfos.Sum(a => a.Tries);
    1717
    1818      int aIdx = -1;
     
    2121      foreach (var aInfo in myActionInfos) {
    2222        aIdx++;
    23         if (aInfo.Disabled) continue;
    2423        double q;
    2524        if (aInfo.Tries == 0) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs

    r11806 r11832  
    1212    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1313      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    14       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     14      int totalTries = myActionInfos.Sum(a => a.Tries);
    1515      double bestQ = double.NegativeInfinity;
    1616      int aIdx = -1;
     
    1818      foreach (var aInfo in myActionInfos) {
    1919        aIdx++;
    20         if (aInfo.Disabled) continue;
    2120        double q;
    2221        if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11806 r11832  
    6767    <Compile Include="Bandits\IBandit.cs" />
    6868    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     69    <Compile Include="GrammarPolicies\GenericFunctionApproximationGrammarPolicy.cs" />
     70    <Compile Include="GrammarPolicies\QLearningGrammarPolicy.cs" />
     71    <Compile Include="GrammarPolicies\GenericContextualGrammarPolicy.cs" />
    6972    <Compile Include="GrammarPolicies\GenericTDPolicy.cs" />
    7073    <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
Note: See TracChangeset for help on using the changeset viewer.