Changeset 11832 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
- Timestamp: 01/27/15 16:34:34 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
- Files: 2 added, 8 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliPolicyActionInfo.cs
r11747 r11832
  9   9    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
 10  10      public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
 11      -     private double knownValue;
 12      -     public bool Disabled { get { return NumSuccess == -1; } }
 13  11        public int NumSuccess { get; private set; }
 14  12        public int NumFailure { get; private set; }
  …   …
 16  14        public double Value {
 17  15          get {
 18      -         if (Disabled) return knownValue;
 19      -         else
 20      -           return NumSuccess / (double)(Tries);
     16  +         return NumSuccess / (double)(Tries);
 21  17          }
 22  18        }
 23  19        public void UpdateReward(double reward) {
 24      -       Debug.Assert(!Disabled);
 25  20          //Debug.Assert(reward.IsAlmost(0.0) || reward.IsAlmost(1.0));
 26  21
  …   …
 29  24          else NumFailure++;
 30  25        }
 31      -     public void Disable(double reward) {
 32      -       this.NumSuccess = -1;
 33      -       this.NumFailure = -1;
 34      -       this.knownValue = reward;
 35      -     }
 36  26        public void Reset() {
 37  27          NumSuccess = 0;
 38  28          NumFailure = 0;
 39      -       knownValue = 0.0;
 40  29        }
 41  30        public void PrintStats() {
 42      -       Console.WriteLine("expected value {0,5:F2} disabled {1}", Value, Disabled);
     31  +       Console.WriteLine("expected value {0,5:F2}", Value);
 43  32        }
 44  33      }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliThompsonSamplingPolicy.cs
r11742 r11832
 21  21    foreach (var aInfo in myActionInfos) {
 22  22      aIdx++;
 23      -   if (aInfo.Disabled) continue;
 24  23      var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
 25  24      if (theta > maxTheta) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs
r11806 r11832
 14  14    public double Value {
 15  15      get {
 16  16        return Tries > 0 ? SumReward / Tries : 0.0;
 17  17      }
 18  18    }
 19  19    public DefaultPolicyActionInfo() {
 20      -   MaxReward = double.MinValue;
     20  +   MaxReward = 0.0;
 21  21    }
 22  22
  …   …
 33  33    }
 34  34
     35  + public override string ToString() {
     36  +   return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
     37  + }
     38  +
 35  39    public static Func<DefaultPolicyActionInfo, double> AverageReward {
 36  40      get {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs
r11742 r11832
 31  31    foreach (var aInfo in myActionInfos) {
 32  32      aIdx++;
 33      -   if (aInfo.Disabled) continue;
 34  33
 35  34      var tries = aInfo.Tries;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs
r11806 r11832
  8   8    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  9   9      public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo {
 10      -     private bool disabled;
 11      -     public bool Disabled { get { return disabled; } }
 12  10        private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
 13      -     private double knownValue;
 14  11        public int Tries { get { return estimator.N; } }
 15  12        public double SumReward { get { return estimator.Sum; } }
  …   …
 18  15        public double Value {
 19  16          get {
 20      -         if (disabled) return knownValue;
 21      -         else
 22      -           return AvgReward;
     17  +         return AvgReward;
 23  18          }
 24  19        }
 25  20
 26  21        public void UpdateReward(double reward) {
 27      -       Debug.Assert(!Disabled);
 28  22          estimator.UpdateReward(reward);
 29  23        }
 30  24
 31      -     public void Disable(double reward) {
 32      -       disabled = true;
 33      -       this.knownValue = reward;
 34      -     }
 35      -
 36  25        public void Reset() {
 37      -       disabled = false;
 38      -       knownValue = 0.0;
 39  26          estimator.Reset();
 40  27        }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
r11806 r11832
 14  14    var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
 15  15
 16      - int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16  + int totalTries = myActionInfos.Sum(a => a.Tries);
 17  17
 18  18    int aIdx = -1;
  …   …
 21  21    foreach (var aInfo in myActionInfos) {
 22  22      aIdx++;
 23      -   if (aInfo.Disabled) continue;
 24  23      double q;
 25  24      if (aInfo.Tries == 0) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs
r11806 r11832
 12  12    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
 13  13      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
 14      -   int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     14  +   int totalTries = myActionInfos.Sum(a => a.Tries);
 15  15      double bestQ = double.NegativeInfinity;
 16  16      int aIdx = -1;
  …   …
 18  18      foreach (var aInfo in myActionInfos) {
 19  19        aIdx++;
 20      -     if (aInfo.Disabled) continue;
 21  20        double q;
 22  21        if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11806 r11832
 67  67    <Compile Include="Bandits\IBandit.cs" />
 68  68    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     69  + <Compile Include="GrammarPolicies\GenericFunctionApproximationGrammarPolicy.cs" />
     70  + <Compile Include="GrammarPolicies\QLearningGrammarPolicy.cs" />
     71  + <Compile Include="GrammarPolicies\GenericContextualGrammarPolicy.cs" />
 69  72    <Compile Include="GrammarPolicies\GenericTDPolicy.cs" />
 70  73    <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
Note: See TracChangeset for help on using the changeset viewer.