Changeset 12893 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
- Timestamp: 08/24/15 13:56:27
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
- Files: 2 added, 15 edited
Legend (diffs below are shown in unified style):
- Unmodified: context lines, no prefix
- Added: lines prefixed with +
- Removed: lines prefixed with -
- … marks elided lines
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/BernoulliPolicyActionInfo.cs
r11849 → r12893:

      public int NumFailure { get; private set; }
      public int Tries { get { return NumSuccess + NumFailure; } }
  +   public double MaxReward { get; private set; }
      public double Value {
        get {
  …
      //if (reward.IsAlmost(1.0)) NumSuccess++;
  +   MaxReward = Math.Max(MaxReward, reward);
      if (reward > 0) NumSuccess++;
      else NumFailure++;
  …
      NumSuccess = 0;
      NumFailure = 0;
  +   MaxReward = double.NegativeInfinity;
    }
    public void PrintStats() {
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs
r12876 → r12893:

    }
  - public void UpdateReward(double reward) {
  + public void UpdateReward(double reward)
  + {
  +   MaxReward = Math.Max(MaxReward, reward);
      Tries++;
      SumReward += reward;
  -   MaxReward = Math.Max(MaxReward, reward);
      var delta = reward - avgValue;
      double alpha = 1.0 / Tries;
  …
      SumReward = 0.0;
      Tries = 0;
  -   MaxReward = 0.0;
  +   MaxReward = double.NegativeInfinity;
      avgValue = 0.0;
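The relocated MaxReward update plus the incremental mean above follow a standard single-pass pattern; a minimal standalone sketch under names of my own (RunningStats is illustrative, not part of the changeset):

    using System;

    // Running mean via avg += (reward - avg) / tries, identical in effect to
    // SumReward / Tries but without a separately stored sum.
    public class RunningStats {
        public int Tries { get; private set; }
        public double MaxReward { get; private set; } = double.NegativeInfinity;
        public double Avg { get; private set; }

        public void Update(double reward) {
            MaxReward = Math.Max(MaxReward, reward); // update best-seen first, as in the diff
            Tries++;
            Avg += (reward - Avg) / Tries;           // incremental mean (alpha = 1/Tries)
        }
    }

Resetting MaxReward to double.NegativeInfinity rather than 0.0 (the other half of this hunk) ensures the first observed reward always becomes the maximum, even when rewards can be zero.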
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs
r12876 → r12893:

      if (minHeap.Count <= 1) return double.PositiveInfinity;
      double xk = minHeap.GetMin();
  -   if (xk.IsAlmost(0.0)) return double.NegativeInfinity;
  +   if (xk.IsAlmost(0.0)) return double.PositiveInfinity;
      var alpha = 1.0 / (minHeap.Count - 1) * minHeap.Skip(1).Sum(x => Math.Log(x) - Math.Log(xk));
      Debug.Assert(alpha > 0);
  …
      Debug.Assert(minHeap.Count == ((int)Math.Floor(n * R)));
  -   Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() < minHeap.GetMin());
  +   Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() <= minHeap.GetMin());
    }
  }
  …
    private OnlineHillEstimator hillEstimator;
    private List<double> rewards;
  + public double MaxReward { get; private set; }
    public double Value {
  …
    public void UpdateReward(double reward) {
      if (reward < 0.0) throw new ArgumentException("reward");
  +   MaxReward = Math.Max(MaxReward, reward);
      Tries++;
  +   reward = (1 / (1 - reward)); // transformation from [0..1]
      rewards.Add(reward);
      hillEstimator.Update(reward);
  …
    public void Reset() {
  +   MaxReward = double.NegativeInfinity;
      this.hillEstimator = new OnlineHillEstimator();
      this.rewards = new List<double>();
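The alpha computed in the first hunk is a Hill-type estimator of the extreme value index over the k largest rewards kept in minHeap; writing x_(1) ≥ … ≥ x_(k) for those rewards, with x_(k) = minHeap.GetMin(), the code computes (my reading of the diff, symbols mine):

    \hat{h} = \frac{1}{k-1} \sum_{i=1}^{k-1} \bigl( \ln x_{(i)} - \ln x_{(k)} \bigr)

The newly added transformation reward = 1/(1 - reward) maps rewards from [0,1) onto [1,∞), producing the heavy right tail this estimator expects; note that a reward of exactly 1 yields +∞ in double arithmetic, a case the hunk does not appear to guard against.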
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/MeanAndVariancePolicyActionInfo.cs
r12290 → r12893:

    public double SumReward { get { return estimator.Sum; } }
    public double AvgReward { get { return estimator.Avg; } }
  + public double MaxReward { get; private set; }
    public double RewardVariance { get { return estimator.Variance; } }
    public double Value {
  …
    public void UpdateReward(double reward) {
  +   MaxReward = Math.Max(MaxReward, reward);
      estimator.UpdateReward(reward);
    }

    public void Reset() {
  +   MaxReward = double.NegativeInfinity;
      estimator.Reset();
    }
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ModelPolicyActionInfo.cs
r11851 → r12893:

  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    private readonly IModel model;
  + public double MaxReward { get; private set; }
    public double Value {
      get {
  …
    public void UpdateReward(double reward) {
      Tries++;
  +   MaxReward = Math.Max(MaxReward, reward);
      model.Update(reward);
    }
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r12876 → r12893:

    <ErrorReport>prompt</ErrorReport>
    <WarningLevel>4</WarningLevel>
  + <UseVSHostingProcess>true</UseVSHostingProcess>
  </PropertyGroup>
  <ItemGroup>
  …
    <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
    <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
  + <Compile Include="Policies\BoltzmannExplorationWithCoolingPolicy.cs" />
  + <Compile Include="Policies\SingleArmPolicy.cs" />
    <Compile Include="Policies\IntervalEstimationPolicy.cs" />
    <Compile Include="Policies\ExtremeHunterPolicy.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs
r11806 → r12893:

  public interface IBanditPolicyActionInfo {
    //bool Disabled { get; }
  + double MaxReward { get; }
    double Value { get; }
    int Tries { get; }
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
r12876 → r12893:

      l = double.NegativeInfinity;
    } else {
  -   q = aInfo.SumReward / aInfo.Tries;
  +   q = aInfo.MaxReward;
      var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
      u = q + MaxReward * b;
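With q now the best observed reward instead of the empirical mean, the upper bound computed in this hunk is a Hoeffding-style confidence interval; transcribing the visible code (symbols mine: R is the policy's MaxReward scaling constant, T the total tries, t_i the tries of action i, k the number of actions, δ the confidence parameter):

    u_i = q_i + R \sqrt{ \frac{ \ln( 2 k T / \delta ) }{ 2 t_i } }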
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs
r12290 → r12893:

    private readonly double beta;

    public BoltzmannExplorationPolicy(double beta) {
      if (beta < 0) throw new ArgumentException();
      this.beta = beta;
  …
    Debug.Assert(actionInfos.Any());

  - // select best
    var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();

    // try any of the untries actions randomly
  - // for RoyalSequence it is much better to select the actions in the order of occurrence (all terminal alternatives first)
  - //if (myActionInfos.Any(aInfo => !aInfo.Disabled && aInfo.Tries == 0)) {
  - //  return myActionInfos
  - //    .Select((aInfo, idx) => new { aInfo, idx })
  - //    .Where(p => !p.aInfo.Disabled)
  - //    .Where(p => p.aInfo.Tries == 0)
  - //    .SelectRandom(random).idx;
  - //}
  + if (myActionInfos.Any(aInfo => aInfo.Tries == 0)) {
  +   return myActionInfos
  +     .Select((aInfo, idx) => new { aInfo, idx })
  +     .Where(p => p.aInfo.Tries == 0)
  +     .SelectRandom(random).idx;
  + }
  +
  + // using ranks
  + //var qualities = actionInfos.Select(i => i.MaxReward).ToArray(); // largest reward should have largest rank
  + //var ranks = Enumerable.Range(0, myActionInfos.Count()).ToArray();
  + //Array.Sort(qualities, ranks);
  + //// set same rank for same quality
  + ////for (int i = 0; i < ranks.Length - 1; i++) {
  + ////  if (qualities[i] == qualities[i + 1]) ranks[i + 1] = ranks[i];
  + ////}
  + //var rankForAction = new int[myActionInfos.Count()];
  + //for (int i = 0; i < rankForAction.Length; i++) {
  + //  rankForAction[ranks[i]] = i;
  + //}
  + //var w = from idx in Enumerable.Range(0, myActionInfos.Count())
  + //        select Math.Exp(beta * rankForAction[idx]);

  - var w = from aInfo in myActionInfos
  -         select Math.Exp(beta * aInfo.Value);
  + // windowing
  + var max = actionInfos.Select(i => i.MaxReward).Max();
  + var min = actionInfos.Select(i => i.MaxReward).Min();
  + double range = max - min;
  + var w = from aInfo in actionInfos
  +         select Math.Exp(beta * (aInfo.MaxReward - min) / range);

    var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
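A self-contained sketch of the windowing scheme added above (the helper name is mine, not part of the changeset). One caveat worth flagging: when every arm has the same MaxReward, range is 0 and the weights in the hunk become NaN; the sketch falls back to uniform weights in that case:

    using System;
    using System.Linq;

    static class WindowedBoltzmann {
        // Rescale best-seen rewards to [0,1] and weight them by exp(beta * scaled value).
        public static double[] Weights(double[] maxRewards, double beta) {
            double max = maxRewards.Max();
            double min = maxRewards.Min();
            double range = max - min;
            if (range <= 0.0)                       // all arms look equal: avoid 0/0
                return maxRewards.Select(_ => 1.0).ToArray();
            return maxRewards
                .Select(r => Math.Exp(beta * (r - min) / range))
                .ToArray();
        }
    }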
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs
r12876 → r12893 (whitespace-only change):

    } else {

  -   var avgReward = aInfo.SumReward / aInfo.Tries; 
  +   var avgReward = aInfo.SumReward / aInfo.Tries;

      // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs
r12290 → r12893:

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());
  +   var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
  +   int totalTries = myActionInfos.Select(i => i.Tries).Sum();
  +
  +   //var eps = Math.Exp(Math.Exp(-totalTries/200.0)) - 1;
  +
      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
        // select best
  -     var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;
  …
        aIdx++;

  -     var q = aInfo.Value;
  +     var q = aInfo.MaxReward;

        if (q > bestQ) {
  …
        }
        Debug.Assert(bestActions.Any());
  -     return bestActions.SelectRandom(random);
  +     //return bestActions.SelectRandom(random);
  +     return bestActions.First();
      } else {
        // select random
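A compact sketch of the resulting selection rule (class and method names are mine). The switch from bestActions.SelectRandom(random) to bestActions.First() makes tie-breaking deterministic, biasing exploitation toward lower-indexed arms:

    using System;
    using System.Collections.Generic;

    static class EpsGreedy {
        // With probability eps explore uniformly; otherwise exploit the arm with
        // the highest best-seen reward, ties going to the first such arm.
        public static int Select(Random random, IReadOnlyList<double> maxRewards, double eps) {
            if (random.NextDouble() < eps)
                return random.Next(maxRewards.Count);     // explore
            int best = 0;
            for (int i = 1; i < maxRewards.Count; i++)
                if (maxRewards[i] > maxRewards[best]) best = i;
            return best;                                  // exploit, deterministic ties
        }
    }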
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ExtremeHunterPolicy.cs
r12876 → r12893:

    public double delta { get; set; }
    public double b { get; set; }
  + public double n { get; set; }
  + public int minPulls { get; set; }

  - public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0) {
  + public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0, double n = 1.0E4, int minPulls = 100) {
      this.E = E; // parameter TODO
      this.D = D; // parameter TODO
  -   this.b = b; // parameter TODO
  +   this.b = b; // parameter: set to 1 in original paper "to consider a wide class of distributions"
      // private communication with Alexandra Carpentier:
      // For instance, on our synthetic experiments, we calibrated the constants by
  …
      // out that taking E = 1e-3 is acceptable. For all the datasets
      // (exact Pareto, approximate Pareto, and network data), we kept this same constant
  +
  +   // minPulls seems to be set to 100 in the experiments in extreme bandit paper
  +   this.minPulls = minPulls; // parameter: TODO (there are conditions for N given in the paper)
  +   this.n = n;
    }
  …
    var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
    double bestQ = double.NegativeInfinity;
  - int totalTries = myActionInfos.Sum(a => a.Tries);
  + // int totalTries = myActionInfos.Sum(a => a.Tries);
    int K = myActionInfos.Count();
  - double n = 1.0E2; // total tries parameter
  - double minPulls = 100; // parameter: TODO (there are conditions for N given in the paper)

    this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO
  …
    double t = aInfo.Tries;
    double h = aInfo.Value;
  - var thres = Math.Pow(t, h / (2 * b + 1));
  - double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
  - q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
  - Debug.Assert(q > 0);
  + if (double.IsInfinity(h)) q = 0;
  + else {
  +   var thres = Math.Pow(t, h / (2 * b + 1));
  +   double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
  +   q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
  +   Debug.Assert(q > 0);
  + }
    }
    if (q > bestQ) {
  …
    public override string ToString() {
  -   return "ExtremeHunter";
  +   return string.Format("ExtremeHunter(E={0:F2},D={1:F2},b={2:F2},n={3:F0},minPulls={4:F0}", E, D, b, n, minPulls);
    }
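For reference, the δ assigned above simplifies as follows; the B1, B2, and Gamma helpers used in eqn (5) are the confidence-width terms from the extreme bandit paper, defined elsewhere in this file and not visible in this hunk (transcription mine):

    \delta = \frac{ e^{-\ln \ln n} }{ 2 n K } = \frac{ 1 }{ 2 n K \ln n },
    \qquad
    q = \bigl( (c + B_2(t)) \, n \bigr)^{\, h + B_1(t)} \, \Gamma\bigl( h, B_1(t) \bigr)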
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ThresholdAscentPolicy.cs
r11976 → r12893:

    public int Tries { get; private set; }
    public int thresholdBin = 1;
  + //public double MaxReward { get { return Value; }}
  + public double MaxReward { get; private set; }
    public double Value {
      get {
  …
    public void UpdateReward(double reward) {
  +   MaxReward = Math.Max(MaxReward, reward);
      Tries++;
      for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
  …
    public void Reset() {
  +   MaxReward = double.NegativeInfinity;
      Tries = 0;
      thresholdBin = 1;
  …
    double bestQ = double.NegativeInfinity;
    int k = myActionInfos.Count();
  - var totalTries = myActionInfos.Sum(a => a.Tries);
  + //var totalTries = myActionInfos.Sum(a => a.Tries);
  + var totalTries = 100000;
    int aIdx = -1;
    foreach (var aInfo in myActionInfos) {
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r12876 → r12893:

    } else {

  -   q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
  +   //q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
  +   q = aInfo.MaxReward + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    }
    if (q > bestQ) {
  …
    public override string ToString() {
  -   return "UCB1Policy";
  +   return string.Format("UCB1Policy({0})", MaxReward);
    }
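The change swaps the exploitation term of the UCB1 index from the empirical mean to the best reward seen so far; with S_i the reward sum, M_i the maximum observed reward, t_i the tries of arm i, T the total tries, and R the policy's MaxReward scaling constant (symbols mine):

    before: \; q_i = \frac{S_i}{t_i} + R \sqrt{ \frac{2 \ln T}{t_i} }
    \qquad
    after: \; q_i = M_i + R \sqrt{ \frac{2 \ln T}{t_i} }

Classic UCB1 (Auer et al.) uses the mean; the max-based index aligns the policy with the max k-armed bandit objective targeted throughout this changeset.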
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
r12876 → r12893:

    var tries = aInfo.Tries;

  + //var avgReward = aInfo.MaxReward;
    var avgReward = sumReward / tries;
    q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));