Changeset 11742
- Timestamp: 01/09/15 14:57:28
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization
- Files: 4 added, 1 deleted, 41 edited, 3 moved
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliPolicyActionInfo.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class BernoulliPolicyActionInfo : IPolicyActionInfo {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
      public bool Disabled { get { return NumSuccess == -1; } }
      public int NumSuccess { get; private set; }
      public int NumFailure { get; private set; }
+     public int Tries { get { return NumSuccess + NumFailure; } }
+     public double Value { get { return NumSuccess / (double)(Tries); } }
      public void UpdateReward(double reward) {
        Debug.Assert(!Disabled);
  …
      }
      public void PrintStats() {
-       Console.WriteLine("expected value {0,5:F2} disabled {1}", NumSuccess / (double)NumFailure, Disabled);
+       Console.WriteLine("expected value {0,5:F2} disabled {1}", Value, Disabled);
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliThompsonSamplingPolicy.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class BernoulliThompsonSamplingPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class BernoulliThompsonSamplingPolicy : IBanditPolicy {
      // parameters of beta prior distribution
      private readonly double alpha = 1.0;
      private readonly double beta = 1.0;

-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
-       var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>(); // TODO: performance
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
+       var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>();
        int bestAction = -1;
        double maxTheta = double.NegativeInfinity;
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new BernoulliPolicyActionInfo();
      }
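For reference, Bernoulli Thompson sampling draws one sample from each arm's Beta posterior (here Beta(success + alpha, failure + beta) with the uniform prior alpha = beta = 1) and plays the arm with the largest sample. A self-contained sketch, independent of the classes above; the arm counts and the order-statistic Beta sampler are illustrative assumptions:

    using System;
    using System.Linq;

    class BernoulliThompsonDemo {
      // draws a Beta(a, b) sample for integer a, b >= 1 via the order-statistic
      // trick: the a-th smallest of (a + b - 1) uniforms is Beta(a, b) distributed
      static double SampleBeta(Random rand, int a, int b) {
        var u = Enumerable.Range(0, a + b - 1).Select(_ => rand.NextDouble()).OrderBy(x => x);
        return u.Skip(a - 1).First();
      }

      static void Main() {
        var rand = new Random(31415);
        int[] success = { 3, 10, 1 };  // illustrative per-arm counts
        int[] failure = { 7, 10, 1 };

        // sample theta ~ Beta(success + 1, failure + 1) per arm, play the argmax
        int bestArm = -1; double maxTheta = double.NegativeInfinity;
        for (int a = 0; a < success.Length; a++) {
          var theta = SampleBeta(rand, success[a] + 1, failure[a] + 1);
          if (theta > maxTheta) { maxTheta = theta; bestArm = a; }
        }
        Console.WriteLine("selected arm {0} (theta = {1:F2})", bestArm, maxTheta);
      }
    }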
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    // also called softmax policy
-   public class BoltzmannExplorationPolicy : IPolicy {
+   public class BoltzmannExplorationPolicy : IBanditPolicy {
      private readonly double beta;
+     private readonly Func<DefaultPolicyActionInfo, double> valueFunction;

-     public BoltzmannExplorationPolicy(double beta) {
+     public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }
+
+     public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
        if (beta < 0) throw new ArgumentException();
        this.beta = beta;
+       this.valueFunction = valueFunction;
      }
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        Debug.Assert(actionInfos.Any());

        // select best
-       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
+       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
        Debug.Assert(myActionInfos.Any(a => !a.Disabled));
-       double[] w = new double[myActionInfos.Length];

-       for (int a = 0; a < myActionInfos.Length; a++) {
-         if (myActionInfos[a].Disabled) {
-           w[a] = 0; continue;
-         }
-         if (myActionInfos[a].Tries == 0) return a;
-         var sumReward = myActionInfos[a].SumReward;
-         var tries = myActionInfos[a].Tries;
-         var avgReward = sumReward / tries;
-         w[a] = Math.Exp(beta * avgReward);
-       }
+       var w = from aInfo in myActionInfos
+               select aInfo.Disabled
+                 ? 0.0
+                 : Math.Exp(beta * valueFunction(aInfo));

-       var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
+       var bestAction = myActionInfos
+         .Select((aInfo, idx) => new { aInfo, idx })
+         .SampleProportional(random, w)
+         .Select(p => p.idx)
+         .First();
        Debug.Assert(bestAction >= 0);
-       Debug.Assert(bestAction < w.Length);
-       Debug.Assert(!myActionInfos[bestAction].Disabled);
        return bestAction;
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new DefaultPolicyActionInfo();
      }
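The Boltzmann policy above weights each enabled arm with exp(beta * value) and samples proportionally. A minimal standalone sketch of that weighting and of proportional sampling; the arm values are made up, and SampleProportional here is a stand-in for the extension method used in the policy:

    using System;
    using System.Linq;

    class SoftmaxDemo {
      // returns an index sampled with probability w[i] / sum(w)
      static int SampleProportional(Random rand, double[] w) {
        var total = w.Sum();
        var r = rand.NextDouble() * total;
        var acc = 0.0;
        for (int i = 0; i < w.Length; i++) {
          acc += w[i];
          if (r <= acc) return i;
        }
        return w.Length - 1; // guard against floating-point rounding
      }

      static void Main() {
        var rand = new Random(42);
        double beta = 2.0;
        double[] avgReward = { 0.1, 0.5, 0.9 }; // illustrative arm values
        var w = avgReward.Select(v => Math.Exp(beta * v)).ToArray();
        Console.WriteLine("selected arm {0}", SampleProportional(rand, w));
      }
    }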
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
       International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */

-   public class ChernoffIntervalEstimationPolicy : IPolicy {
+   public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
      private readonly double delta;
  …
        this.delta = delta;
      }
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        Debug.Assert(actionInfos.Any());
        // select best
-       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
-       int k = myActionInfos.Length;
+       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
+       int k = myActionInfos.Count(a => !a.Disabled);
        int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
-       for (int a = 0; a < myActionInfos.Length; a++) {
-         if (myActionInfos[a].Disabled) continue;
-         if (myActionInfos[a].Tries == 0) return a;
+       var aIdx = -1;
+       foreach (var aInfo in myActionInfos) {
+         aIdx++;
+         if (aInfo.Disabled) continue;
+         if (aInfo.Tries == 0) return aIdx;

-         var sumReward = myActionInfos[a].SumReward;
-         var tries = myActionInfos[a].Tries;
-
-         var avgReward = sumReward / tries;
+         var avgReward = aInfo.SumReward / aInfo.Tries;

          // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
          // var alpha = Math.Log(2 * totalTries * k / delta);
-         double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
-         var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
+         double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
+         var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
          if (q > bestQ) {
            bestQ = q;
-           bestAction = a;
+           bestAction = aIdx;
          }
        }
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new DefaultPolicyActionInfo();
      }
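The score in this policy follows Streeter and Smith: with alpha = ln(2 * totalTries * k / delta), each arm gets the optimistic estimate q = avg + (alpha + sqrt(2 * tries * avg * alpha + alpha^2)) / tries. A standalone computation of one such bound; delta, the counts, and the average reward are assumed values:

    using System;

    class ChernoffBoundDemo {
      static void Main() {
        double delta = 0.05;           // failure probability
        int k = 10, totalTries = 1000; // illustrative pull counts
        int n = 25;                    // tries of this arm
        double mu = 0.6;               // its average reward

        // alpha computed in log space, as in the policy above
        double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
        double q = mu + (alpha + Math.Sqrt(2 * n * mu * alpha + alpha * alpha)) / n;
        Console.WriteLine("alpha = {0:F3}, upper bound q = {1:F3}", alpha, q);
      }
    }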
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    // stores information that is relevant for most of the policies
-   public class DefaultPolicyActionInfo : IPolicyActionInfo {
+   public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
      public bool Disabled { get { return Tries == -1; } }
      public double SumReward { get; private set; }
+     public int Tries { get; private set; }
      public double MaxReward { get; private set; }
-     public int Tries { get; private set; }
      public double Value { get { return SumReward / Tries; } }
      public DefaultPolicyActionInfo() {
-       MaxReward = double.NegativeInfinity;
+       MaxReward = double.MinValue;
      }
  …
        Console.WriteLine("avg reward {0,5:F2} disabled {1}", SumReward / Tries, Disabled);
      }
+
+     public static Func<DefaultPolicyActionInfo, double> AverageReward {
+       get {
+         return (aInfo) =>
+           aInfo.Tries == 0 ?
+             double.PositiveInfinity :
+             aInfo.SumReward / (double)aInfo.Tries;
+       }
+     }
    }
  }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EmptyPolicyActionInfo.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class EmptyPolicyActionInfo : IPolicyActionInfo {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class EmptyPolicyActionInfo : IBanditPolicyActionInfo {
+     public double Value { get { return 0.0; } }
+
      public bool Disabled { get; private set; }
      public void UpdateReward(double reward) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs
r11732 → r11742

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class EpsGreedyPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class EpsGreedyPolicy : IBanditPolicy {
      private readonly double eps;
      private readonly RandomPolicy randomPolicy;
+     private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
+     private readonly string desc;

-     public EpsGreedyPolicy(double eps) {
+     public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
+
+     public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
        this.eps = eps;
        this.randomPolicy = new RandomPolicy();
+       this.valueFunction = valueFunction;
+       this.desc = desc;
      }
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        Debug.Assert(actionInfos.Any());
        if (random.NextDouble() > eps) {
          // select best
          var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
-         int bestAction = -1;
+         var bestActions = new List<int>();
          double bestQ = double.NegativeInfinity;
+
          int aIdx = -1;
          foreach (var aInfo in myActionInfos) {
            aIdx++;
            if (aInfo.Disabled) continue;
-           if (aInfo.Tries == 0) return aIdx;

-           var avgReward = aInfo.SumReward / aInfo.Tries;
-           //var q = avgReward;
-           var q = aInfo.MaxReward;
+           var q = valueFunction(aInfo);
+
            if (q > bestQ) {
+             bestActions.Clear();
+             bestActions.Add(aIdx);
              bestQ = q;
-             bestAction = aIdx;
+           } else if (q.IsAlmost(bestQ)) {
+             bestActions.Add(aIdx);
            }
          }
-         Debug.Assert(bestAction >= 0);
-         return bestAction;
+         Debug.Assert(bestActions.Any());
+         return bestActions.SelectRandom(random);
        } else {
          // select random
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new DefaultPolicyActionInfo();
      }
  …
      public override string ToString() {
-       return string.Format("EpsGreedyPolicy({0:F2})", eps);
+       return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
      }
    }
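The refactored EpsGreedyPolicy no longer hard-codes its greedy criterion (the old code used MaxReward); any Func<DefaultPolicyActionInfo, double> can be plugged in, and the default AverageReward criterion scores untried arms PositiveInfinity so they are drawn first. A self-contained sketch of how two such criteria can disagree; the ArmStats stand-in class and its numbers are assumptions, not the HeuristicLab types:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class ValueFunctionDemo {
      // stand-in for DefaultPolicyActionInfo with just the fields the criteria need
      class ArmStats { public int Tries; public double SumReward; public double MaxReward; }

      static void Main() {
        var arms = new List<ArmStats> {
          new ArmStats { Tries = 10, SumReward = 6.0, MaxReward = 0.9 },
          new ArmStats { Tries = 10, SumReward = 7.0, MaxReward = 0.8 },
        };

        // average reward: the default greedy criterion
        Func<ArmStats, double> avgReward = a => a.Tries == 0 ? double.PositiveInfinity : a.SumReward / a.Tries;
        // max observed reward: better suited to max k-armed bandit objectives
        Func<ArmStats, double> maxReward = a => a.Tries == 0 ? double.PositiveInfinity : a.MaxReward;

        Console.WriteLine("greedy by avg: arm {0}", arms.IndexOf(arms.OrderByDescending(avgReward).First()));
        Console.WriteLine("greedy by max: arm {0}", arms.IndexOf(arms.OrderByDescending(maxReward).First()));
      }
    }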
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/Exp3Policy.cs
r11730 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    public class Exp3Policy : BanditPolicy {
      private readonly Random random;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {

-   public class GaussianThompsonSamplingPolicy : IPolicy {
+   [Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
+   public class GaussianThompsonSamplingPolicy : IBanditPolicy {
      private bool compatibility;
  …
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
        int bestAction = -1;
  …
        double theta;
        if (compatibility) {
+         // old code used for old experiments (preserved because it performed very well)
          if (tries < 2) return aIdx;
          var mu = sampleMean;
  …
-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new MeanAndVariancePolicyActionInfo();
      }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class GenericThompsonSamplingPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class GenericThompsonSamplingPolicy : IBanditPolicy {
      private readonly IModel model;
  …
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
        int bestAction = -1;
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new ModelPolicyActionInfo((IModel)model.Clone());
      }

      public override string ToString() {
-       return string.Format("GenericThompsonSamplingPolicy({0})", model);
+       return string.Format("GenericThompsonSamplingPolicy(\"{0}\")", model);
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class MeanAndVariancePolicyActionInfo : IPolicyActionInfo {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo {
      private bool disabled;
      public bool Disabled { get { return disabled; } }
  …
      public double AvgReward { get { return estimator.Avg; } }
      public double RewardVariance { get { return estimator.Variance; } }
+     public double Value { get { return AvgReward; } }

      public void UpdateReward(double reward) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    // uses a statistical model to sample and update posterior distribution p(Reward | Data)
-   public class ModelPolicyActionInfo : IPolicyActionInfo {
+   public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
      private readonly IModel model;
      public bool Disabled { get { return Tries == -1; } }
+     public double Value { get { return model.SampleExpectedReward(new Random()); } }

      public int Tries { get; private set; }
  …
        model.PrintStats();
      }
+
+     public override string ToString() {
+       return string.Format("disabled {0} model {1}", Disabled, model);
+     }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/RandomPolicy.cs
r11732 → r11742

    using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class RandomPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class RandomPolicy : IBanditPolicy {

      public override string ToString() {
  …
      }

-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        return actionInfos
-         .Select((a, i) => Tuple.Create(a, i))
+         .Select((aInfo, idx) => Tuple.Create(aInfo, idx))
          .Where(p => !p.Item1.Disabled)
          .SelectRandom(random).Item2;
      }

-     public IPolicyActionInfo CreateActionInfo() {
-       return new EmptyPolicyActionInfo();
+     public IBanditPolicyActionInfo CreateActionInfo() {
+       return new DefaultPolicyActionInfo();
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs
r11730 → r11742

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
       International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */

-   public class ThresholdAscentPolicy : BanditPolicy {
-     const int numBins = 101;
-     const double binSize = 1.0 / (numBins - 1);
+   public class ThresholdAscentPolicy : IBanditPolicy {
+     public const int numBins = 101;
+     public const double binSize = 1.0 / (numBins - 1);

-     // for each arm store the number of observed rewards for each bin of size delta
-     // for delta = 0.01 we have 101 bins
-     // the first bin is freq of rewards >= 0 // all
-     // the second bin is freq of rewards > 0
-     // the third bin is freq of rewards > 0.01
-     // the last bin is for rewards > 0.99
-     //
-     // (also see RewardBin function)
-     private readonly int[,] armRewardHistogram; // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
-
-     private readonly int[] tries;
+     private class ThresholdAscentActionInfo : IBanditPolicyActionInfo {
+
+       // for each arm store the number of observed rewards for each bin of size delta
+       // for delta = 0.01 we have 101 bins
+       // the first bin is freq of rewards >= 0 // all
+       // the second bin is freq of rewards > 0
+       // the third bin is freq of rewards > 0.01
+       // the last bin is for rewards > 0.99
+       //
+       // (also see RewardBin function)
+       public int[] rewardHistogram = new int[numBins]; // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
+       public int Tries { get; private set; }
+       public int thresholdBin = 1;
+       public double Value { get { return rewardHistogram[thresholdBin] / (double)Tries; } }
+
+       public bool Disabled { get { return Tries == -1; } }
+
+       public void UpdateReward(double reward) {
+         Tries++;
+         for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
+           rewardHistogram[idx]++;
+       }
+
+       public void Disable() {
+         Tries = -1;
+       }
+
+       public void Reset() {
+         Tries = 0;
+         thresholdBin = 1;
+         Array.Clear(rewardHistogram, 0, rewardHistogram.Length);
+       }
+
+       public void PrintStats() {
+         if (Tries >= 0) {
+           Console.Write("{0,6}", Tries);
+         } else {
+           Console.Write("{0,6}", "");
+         }
+       }
+
+       // maps a reward value to it's bin
+       private static int RewardBin(double reward) {
+         Debug.Assert(reward >= 0 && reward <= 1.0);
+         // reward = 0 => 0
+         // ]0.00 .. 0.01] => 1
+         // ]0.01 .. 0.02] => 2
+         // ...
+         // ]0.99 .. 1.00] => 100
+         if (reward <= 0) return 0;
+         return (int)Math.Ceiling((reward / binSize));
+       }
+     }

      private readonly int s;
      private readonly double delta;

-     private int totalTries = 0;
-     private int thresholdBin; // bin index of current threshold
-     private const double maxTries = 1E6;
-
-     public ThresholdAscentPolicy(int numActions, int s = 100, double delta = 0.05)
-       : base(numActions) {
-       this.thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
+     public ThresholdAscentPolicy(int s = 100, double delta = 0.05) {
        this.s = s;
        this.delta = delta;
-       this.armRewardHistogram = new int[numActions, numBins];
-       this.tries = new int[numActions];
      }

-     // maps a reward value to it's bin
-     private static int RewardBin(double reward) {
-       Debug.Assert(reward >= 0 && reward <= 1.0);
-       // reward = 0 => 0
-       // ]0.00 .. 0.01] => 1
-       // ]0.01 .. 0.02] => 2
-       // ...
-       // ]0.99 .. 1.00] => 100
-       if (reward <= 0) return 0;
-       return (int)Math.Ceiling((reward / binSize));
-     }
-
-     private double U(double mu, int n, int k) {
+     private double U(double mu, int totalTries, int n, int k) {
        //var alpha = Math.Log(2.0 * totalTries * k / delta);
-       double alpha = Math.Log(2) + Math.Log(maxTries) + Math.Log(k) - Math.Log(delta); // totalTries is max iterations in original paper
+       double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
        return mu + (alpha + Math.Sqrt(2 * n * mu * alpha + alpha * alpha)) / n;
      }

-     public override int SelectAction() {
-       Debug.Assert(Actions.Any());
-       UpdateThreshold();
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
+       Debug.Assert(actionInfos.Any());
+       var myActionInfos = actionInfos.OfType<ThresholdAscentActionInfo>();
+       UpdateThreshold(myActionInfos);
+
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
-       int k = Actions.Count();
-       foreach (var a in Actions) {
-         if (tries[a] == 0) return a;
-         double mu = armRewardHistogram[a, thresholdBin] / (double)tries[a]; // probability of rewards > T
-         double q = U(mu, tries[a], k);
+       int k = myActionInfos.Count(a => !a.Disabled);
+       var totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
+       int aIdx = -1;
+       foreach (var aInfo in myActionInfos) {
+         aIdx++;
+         if (aInfo.Disabled) continue;
+         if (aInfo.Tries == 0) return aIdx;
+         double mu = aInfo.Value; // probability of rewards > T
+         double q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
          if (q > bestQ) {
            bestQ = q;
-           bestAction = a;
+           bestAction = aIdx;
          }
        }
-       Debug.Assert(Actions.Contains(bestAction));
+       Debug.Assert(bestAction > -1);
        return bestAction;
      }

-     private void UpdateThreshold() {
-       while (thresholdBin < (numBins - 1) && Actions.Sum(a => armRewardHistogram[a, thresholdBin]) >= s) {
+     private void UpdateThreshold(IEnumerable<ThresholdAscentActionInfo> actionInfos) {
+       var thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
+       while (thresholdBin < (numBins - 1) && actionInfos.Sum(a => a.rewardHistogram[thresholdBin]) >= s) {
          thresholdBin++;
          // Console.WriteLine("New threshold {0:F2}", T);
        }
+       foreach (var aInfo in actionInfos) {
+         aInfo.thresholdBin = thresholdBin;
+       }
      }

-     public override void UpdateReward(int action, double reward) {
-       Debug.Assert(Actions.Contains(action));
-       totalTries++;
-       tries[action]++;
-       // efficiency: we can start at the current threshold bin because all bins below that are not accessed in select-action
-       for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
-         armRewardHistogram[action, idx]++;
+     public IBanditPolicyActionInfo CreateActionInfo() {
+       return new ThresholdAscentActionInfo();
      }

-     public override void DisableAction(int action) {
-       base.DisableAction(action);
-       totalTries -= tries[action];
-       tries[action] = -1;
-     }
-
-     public override void Reset() {
-       base.Reset();
-       totalTries = 0;
-       thresholdBin = 1;
-       Array.Clear(tries, 0, tries.Length);
-       Array.Clear(armRewardHistogram, 0, armRewardHistogram.Length);
-     }
-
-     public override void PrintStats() {
-       for (int i = 0; i < tries.Length; i++) {
-         if (tries[i] >= 0) {
-           Console.Write("{0,6}", tries[i]);
-         } else {
-           Console.Write("{0,6}", "");
-         }
-       }
-       Console.WriteLine();
-     }
-
      public override string ToString() {
        return string.Format("ThresholdAscentPolicy({0},{1:F2})", s, delta);
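For reference, Threshold Ascent raises a shared reward threshold until fewer than s observations across all arms exceed it, and then scores each arm by the fraction of its rewards above that threshold. A self-contained sketch of just the threshold update from UpdateThreshold above; the per-arm cumulative counts are assumed data:

    using System;

    class ThresholdUpdateDemo {
      static void Main() {
        const int numBins = 101;
        int s = 100; // required support above the threshold

        // cumulative per-arm counts of rewards above each bin's threshold
        // (illustrative data for two arms; index 1 == count of rewards > 0)
        var armA = new int[numBins]; var armB = new int[numBins];
        for (int b = 1; b < numBins; b++) {
          armA[b] = Math.Max(0, 90 - b);
          armB[b] = Math.Max(0, 60 - 2 * b);
        }

        int thresholdBin = 1;
        while (thresholdBin < numBins - 1 && armA[thresholdBin] + armB[thresholdBin] >= s) {
          thresholdBin++;
        }
        Console.WriteLine("threshold bin = {0} (rewards > {1:F2})", thresholdBin, (thresholdBin - 1) / 100.0);
      }
    }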
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class UCB1Policy : IPolicy {
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   // policy for k-armed bandit (see Auer et al. 2002)
+   public class UCB1Policy : IBanditPolicy {
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
        int bestAction = -1;
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new DefaultPolicyActionInfo();
      }
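The selection loop of UCB1Policy is elided above; for reference, the standard UCB1 rule (Auer et al. 2002) plays the arm maximizing avg + sqrt(2 ln(totalTries) / tries). A self-contained sketch with assumed statistics (this shows the textbook bonus term, not necessarily the exact constant used in this class):

    using System;

    class Ucb1Demo {
      static void Main() {
        double[] sumReward = { 12.0, 30.0, 5.0 }; // illustrative statistics
        int[] tries = { 20, 50, 8 };
        int totalTries = 78;

        int bestAction = -1; double bestQ = double.NegativeInfinity;
        for (int a = 0; a < tries.Length; a++) {
          double avg = sumReward[a] / tries[a];
          double q = avg + Math.Sqrt(2 * Math.Log(totalTries) / tries[a]); // exploration bonus
          Console.WriteLine("arm {0}: avg {1:F2}, ucb {2:F2}", a, avg, q);
          if (q > bestQ) { bestQ = q; bestAction = a; }
        }
        Console.WriteLine("selected arm {0}", bestAction);
      }
    }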
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class UCB1TunedPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   // policy for k-armed bandit (see Auer et al. 2002)
+   public class UCB1TunedPolicy : IBanditPolicy {

-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
-       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
+       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
        int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

-       for (int a = 0; a < myActionInfos.Length; a++) {
-         if (myActionInfos[a].Disabled) continue;
-         if (myActionInfos[a].Tries == 0) return a;
+       int aIdx = -1;
+       foreach (var aInfo in myActionInfos) {
+         aIdx++;
+         if (aInfo.Disabled) continue;
+         if (aInfo.Tries == 0) return aIdx;

-         var sumReward = myActionInfos[a].SumReward;
-         var tries = myActionInfos[a].Tries;
+         var sumReward = aInfo.SumReward;
+         var tries = aInfo.Tries;

          var avgReward = sumReward / tries;
-         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
+         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
          if (q > bestQ) {
            bestQ = q;
-           bestAction = a;
+           bestAction = aIdx;
          }
        }
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new MeanAndVariancePolicyActionInfo();
      }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs
r11732 → r11742

    using System.Threading.Tasks;

- namespace HeuristicLab.Algorithms.Bandits {
-   public class UCBNormalPolicy : IPolicy {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+   public class UCBNormalPolicy : IBanditPolicy {

-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
-       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
+       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
        int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

-       for (int a = 0; a < myActionInfos.Length; a++) {
-         if (myActionInfos[a].Disabled) continue;
-         if (totalTries <= 1 || myActionInfos[a].Tries <= 1 || myActionInfos[a].Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
-
-         var tries = myActionInfos[a].Tries;
-         var avgReward = myActionInfos[a].AvgReward;
-         var rewardVariance = myActionInfos[a].RewardVariance;
-         var estVariance = 16 * rewardVariance * (Math.Log(totalTries - 1) / tries);
-         if (estVariance < 0) estVariance = 0; // numerical problems
-         var q = avgReward
-           + Math.Sqrt(estVariance);
+       int aIdx = -1;
+       foreach (var aInfo in myActionInfos) {
+         aIdx++;
+         if (aInfo.Disabled) continue;
+         if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
+
+         var tries = aInfo.Tries;
+         var avgReward = aInfo.AvgReward;
+         var rewardVariance = aInfo.RewardVariance;
+         var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
+         var q = avgReward + Math.Sqrt(estVariance);
          if (q > bestQ) {
            bestQ = q;
-           bestAction = a;
+           bestAction = aIdx;
          }
        }
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new MeanAndVariancePolicyActionInfo();
      }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs
r11732 → r11742

    using System.Text;
    using System.Threading.Tasks;
-
- namespace HeuristicLab.Algorithms.Bandits {
+ namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    /* Kocsis et al. Bandit based Monte-Carlo Planning */
-   public class UCTPolicy : IPolicy {
+   public class UCTPolicy : IBanditPolicy {
      private readonly double c;
  …
-     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
-       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
+     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
+       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
        int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

-       for (int a = 0; a < myActionInfos.Length; a++) {
-         if (myActionInfos[a].Disabled) continue;
-         if (myActionInfos[a].Tries == 0) return a;
-         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
+       int aIdx = -1;
+       foreach (var aInfo in myActionInfos) {
+         aIdx++;
+         if (aInfo.Disabled) continue;
+         if (aInfo.Tries == 0) return aIdx;
+         var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
          if (q > bestQ) {
            bestQ = q;
-           bestAction = a;
+           bestAction = aIdx;
          }
        }
  …
      }

-     public IPolicyActionInfo CreateActionInfo() {
+     public IBanditPolicyActionInfo CreateActionInfo() {
        return new DefaultPolicyActionInfo();
      }
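The UCT score is visible in the diff: q = SumReward / Tries + 2 * c * sqrt(ln(totalTries) / Tries). A tiny standalone check with assumed values for c and the counters:

    using System;

    class UctDemo {
      static void Main() {
        double c = 0.7;  // exploration constant (assumed)
        double sumReward = 14.0; int tries = 20; int totalTries = 100;

        double q = sumReward / tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / tries);
        Console.WriteLine("UCT score = {0:F3}", q); // 0.7 + 1.4 * sqrt(4.605 / 20) ≈ 1.372
      }
    }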
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11732 → r11742

    <ItemGroup>
      <Compile Include="BanditHelper.cs" />
+     <Compile Include="BanditPolicies\BernoulliPolicyActionInfo.cs" />
+     <Compile Include="BanditPolicies\BernoulliThompsonSamplingPolicy.cs" />
+     <Compile Include="BanditPolicies\BoltzmannExplorationPolicy.cs" />
+     <Compile Include="BanditPolicies\ChernoffIntervalEstimationPolicy.cs" />
+     <Compile Include="BanditPolicies\DefaultPolicyActionInfo.cs" />
+     <Compile Include="BanditPolicies\EpsGreedyPolicy.cs" />
+     <Compile Include="BanditPolicies\GaussianThompsonSamplingPolicy.cs" />
+     <Compile Include="BanditPolicies\GenericThompsonSamplingPolicy.cs" />
+     <Compile Include="BanditPolicies\MeanAndVariancePolicyActionInfo.cs" />
+     <Compile Include="BanditPolicies\ModelPolicyActionInfo.cs" />
+     <Compile Include="BanditPolicies\RandomPolicy.cs" />
+     <Compile Include="BanditPolicies\ThresholdAscentPolicy.cs" />
+     <Compile Include="BanditPolicies\UCB1Policy.cs" />
+     <Compile Include="BanditPolicies\UCB1TunedPolicy.cs" />
+     <Compile Include="BanditPolicies\UCBNormalPolicy.cs" />
+     <Compile Include="BanditPolicies\UCTPolicy.cs" />
      <Compile Include="Bandits\BernoulliBandit.cs" />
      <Compile Include="Bandits\GaussianBandit.cs" />
  …
      <Compile Include="Bandits\IBandit.cs" />
      <Compile Include="Bandits\TruncatedNormalBandit.cs" />
+     <Compile Include="GrammarPolicies\RandomPolicy.cs" />
+     <Compile Include="IPolicy.cs" />
+     <Compile Include="IBanditPolicy.cs" />
+     <Compile Include="IBanditPolicyActionInfo.cs" />
      <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
-     <Compile Include="IPolicyActionInfo.cs" />
      <Compile Include="Models\BernoulliModel.cs" />
      <Compile Include="Models\GaussianModel.cs" />
      <Compile Include="Models\IModel.cs" />
-     <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\BoltzmannExplorationPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\BernoulliPolicyActionInfo.cs" />
-     <Compile Include="Policies\ModelPolicyActionInfo.cs" />
-     <Compile Include="Policies\EpsGreedyPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\GenericThompsonSamplingPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\MeanAndVariancePolicyActionInfo.cs" />
-     <Compile Include="Policies\DefaultPolicyActionInfo.cs" />
-     <Compile Include="Policies\EmptyPolicyActionInfo.cs" />
-     <Compile Include="Policies\RandomPolicy.cs" />
-     <Compile Include="Policies\UCB1Policy.cs" />
-     <Compile Include="IPolicy.cs" />
-     <Compile Include="Policies\UCB1TunedPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\UCBNormalPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
-     <Compile Include="Policies\UCTPolicy.cs">
-       <SubType>Code</SubType>
-     </Compile>
      <Compile Include="Properties\AssemblyInfo.cs" />
    </ItemGroup>
  …
        <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
        <Name>HeuristicLab.Common</Name>
      </ProjectReference>
+     <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
+       <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
+       <Name>HeuristicLab.Problems.GrammaticalOptimization</Name>
+     </ProjectReference>
    </ItemGroup>
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicy.cs
r11737 → r11742

    namespace HeuristicLab.Algorithms.Bandits {
-     // this interface represents a policy for reinforcement learning
-     public interface IPolicy {
-       int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
-       IPolicyActionInfo CreateActionInfo();
+     // this interface represents a policy for bandit problems
+     public interface IBanditPolicy {
+       int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actions);
+       IBanditPolicyActionInfo CreateActionInfo();
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs
r11737 → r11742

    namespace HeuristicLab.Algorithms.Bandits {
-     public interface IPolicyActionInfo {
+     public interface IBanditPolicyActionInfo {
        bool Disabled { get; }
+       double Value { get; }
+       int Tries { get; }
        void UpdateReward(double reward);
        void Disable();
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
r11732 → r11742

    namespace HeuristicLab.Algorithms.Bandits.Models {
      public class BernoulliModel : IModel {
+
        private int success;
        private int failure;
  …
          return new BernoulliModel() { failure = this.failure, success = this.success };
        }
+
+       public override string ToString() {
+         return string.Format("Bernoulli with Beta prior: mu={0:F2}", (success + alpha) / (success + alpha + failure + beta));
+       }
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
r11732 → r11742

    // 2) unknown mean and unknown variance
    public class GaussianModel : IModel {
+
      private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
  …
      }

+
      // sample from the posterior marginal for mu (expected value) equ. 91
      // p(µ|D) = T2αn (µ| µn, βn/(αnκn))

-     // sample from Tk distribution : http://stats.stackexchange.com/a/70270
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
  …
      return theta;

-     //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
-     //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
+     /*
+      * value function : 0.99-quantile
+     // sample posterior mean and posterior variance independently
+     var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
+     var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
+     var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
+
+     return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
+     */
    }
  …
      return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    }
+
+   public override string ToString() {
+     if (knownVariance) {
+       var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
+       var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
+       return string.Format("Gaussian(mu, var=0.1), mu ~ Gaussian(mu'={0:F3}, var'={1:F3})", posteriorMeanMean, posteriorMeanVariance);
+     } else {
+       var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
+       var posteriorK = priorK + estimator.N;
+       var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
+       double posteriorBeta;
+       if (estimator.N > 0) {
+         posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
+       } else {
+         posteriorBeta = precisionPriorBeta;
+       }
+       var nu = (int)(2 * posteriorAlpha);
+       var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
+       return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
+         nu, posteriorMean, meanVariance,
+         posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
+     }
+   }
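The known-variance branch of the new ToString prints the conjugate posterior of the mean: with prior N(mu0, var0) and n observations of known variance sigma^2, the posterior variance is 1 / (n / sigma^2 + 1 / var0) and the posterior mean is that variance times (mu0 / var0 + sum / sigma^2). A standalone check of this update; the prior and the rewards are assumed values:

    using System;
    using System.Linq;

    class GaussianPosteriorDemo {
      static void Main() {
        double muPrior = 0.0, varPrior = 1.0; // prior on the mean (assumed)
        double rewardVariance = 0.1;          // known observation variance
        double[] rewards = { 0.9, 1.1, 1.0, 0.8 };

        int n = rewards.Length;
        double sum = rewards.Sum();
        double posteriorVar = 1.0 / (n / rewardVariance + 1.0 / varPrior);
        double posteriorMean = posteriorVar * (muPrior / varPrior + sum / rewardVariance);
        Console.WriteLine("mu' = {0:F3}, var' = {1:F3}", posteriorMean, posteriorVar);
      }
    }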
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs
r11732 → r11742

    private readonly Random random;
    private readonly int contextLen;
-   private readonly IPolicy policy;
+   private readonly IBanditPolicy policy;

-   public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
+   public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IBanditPolicy policy) {
      this.maxLen = maxLen;
      this.problem = problem;
  …

-   private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
+   private Dictionary<string, IBanditPolicyActionInfo[]> contextActionInfos;
    private List<Tuple<string, int>> updateChain;

    private void InitPolicies(IGrammar grammar) {
-     this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
+     this.contextActionInfos = new Dictionary<string, IBanditPolicyActionInfo[]>();
      this.updateChain = new List<Tuple<string, int>>();
    }
  …
      var endIdx = Math.Min(startIdx + contextLen, ntIdx);
      var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
-     lft = problem.Hash(lft);
+     lft = problem.CanonicalRepresentation(lft);
      if (!contextActionInfos.ContainsKey(lft)) {
        contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs
r11732 → r11742

    private readonly Random random;
    private readonly IProblem problem;
-   private readonly IPolicy policy;
+   private readonly IBanditPolicy policy;

-   public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
+   public AlternativesSampler(IProblem problem, IBanditPolicy policy, int maxLen) {
      this.problem = problem;
      this.maxLen = maxLen;
  …

-   private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
+   private Dictionary<char, IBanditPolicyActionInfo[]> ntActionInfos;
    private List<Tuple<char, int>> updateChain;

    private void InitPolicies(IGrammar grammar) {
-     this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
+     this.ntActionInfos = new Dictionary<char, IBanditPolicyActionInfo[]>();
      this.updateChain = new List<Tuple<char, int>>();
      foreach (var nt in grammar.NonTerminalSymbols) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11732 → r11742

      <Compile Include="AlternativesContextSampler.cs" />
      <Compile Include="ExhaustiveRandomFirstSearch.cs" />
+     <Compile Include="MctsContextualSampler.cs">
+       <SubType>Code</SubType>
+     </Compile>
      <Compile Include="MctsSampler.cs" />
      <Compile Include="ExhaustiveDepthFirstSearch.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
r11732 → r11742

    public string ident;
    public int randomTries;
-   public int policyTries;
-   public IPolicyActionInfo actionInfo;
+   public IBanditPolicyActionInfo actionInfo;
    public TreeNode[] children;
    public bool done = false;
  …
    public override string ToString() {
-     return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
+     return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
    }
  }
  …
    private readonly Random random;
    private readonly int randomTries;
-   private readonly IPolicy policy;
+   private readonly IBanditPolicy policy;

    private List<TreeNode> updateChain;
  …
    // }

-   public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
+   public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
      this.maxLen = maxLen;
      this.problem = problem;
  …
    public void PrintStats() {
      var n = rootNode;
-     Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
+     Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
      while (n.children != null) {
        Console.WriteLine();
        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
-       Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
+       Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
+       Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
        //n.policy.PrintStats();
-       n = n.children.OrderByDescending(c => c.policyTries).First();
+       n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
      }
      Console.ReadLine();
  …
      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
      TreeNode n = rootNode;
-     bool done = phrase.IsTerminal;
      var curDepth = 0;
-     while (!done) {
+     while (!phrase.IsTerminal) {
        updateChain.Add(n);
  …
        if (n.randomTries == randomTries && n.children == null) {
          n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
-         //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
          foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
          treeSize += n.children.Length;
        }
-       n.policyTries++;
        // => select using bandit policy
        int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
  …
        curDepth++;
-
-       done = phrase.IsTerminal;

        // prepare for next iteration
  …
      // the last node is a leaf node (sentence is done), so we never need to visit this node again
      n.done = true;
-     n.actionInfo.Disable();

      treeDepth = Math.Max(treeDepth, curDepth);
  …
      foreach (var e in updateChain) {
        var node = e;
+       if (node.done) node.actionInfo.Disable();
        if (node.children != null && node.children.All(c => c.done)) {
          node.done = true;
  …
        if (!node.done) {
          node.actionInfo.UpdateReward(reward);
-         //policy.UpdateReward(action, reward / updateChain.Count);
        }
      }
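The sampler now keeps all per-node statistics in IBanditPolicyActionInfo objects created by the policy: selection asks the policy to choose among the children's infos, and the playout reward is backpropagated through the chain of visited nodes. A much-simplified, self-contained sketch of that control flow; the ArmInfo class and the eps-greedy stand-in are assumptions, not the HeuristicLab types:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class MctsFlowDemo {
      // stand-in for IBanditPolicyActionInfo: per-node statistics owned by the policy
      class ArmInfo { public int Tries; public double SumReward; }

      // eps-greedy stand-in for IBanditPolicy.SelectAction
      static int SelectAction(Random rand, IList<ArmInfo> infos) {
        if (rand.NextDouble() < 0.1) return rand.Next(infos.Count);
        int best = 0;
        for (int i = 0; i < infos.Count; i++) {
          if (infos[i].Tries == 0) return i; // untried arms first
          if (infos[i].SumReward / infos[i].Tries > infos[best].SumReward / infos[best].Tries) best = i;
        }
        return best;
      }

      static void Main() {
        var rand = new Random(0);
        var children = Enumerable.Range(0, 3).Select(_ => new ArmInfo()).ToList();
        double[] trueMeans = { 0.2, 0.8, 0.5 }; // hidden arm qualities

        for (int it = 0; it < 1000; it++) {
          var updateChain = new List<ArmInfo>();
          int sel = SelectAction(rand, children);
          updateChain.Add(children[sel]);  // a real descent adds one info per tree level
          double reward = rand.NextDouble() < trueMeans[sel] ? 1.0 : 0.0; // playout result
          foreach (var info in updateChain) { // backpropagation along the visited chain
            info.Tries++; info.SumReward += reward;
          }
        }
        Console.WriteLine(string.Join(" ", children.Select(c => c.Tries))); // most tries on arm 1
      }
    }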
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs
r11732 → r11742

    public static class Extensions {
      public static bool IsAlmost(this double x, double y) {
-       return Math.Abs(x - y) < 1.0e-6;
+       return Math.Abs(x - y) < 1.0e-12;
      }
  …
        var ssX = 0.0;
        var ssY = 0.0;
-       foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {
-         s += (p.x - meanX) * (p.y - meanY);
-         ssX += Math.Pow(p.x - meanX, 2);
-         ssY += Math.Pow(p.y - meanY, 2);
+       var xEnum = xs.GetEnumerator();
+       var yEnum = ys.GetEnumerator();
+       while (xEnum.MoveNext() & yEnum.MoveNext()) {
+         var x = xEnum.Current;
+         var y = yEnum.Current;
+         s += (x - meanX) * (y - meanY);
+         ssX += Math.Pow(x - meanX, 2);
+         ssY += Math.Pow(y - meanY, 2);
        }
+       if (xEnum.MoveNext() | yEnum.MoveNext()) throw new ArgumentException("lengths are not equal");

        if (s.IsAlmost(0)) return 0;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs
r11732 → r11742

    // right now only + and * is supported
-   public string Hash(string terminalPhrase) {
+   public string CanonicalRepresentation(string terminalPhrase) {
      return terminalPhrase;
      //var terms = terminalPhrase.Split('+');
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
r11732 → r11742

    using System.Globalization;
    using HeuristicLab.Algorithms.Bandits;
+   using HeuristicLab.Algorithms.Bandits.BanditPolicies;
    using HeuristicLab.Algorithms.Bandits.Models;
    using Microsoft.VisualStudio.TestTools.UnitTesting;
  …
      var nArms = 20;

-     // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01));
-     // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01));
-     // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01));
-     // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01));
-     Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1)));
-     Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1)));
+     // ThresholdAscent only works for rewards in [0..1] so far
+
+     Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
+     Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
  …
      var randSeed = 31415;
      var nArms = 20;
+     Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
+     Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
+     Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
+     Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
+     Console.WriteLine("Generic Thompson (Gaussian fixed var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
+     Console.WriteLine("Generic Thompson (Gaussian unknown var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
-     Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
+
      /*
  …
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
-     Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
+     Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
      Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
  …
      var randSeed = 31415;
      var nArms = 20;
+     Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
+     Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
+     Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
+     Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
-     Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
+     Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
+     Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));

      /*
  …
-     private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) {
+     private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
        TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
      }
-     private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) {
+     private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
        TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
      }
-     private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) {
+     private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
        TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
      }
-     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) {
+     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
        TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
      }

-     private void TestPolicy(int randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
+     private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
        var maxIt = 1E5;
        var reps = 10; // independent runs
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs
r11732 → r11742

    }

-   public string Hash(string terminalPhrase) {
+   public string CanonicalRepresentation(string terminalPhrase) {
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs
r11732 → r11742

    }

-   public string Hash(string terminalPhrase) {
+   public string CanonicalRepresentation(string terminalPhrase) {
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11732 → r11742

      IGrammar Grammar { get; }
      double Evaluate(string sentence);
-     string Hash(string terminalPhrase);
+     string CanonicalRepresentation(string terminalPhrase);
    }
  }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs
r11732 → r11742

    }

-   public string Hash(string terminalPhrase) {
+   public string CanonicalRepresentation(string terminalPhrase) {
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs
r11732 r11742
   public class ReadonlySequence : Sequence {
     public ReadonlySequence(string s)
-      : base(s) {
+      : base(s, s.Length) {
     }

     public ReadonlySequence(char ch)
-      : base(ch) {
+      : base(ch, 1) {
     }

     public ReadonlySequence(Sequence s)
-      : base(s) {
+      : base(s, s.Length) {
     }
… …
     public override int GetHashCode() {
       int h = 31 * Length;
-      foreach (var ch in this) { h += 31 * (byte)h; }
+      for (int i = 0; i < Length; i++) {
+        h += 31 * (byte)this[i];
+      }
       return h;
     }
-
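The GetHashCode change fixes a real bug rather than just restyling the loop: the old body folded h into itself and never read the loop variable ch, so all sequences of equal length produced identical hash codes. A standalone before/after demonstration (illustrative sketch, not code from the changeset):

    static int OldHash(string s) {
      int h = 31 * s.Length;
      foreach (var ch in s) { h += 31 * (byte)h; } // bug: uses h, not ch
      return h;
    }

    static int NewHash(string s) {
      int h = 31 * s.Length;
      for (int i = 0; i < s.Length; i++) { h += 31 * (byte)s[i]; }
      return h;
    }

    // OldHash("ab") == OldHash("xy") -> true: every 2-symbol sequence collides
    // NewHash("ab") == NewHash("xy") -> false

Note that the new hash sums symbol values and is therefore still insensitive to symbol order ("ab" and "ba" collide); that is legal for GetHashCode, just weaker than a positional hash would be.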
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs
r11732 r11742
     }

-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       return terminalPhrase;
     }
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs
r11732 r11742
       throw new NotImplementedException();
     }
-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       return terminalPhrase;
     }
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs
r11732 r11742
       return regex.Matches(sentence).Count;
     }
-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       return terminalPhrase;
     }
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs
r11732 r11742
       throw new NotImplementedException();
     }
-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       return terminalPhrase;
     }
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs
r11732 r11742
     }

-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       return terminalPhrase.Replace("rl", "").Replace("lr", "");
     }
-
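The ant problem's CanonicalRepresentation cancels adjacent opposite turns, so equivalent programs such as "lrm" and "m" share one key. A single Replace pass is not a fixed point, though: removing one pair can expose another (e.g. "rrll" reduces to "rl" in one pass, and "rl" would itself cancel). If full reduction were desired, a hypothetical variant would iterate until nothing changes:

    static string CanonicalAntPhrase(string phrase) {
      string previous;
      do {
        previous = phrase;
        phrase = phrase.Replace("rl", "").Replace("lr", "");
      } while (phrase != previous); // repeat until no pair was removed
      return phrase;
    }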
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs
r11732 r11742
     }

-    private Sequence() {
-      this.symbols = new char[maxIdx + 1];
+    private Sequence(int maxLength) {
+      this.symbols = new char[maxLength];
     }

     // create a sequence from a character
     public Sequence(char ch)
-      : this() {
+      : this(ch, maxIdx + 1) {
+    }
+
+    protected Sequence(char ch, int maxLength)
+      : this(maxLength) {
       this.len = 1;
       symbols[0] = ch;
… …

     // create a sequence from a string
-    public Sequence(string s)
-      : this() {
+    public Sequence(string s) : this(s, maxIdx + 1) { }
+    protected Sequence(string s, int maxLength)
+      : this(maxLength) {
       if (string.IsNullOrEmpty(s)) throw new ArgumentException();
       if (s.Length > (maxIdx + 1)) throw new ArgumentException();
… …

     // cloning ctor
-    public Sequence(Sequence original)
-      : this() {
+    public Sequence(Sequence original) : this(original, maxIdx + 1) { }
+    protected Sequence(Sequence original, int maxLength)
+      : this(maxLength) {
       this.len = original.len;
       Array.Copy(original.symbols, this.symbols, len);
… …
       if (startIdx >= this.len) throw new ArgumentException();
       if (startIdx + len > this.len) throw new ArgumentException();
-      var subsequence = new Sequence { len = len };
+      var subsequence = new Sequence(maxIdx + 1) { len = len };

       Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
-
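The new maxLength parameter lets subclasses choose the buffer size: mutable sequences keep allocating maxIdx + 1 symbols so they can grow in place, while ReadonlySequence (above) passes the exact content length. A small sketch of the effect, assuming the constructors from this changeset:

    var mutable  = new Sequence("ab");         // buffer of maxIdx + 1 chars, room to grow
    var readOnly = new ReadonlySequence("ab"); // buffer of exactly 2 chars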
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs
r11732 r11742

     private readonly IGrammar grammar;
-    private readonly ExpressionInterpreter interpreter;

     private readonly int N;
… …
     public SymbolicRegressionPoly10Problem() {
       this.grammar = new Grammar(grammarString);
-      this.interpreter = new ExpressionInterpreter();

       this.N = 500;
… …

     public double Evaluate(string sentence) {
+      var interpreter = new ExpressionInterpreter();
       return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
     }
… …

     // right now only + and * is supported
-    public string Hash(string terminalPhrase) {
+    public string CanonicalRepresentation(string terminalPhrase) {
       var terms = terminalPhrase.Split('+');
       return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
-
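Two independent changes here: the ExpressionInterpreter field becomes a local in Evaluate, which presumably makes Evaluate safe to call from the parallel runs configured in Main/Program.cs below (MaxDegreeOfParallelism = 4), and Hash becomes CanonicalRepresentation. Because + and * are commutative, sorting the factor symbols within each term maps equivalent expressions to one key; a hypothetical usage sketch:

    var problem = new SymbolicRegressionPoly10Problem();
    // "b*a+c" and "a*b+c" denote the same polynomial, so both canonicalize
    // to the same key (factors are sorted within each term)
    var k1 = problem.CanonicalRepresentation("b*a+c"); // e.g. "ab+c"
    var k2 = problem.CanonicalRepresentation("a*b+c"); // same string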
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11732 r11742
   using System.Threading.Tasks;
   using HeuristicLab.Algorithms.Bandits;
+  using HeuristicLab.Algorithms.Bandits.BanditPolicies;
   using HeuristicLab.Algorithms.Bandits.Models;
   using HeuristicLab.Algorithms.GrammaticalOptimization;
… …
   //var globalRandom = new Random(31415);
   var localRandSeed = 31415;
-  var reps = 20;
+  var reps = 8;

-  var policies = new Func<IPolicy>[]
+  var policies = new Func<IBanditPolicy>[]
     {
-      () => new GaussianThompsonSamplingPolicy(),
+      () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
+      () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
+      () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
+      () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
+      //() => new GaussianThompsonSamplingPolicy(),
       () => new GaussianThompsonSamplingPolicy(true),
-      () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
-      () => new BernoulliThompsonSamplingPolicy(),
+      () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
+      () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
+      //() => new BernoulliThompsonSamplingPolicy(),
       () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
       () => new RandomPolicy(),
… …
       () => new ChernoffIntervalEstimationPolicy(0.1),
       () => new ChernoffIntervalEstimationPolicy(0.2),
-      // (rand) => new ThresholdAscentPolicy(10, 0.01),
-      // (rand) => new ThresholdAscentPolicy(10, 0.05),
-      // (rand) => new ThresholdAscentPolicy(10, 0.1),
-      // (rand) => new ThresholdAscentPolicy(10, 0.2),
-      // (rand) => new ThresholdAscentPolicy(100, 0.01),
-      // (rand) => new ThresholdAscentPolicy(100, 0.05),
-      // (rand) => new ThresholdAscentPolicy(100, 0.1),
-      // (rand) => new ThresholdAscentPolicy(100, 0.2),
-      // (rand) => new ThresholdAscentPolicy(1000, 0.01),
-      // (rand) => new ThresholdAscentPolicy(1000, 0.05),
-      // (rand) => new ThresholdAscentPolicy(1000, 0.1),
-      // (rand) => new ThresholdAscentPolicy(1000, 0.2),
-      // (rand) => new ThresholdAscentPolicy(5000, 0.01),
-      // (rand) => new ThresholdAscentPolicy(10000, 0.01),
+      () => new ThresholdAscentPolicy(10, 0.01),
+      () => new ThresholdAscentPolicy(10, 0.05),
+      () => new ThresholdAscentPolicy(10, 0.1),
+      () => new ThresholdAscentPolicy(10, 0.2),
+      () => new ThresholdAscentPolicy(100, 0.01),
+      () => new ThresholdAscentPolicy(100, 0.05),
+      () => new ThresholdAscentPolicy(100, 0.1),
+      () => new ThresholdAscentPolicy(100, 0.2),
+      () => new ThresholdAscentPolicy(1000, 0.01),
+      () => new ThresholdAscentPolicy(1000, 0.05),
+      () => new ThresholdAscentPolicy(1000, 0.1),
+      () => new ThresholdAscentPolicy(1000, 0.2),
+      () => new ThresholdAscentPolicy(5000, 0.01),
+      () => new ThresholdAscentPolicy(10000, 0.01),
     };

   foreach (var problem in new Tuple<IProblem, int>[]
     {
-      Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+      //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
       Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
     })
… …
   var localRand = new Random(localRandSeed);
   var options = new ParallelOptions();
-  options.MaxDegreeOfParallelism = 1;
+  options.MaxDegreeOfParallelism = 4;
   Parallel.For(0, reps, options, (i) => {
     //var t = Task.Run(() => {
… …
   private static void RunDemo() {
     // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
-    // TODO: implement a GaussianWithUnknownMeanAndVariance model for Thompson sampling (verify with a unit test whether the correct mean and variance are identified)
     // TODO: separate value function from policy
-    // TODO: debug and verify implementation variants of Gaussian Thompson sampling with a unit test
-    // TODO: refactor policies to use banditInfos (policies are factories for bandit infos and a bandit info only has an update routine; each policy works only with its type of banditInfo)
     // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
     // TODO: exhaustive search with priority list
-    // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the old one? See comparison: old vs. new implementation of GaussianThompsonSampling
+    // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
     // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
     // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (besides solution quality and iterations until the best solution) => goal: faster iterations to a good result
-    // TODO: is the likelihood for R=1 easy to compute for Gaussian or GaussianMixture?
     // TODO: research thompson sampling for max bandit?
     // TODO: extensive test of strategies for the k-armed max bandit
     // TODO: verify TA implementation using example from the original paper
-    // TODO: compare results for different policies also for the symb-reg problem
     // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
     // TODO: implement thompson sampling for gaussian mixture models
     // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
     // TODO: implement ACO-style bandit policy
-    // TODO: implement sequences that can be manipulated in-place (instead of strings); alternatives are also stored as sequences; for a sequence the index of the first NT symbol can be stored
     // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
     // TODO: compare generating the shortest possible sentences completely at random vs. simply picking alternatives at random
… …
   var random = new Random();

-  var problem = new SymbolicRegressionPoly10Problem();
-  //var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
-  //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality!!!
+  var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward)
+  // Ant
+  // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+  // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for Ant
+  //var problem = new SantaFeAntProblem();
+  //var problem = new SymbolicRegressionProblem("Tower");
   //var problem = new PalindromeProblem();
   //var problem = new HardPalindromeProblem();
   //var problem = new RoyalPairProblem();
   //var problem = new EvenParityProblem();
-  var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) works well for Ant
+  var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
   //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
   //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
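The first TODO above (eps-greedy with max instead of average as value) matches the value-function overloads already used in the policies list; a minimal wiring sketch assembled from calls that appear in this changeset (parameter meanings inferred from context, not verified):

    var random  = new Random(31415);
    var problem = new SymbolicRegressionPoly10Problem();
    // value an arm by its best observed reward instead of its mean reward
    var policy  = new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max");
    var alg     = new MctsSampler(problem, 23, random, 10, policy);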