Changeset 11732
Timestamp: 01/07/15 09:21:46
Location: branches/HeuristicLab.Problems.GrammaticalOptimization
Files: 16 added, 45 edited
branches/HeuristicLab.Problems.GrammaticalOptimization/GrammaticalOptimization.sln
r11727 → r11732:

    EndProject
    Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Common", "HeuristicLab.Common\HeuristicLab.Common.csproj", "{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}"
  + EndProject
  + Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Problems.GrammaticalOptimization.SymbReg", "HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj", "{17A7A380-86CE-482D-8D22-CBD70CC97F0D}"
    EndProject
    Global
  ...
    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.Build.0 = Release|Any CPU
  + {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  + {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Debug|Any CPU.Build.0 = Debug|Any CPU
  + {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Release|Any CPU.ActiveCfg = Release|Any CPU
  + {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Release|Any CPU.Build.0 = Release|Any CPU
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11730 → r11732:

    </PropertyGroup>
    <ItemGroup>
  +   <Reference Include="ALGLIB-3.7.0">
  +     <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
  +   </Reference>
      <Reference Include="System" />
      <Reference Include="System.Core" />
  ...
      <Compile Include="BanditHelper.cs" />
      <Compile Include="Bandits\BernoulliBandit.cs" />
  +   <Compile Include="Bandits\GaussianBandit.cs" />
      <Compile Include="Bandits\GaussianMixtureBandit.cs" />
      <Compile Include="Bandits\IBandit.cs" />
      <Compile Include="Bandits\TruncatedNormalBandit.cs" />
  +   <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
  +   <Compile Include="IPolicyActionInfo.cs" />
      <Compile Include="Models\BernoulliModel.cs" />
      <Compile Include="Models\GaussianModel.cs" />
  -   <Compile Include="Models\GaussianMixtureModel.cs" />
      <Compile Include="Models\IModel.cs" />
  -   <Compile Include="Policies\BanditPolicy.cs" />
  -   <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs" />
  -   <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
  -   <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
  -   <Compile Include="Policies\GenericThompsonSamplingPolicy.cs" />
  -   <Compile Include="Policies\ThresholdAscentPolicy.cs" />
  -   <Compile Include="Policies\UCTPolicy.cs" />
  -   <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
  -   <Compile Include="Policies\Exp3Policy.cs" />
  -   <Compile Include="Policies\EpsGreedyPolicy.cs" />
  +   <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\BoltzmannExplorationPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\BernoulliPolicyActionInfo.cs" />
  +   <Compile Include="Policies\ModelPolicyActionInfo.cs" />
  +   <Compile Include="Policies\EpsGreedyPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\GenericThompsonSamplingPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\MeanAndVariancePolicyActionInfo.cs" />
  +   <Compile Include="Policies\DefaultPolicyActionInfo.cs" />
  +   <Compile Include="Policies\EmptyPolicyActionInfo.cs" />
      <Compile Include="Policies\RandomPolicy.cs" />
      <Compile Include="Policies\UCB1Policy.cs" />
  -   <Compile Include="Policies\UCB1TunedPolicy.cs" />
  -   <Compile Include="Policies\UCBNormalPolicy.cs" />
      <Compile Include="IPolicy.cs" />
  +   <Compile Include="Policies\UCB1TunedPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\UCBNormalPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
  +   <Compile Include="Policies\UCTPolicy.cs">
  +     <SubType>Code</SubType>
  +   </Compile>
      <Compile Include="Properties\AssemblyInfo.cs" />
    </ItemGroup>
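The project file now ships OnlineMeanAndVarianceEstimator.cs, which is among the 16 added files and therefore not displayed in this changeset view. Below is a minimal sketch of what such a single-pass estimator could look like (Welford's update); the member names N, Sum, Avg, Variance, UpdateReward and Reset are inferred from how GaussianModel.cs uses the class further down, everything else is an assumption:

  using System;

  // Sketch only: the real file is added in this changeset but not shown in the diff.
  public class OnlineMeanAndVarianceEstimator {
    public int N { get; private set; }
    public double Sum { get; private set; }
    public double Avg { get; private set; }
    private double m2; // sum of squared deviations from the running mean

    // sample variance via Welford's single-pass update
    public double Variance { get { return N > 1 ? m2 / (N - 1) : 0.0; } }

    public void UpdateReward(double reward) {
      N++;
      Sum += reward;
      double delta = reward - Avg;
      Avg += delta / N;
      m2 += delta * (reward - Avg);
    }

    public void Reset() { N = 0; Sum = 0; Avg = 0; m2 = 0; }
  }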
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs
r11730 → r11732:

    // this interface represents a policy for reinforcement learning
    public interface IPolicy {
  -   IEnumerable<int> Actions { get; }
  -   int SelectAction(); // action selection ...
  -   void UpdateReward(int action, double reward); // ... and reward update are defined as usual
  -
  -   // policies must also support disabling of potential actions
  -   // for instance if we know that an action in a state has a deterministic
  -   // reward it is necessary to sample the action only once
  -   void DisableAction(int action);
  -
  -   // reset causes the policy to be reinitialized to its initial state (as after constructor-call)
  -   void Reset();
  -
  -   void PrintStats();
  +   int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
  +   IPolicyActionInfo CreateActionInfo();
    }
  }
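The interface change moves all per-action state out of the policy: a policy is now a stateless selection rule over a caller-owned collection of IPolicyActionInfo objects. A hedged usage sketch of that contract (the pull delegate standing in for a bandit environment is a placeholder, not part of the changeset):

  using System;
  using System.Linq;
  using HeuristicLab.Algorithms.Bandits;

  class PolicyUsageSketch {
    static void Run(IPolicy policy, Func<int, double> pull, int numActions, int steps) {
      var random = new Random();
      // the caller owns one action-info object per arm; the policy itself keeps no state
      var infos = Enumerable.Range(0, numActions)
                            .Select(_ => policy.CreateActionInfo())
                            .ToArray();
      for (int i = 0; i < steps; i++) {
        int a = policy.SelectAction(random, infos);
        infos[a].UpdateReward(pull(a)); // reward updates go to the statistics, not the policy
      }
    }
  }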
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
r11730 → r11732:

  namespace HeuristicLab.Algorithms.Bandits.Models {
    public class BernoulliModel : IModel {
  -   private readonly int numActions;
  -   private readonly int[] success;
  -   private readonly int[] failure;
  +   private int success;
  +   private int failure;

      // parameters of beta prior distribution
  ...
      private readonly double beta;

  -   public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
  -     this.numActions = numActions;
  -     this.success = new int[numActions];
  -     this.failure = new int[numActions];
  +   public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
        this.alpha = alpha;
        this.beta = beta;
      }

  -   public double[] SampleExpectedRewards(Random random) {
  +   public double SampleExpectedReward(Random random) {
        // sample bernoulli mean from beta prior
  -     var theta = new double[numActions];
  -     for (int a = 0; a < numActions; a++) {
  -       if (success[a] == -1)
  -         theta[a] = 0.0;
  -       else {
  -         theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
  -       }
  -     }
  -
  -     // no need to sample we know the exact expected value
  -     // the expected value of a bernoulli variable is just theta
  -     return theta.Select(t => t).ToArray();
  +     return Rand.BetaRand(random, success + alpha, failure + beta);
      }

  -   public void Update(int action, double reward) {
  -     const double EPSILON = 1E-6;
  -     Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
  -     if (Math.Abs(reward - 1.0) < EPSILON) {
  -       success[action]++;
  +   public void Update(double reward) {
  +     Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
  +     if (reward.IsAlmost(1.0)) {
  +       success++;
      } else {
  -       failure[action]++;
  +       failure++;
      }
    }

  -   public void Disable(int action) {
  -     success[action] = -1;
  -   }
  -
      public void Reset() {
  -     Array.Clear(success, 0, numActions);
  -     Array.Clear(failure, 0, numActions);
  +     success = 0;
  +     failure = 0;
      }

      public void PrintStats() {
  -     for (int i = 0; i < numActions; i++) {
  -       Console.Write("{0:F2} ", success[i] / (double)failure[i]);
  -     }
  +     Console.Write("{0:F2} ", success / (double)failure);
  +   }
  +
  +   public object Clone() {
  +     return new BernoulliModel() { failure = this.failure, success = this.success };
      }
    }
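With the per-action arrays gone, each BernoulliModel instance tracks a single Beta posterior: after s successes and f failures the sampled expected reward is a draw from Beta(s + alpha, f + beta). A small illustrative draw using the Rand.BetaRand helper already referenced above (the counts and prior are made-up numbers):

  using System;
  using HeuristicLab.Common;

  class BetaPosteriorSketch {
    static void Main() {
      var random = new Random();
      int success = 7, failure = 3;    // illustrative counts
      double alpha = 1.0, beta = 1.0;  // uniform prior
      // posterior is Beta(8, 4); its mean 8/12 ≈ 0.67 estimates the Bernoulli parameter
      double theta = Rand.BetaRand(random, success + alpha, failure + beta);
      Console.WriteLine("sampled expected reward: {0:F2}", theta);
    }
  }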
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
r11730 → r11732:

    using System;
  - using System.Collections.Generic;
  - using System.Diagnostics;
  - using System.Linq;
  - using System.Text;
  - using System.Threading.Tasks;
    using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.Models {
  -   // bayesian estimation of a Gaussian with unknown mean and known variance
  +   // bayesian estimation of a Gaussian with
  +   // 1) unknown mean and known variance
  +   // 2) unknown mean and unknown variance
      public class GaussianModel : IModel {
  -     private readonly int numActions;
  -     private readonly int[] tries;
  -     private readonly double[] sumRewards;
  +     private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

        // parameters of Gaussian prior for mean
  ...
        private readonly double meanPriorVariance;

  +     private readonly bool knownVariance;
        private readonly double rewardVariance = 0.1; // assumed know reward variance

  -     public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
  -       this.numActions = numActions;
  -       this.tries = new int[numActions];
  -       this.sumRewards = new double[numActions];
  +     // parameters of Gamma prior for precision (= inverse variance)
  +     private readonly int precisionPriorAlpha;
  +     private readonly double precisionPriorBeta;
  +
  +     // non-informative prior
  +     private const double priorK = 1.0;
  +
  +     // this constructor assumes the variance is known
  +     public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
          this.meanPriorMu = meanPriorMu;
          this.meanPriorVariance = meanPriorVariance;
  +
  +       this.knownVariance = true;
  +       this.rewardVariance = rewardVariance;
  +     }
  +
  +     // this constructor assumes the variance is also unknown
  +     // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85 - 89
  +     public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
  +       this.meanPriorMu = meanPriorMu;
  +       this.meanPriorVariance = meanPriorVariance;
  +
  +       this.knownVariance = false;
  +       this.precisionPriorAlpha = precisionPriorAlpha;
  +       this.precisionPriorBeta = precisionPriorBeta;
        }

  -     public double[] SampleExpectedRewards(Random random) {
  +     public double SampleExpectedReward(Random random) {
  +       if (knownVariance) {
  +         return SampleExpectedRewardKnownVariance(random);
  +       } else {
  +         return SampleExpectedRewardUnknownVariance(random);
  +       }
  +     }
  +
  +     private double SampleExpectedRewardKnownVariance(Random random) {
          // expected values for reward
  -       var theta = new double[numActions];
  -
  -       for (int a = 0; a < numActions; a++) {
  -         if (tries[a] == -1) {
  -           theta[a] = double.NegativeInfinity; // disabled action
  -         } else {
  -           // calculate posterior mean and variance (for mean reward)
  -
  -           // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
  -           var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
  -           var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
  -
  -           // sample a mean from the posterior
  -           theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
  -           // theta already represents the expected reward value => nothing else to do
  -         }
  -       }
  -
  -       return theta;
  +       // calculate posterior mean and variance (for mean reward)
  +
  +       // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
  +       var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
  +       var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
  +
  +       // sample a mean from the posterior
  +       var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
  +       // theta already represents the expected reward value => nothing else to do
  +       return posteriorMeanSample;
  +
  +       // return 0.99-quantile value
  +       //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
  +     }
  +
  +     // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
  +     private double SampleExpectedRewardUnknownVariance(Random random) {
  +       var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
  +       var posteriorK = priorK + estimator.N;
  +       var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
  +       double posteriorBeta;
  +       if (estimator.N > 0) {
  +         posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
  +       } else {
  +         posteriorBeta = precisionPriorBeta;
  +       }
  +
  +       // sample from the posterior marginal for mu (expected value), equation 91:
  +       // p(mu|D) = T_{2*alpha_n}(mu | mu_n, beta_n / (alpha_n * kappa_n))
  +
  +       // sample from a T_k distribution: http://stats.stackexchange.com/a/70270
  +       var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
  +
  +       var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
  +       return theta;
  +
  +       //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
  +       //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
        }

  -     public void Update(int action, double reward) {
  -       sumRewards[action] += reward;
  -       tries[action]++;
  -     }
  -
  -     public void Disable(int action) {
  -       tries[action] = -1;
  -       sumRewards[action] = 0.0;
  +     public void Update(double reward) {
  +       estimator.UpdateReward(reward);
        }

        public void Reset() {
  -       Array.Clear(tries, 0, numActions);
  -       Array.Clear(sumRewards, 0, numActions);
  +       estimator.Reset();
        }

        public void PrintStats() {
  -       for (int i = 0; i < numActions; i++) {
  -         Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
  -       }
  +       Console.Write("{0:F2} ", estimator.Avg);
  +     }
  +
  +     public object Clone() {
  +       if (knownVariance)
  +         return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
  +       else
  +         return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
        }
      }
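For the known-variance branch the code applies the standard conjugate update from Murphy 2007. As a quick numeric sanity check of the two posterior formulas, a sketch with illustrative values (prior N(0, 1), reward variance 0.1, 10 observations summing to 5.0):

  using System;

  class PosteriorSketch {
    static void Main() {
      double mu0 = 0.0, var0 = 1.0, rewardVar = 0.1;
      int n = 10; double sum = 5.0;                         // made-up observation counts
      double postVar = 1.0 / (n / rewardVar + 1.0 / var0);  // = 1/101 ≈ 0.0099
      double postMean = postVar * (mu0 / var0 + sum / rewardVar); // ≈ 0.495
      Console.WriteLine("posterior for the mean: N({0:F3}, {1:F4})", postMean, postVar);
    }
  }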
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/IModel.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public interface IModel {
  -     double[] SampleExpectedRewards(Random random);
  -     void Update(int action, double reward);
  -     void Disable(int action);
  +   // represents a model for the reward distribution (of an action given a state)
  +   public interface IModel : ICloneable {
  +     double SampleExpectedReward(Random random);
  +     void Update(double reward);
        void Reset();
        void PrintStats();
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public abstract class BanditPolicy : IPolicy {
  +   public abstract class BanditPolicy<TPolicyActionInfo> : IPolicy<TPolicyActionInfo> where TPolicyActionInfo : IPolicyActionInfo {
        public IEnumerable<int> Actions { get; private set; }
        private readonly int numInitialActions;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class BernoulliThompsonSamplingPolicy : BanditPolicy {
  -     private readonly Random random;
  -     private readonly int[] success;
  -     private readonly int[] failure;
  -
  +   public class BernoulliThompsonSamplingPolicy : IPolicy {
        // parameters of beta prior distribution
        private readonly double alpha = 1.0;
        private readonly double beta = 1.0;

  -     public BernoulliThompsonSamplingPolicy(Random random, int numActions)
  -       : base(numActions) {
  -       this.random = random;
  -       this.success = new int[numActions];
  -       this.failure = new int[numActions];
  -     }
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>(); // TODO: performance
  +       int bestAction = -1;
  +       double maxTheta = double.NegativeInfinity;
  +       var aIdx = -1;

  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  -       var maxTheta = double.NegativeInfinity;
  -       int bestAction = -1;
  -       foreach (var a in Actions) {
  -         var theta = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
  +       foreach (var aInfo in myActionInfos) {
  +         aIdx++;
  +         if (aInfo.Disabled) continue;
  +         var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
            if (theta > maxTheta) {
              maxTheta = theta;
  -           bestAction = a;
  +           bestAction = aIdx;
            }
          }
  +       Debug.Assert(bestAction > -1);
          return bestAction;
        }

  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -
  -       if (reward > 0) success[action]++;
  -       else failure[action]++;
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new BernoulliPolicyActionInfo();
        }

  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       success[action] = -1;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       Array.Clear(success, 0, success.Length);
  -       Array.Clear(failure, 0, failure.Length);
  -     }
  -
  -     public override void PrintStats() {
  -       for (int i = 0; i < success.Length; i++) {
  -         if (success[i] >= 0) {
  -           Console.Write("{0,5:F2}", success[i] / failure[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
  -
        public override string ToString() {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs
r11730 → r11732:

    using System.Text;
    using System.Threading.Tasks;
  + using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits {
      // also called softmax policy
  -   public class BoltzmannExplorationPolicy : BanditPolicy {
  -     private readonly Random random;
  -     private readonly double eps;
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  +   public class BoltzmannExplorationPolicy : IPolicy {
        private readonly double beta;

  -     public BoltzmannExplorationPolicy(Random random, int numActions, double beta)
  -       : base(numActions) {
  +     public BoltzmannExplorationPolicy(double beta) {
          if (beta < 0) throw new ArgumentException();
  -       this.random = random;
          this.beta = beta;
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  +     }
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       Debug.Assert(actionInfos.Any());
  +
  +       // select best
  +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
  +       Debug.Assert(myActionInfos.Any(a => !a.Disabled));
  +       double[] w = new double[myActionInfos.Length];
  +
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) {
  +           w[a] = 0; continue;
  +         }
  +         if (myActionInfos[a].Tries == 0) return a;
  +         var sumReward = myActionInfos[a].SumReward;
  +         var tries = myActionInfos[a].Tries;
  +         var avgReward = sumReward / tries;
  +         w[a] = Math.Exp(beta * avgReward);
  +       }
  +
  +       var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
  +       Debug.Assert(bestAction >= 0);
  +       Debug.Assert(bestAction < w.Length);
  +       Debug.Assert(!myActionInfos[bestAction].Disabled);
  +       return bestAction;
        }

  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  -       // select best
  -       var maxReward = double.NegativeInfinity;
  -       int bestAction = -1;
  -       if (Actions.Any(a => tries[a] == 0))
  -         return Actions.First(a => tries[a] == 0);
  -
  -       var ts = Actions.Select(a => Math.Exp(beta * sumReward[a] / tries[a]));
  -       var r = random.NextDouble() * ts.Sum();
  -
  -       var agg = 0.0;
  -       foreach (var p in Actions.Zip(ts, Tuple.Create)) {
  -         agg += p.Item2;
  -         if (agg >= r) return p.Item1;
  -       }
  -       throw new InvalidProgramException();
  -     }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -
  -       tries[action]++;
  -       sumReward[action] += reward;
  -     }
  -
  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       sumReward[action] = 0;
  -       tries[action] = -1;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -     }
  -
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new DefaultPolicyActionInfo();
  +     }
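SampleProportional is an extension method from HeuristicLab.Common; for readers without that dependency, a self-contained sketch of the same roulette-wheel selection over the softmax weights w (illustrative, not the library implementation):

  using System;
  using System.Linq;

  static class ProportionalSamplingSketch {
    // returns an index with probability proportional to w[i]; assumes w[i] >= 0 and Sum(w) > 0
    public static int SampleProportional(Random random, double[] w) {
      double r = random.NextDouble() * w.Sum();
      double agg = 0.0;
      for (int i = 0; i < w.Length; i++) {
        agg += w[i];
        if (agg >= r) return i;
      }
      return w.Length - 1; // guard against floating-point round-off
    }
  }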
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs
r11730 → r11732:

    International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */

  -   public class ChernoffIntervalEstimationPolicy : BanditPolicy {
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  -     private int totalTries = 0;
  +   public class ChernoffIntervalEstimationPolicy : IPolicy {
        private readonly double delta;

  -     public ChernoffIntervalEstimationPolicy(int numActions, double delta = 0.01)
  -       : base(numActions) {
  +     public ChernoffIntervalEstimationPolicy(double delta = 0.01) {
          this.delta = delta;
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
        }
  -
  -     public override int SelectAction() {
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       Debug.Assert(actionInfos.Any());
  +       // select best
  +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
  +       int k = myActionInfos.Length;
  +       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
          int bestAction = -1;
          double bestQ = double.NegativeInfinity;
  -       double k = Actions.Count();
  -       Debug.Assert(k > 0);
  -       foreach (var a in Actions) {
  -         if (tries[a] == 0) return a;
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) continue;
  +         if (myActionInfos[a].Tries == 0) return a;
  +
  +         var sumReward = myActionInfos[a].SumReward;
  +         var tries = myActionInfos[a].Tries;
  +
  +         var avgReward = sumReward / tries;
  +
            // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
            // var alpha = Math.Log(2 * totalTries * k / delta);
            double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
  -         double mu = sumReward[a] / tries[a];
  -         var q = mu + (alpha + Math.Sqrt(2 * tries[a] * mu * alpha + alpha * alpha)) / tries[a];
  +         var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
            if (q > bestQ) {
              bestQ = q;
  ...
            }
          }
  +       Debug.Assert(bestAction >= 0);
          return bestAction;
        }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -       totalTries++;
  -       tries[action]++;
  -       sumReward[action] += reward;
  +
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new DefaultPolicyActionInfo();
        }

  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       totalTries -= tries[action];
  -       tries[action] = -1;
  -       sumReward[action] = 0;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       totalTries = 0;
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -     }
  -
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
      public override string ToString() {
        return string.Format("ChernoffIntervalEstimationPolicy({0:F2})", delta);
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class EpsGreedyPolicy : BanditPolicy {
  -     private readonly Random random;
  +   public class EpsGreedyPolicy : IPolicy {
        private readonly double eps;
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
        private readonly RandomPolicy randomPolicy;

  -     public EpsGreedyPolicy(Random random, int numActions, double eps)
  -       : base(numActions) {
  -       this.random = random;
  +     public EpsGreedyPolicy(double eps) {
          this.eps = eps;
  -       this.randomPolicy = new RandomPolicy(random, numActions);
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  +       this.randomPolicy = new RandomPolicy();
        }
  -
  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       Debug.Assert(actionInfos.Any());
        if (random.NextDouble() > eps) {
          // select best
  -       var bestQ = double.NegativeInfinity;
  +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
          int bestAction = -1;
  -       foreach (var a in Actions) {
  -         if (tries[a] == 0) return a;
  -         var q = sumReward[a] / tries[a];
  -         if (bestQ < q) {
  +       double bestQ = double.NegativeInfinity;
  +       int aIdx = -1;
  +       foreach (var aInfo in myActionInfos) {
  +         aIdx++;
  +         if (aInfo.Disabled) continue;
  +         if (aInfo.Tries == 0) return aIdx;
  +
  +         var avgReward = aInfo.SumReward / aInfo.Tries;
  +         //var q = avgReward;
  +         var q = aInfo.MaxReward;
  +         if (q > bestQ) {
              bestQ = q;
  -           bestAction = a;
  +           bestAction = aIdx;
            }
          }
  ...
        } else {
          // select random
  -       return randomPolicy.SelectAction();
  +       return randomPolicy.SelectAction(random, actionInfos);
        }
      }
  -   public override void UpdateReward(int action, double reward) {
  -     Debug.Assert(Actions.Contains(action));
  -
  -     randomPolicy.UpdateReward(action, reward); // does nothing
  -     tries[action]++;
  -     sumReward[action] += reward;
  +
  +   public IPolicyActionInfo CreateActionInfo() {
  +     return new DefaultPolicyActionInfo();
      }

  -   public override void DisableAction(int action) {
  -     base.DisableAction(action);
  -     randomPolicy.DisableAction(action);
  -     sumReward[action] = 0;
  -     tries[action] = -1;
  -   }
  -
  -   public override void Reset() {
  -     base.Reset();
  -     randomPolicy.Reset();
  -     Array.Clear(tries, 0, tries.Length);
  -     Array.Clear(sumReward, 0, sumReward.Length);
  -   }
  -   public override void PrintStats() {
  -     for (int i = 0; i < sumReward.Length; i++) {
  -       if (tries[i] >= 0) {
  -         Console.Write(" {0,5:F2} {1}", sumReward[i] / tries[i], tries[i]);
  -       } else {
  -         Console.Write("-", "");
  -       }
  -     }
  -     Console.WriteLine();
  -   }
      public override string ToString() {
        return string.Format("EpsGreedyPolicy({0:F2})", eps);
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs
r11730 → r11732:

    using System;
  + using System.Collections.Generic;
    using System.Diagnostics;
    using System.Linq;
  ...

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class GaussianThompsonSamplingPolicy : BanditPolicy {
  -     private readonly Random random;
  -     private readonly double[] sampleMean;
  -     private readonly double[] sampleM2;
  -     private readonly int[] tries;
  +
  +   public class GaussianThompsonSamplingPolicy : IPolicy {
        private bool compatibility;
  ...

  -     public GaussianThompsonSamplingPolicy(Random random, int numActions, bool compatibility = false)
  -       : base(numActions) {
  -       this.random = random;
  -       this.sampleMean = new double[numActions];
  -       this.sampleM2 = new double[numActions];
  -       this.tries = new int[numActions];
  +     public GaussianThompsonSamplingPolicy(bool compatibility = false) {
          this.compatibility = compatibility;
        }

  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
  +       int bestAction = -1;
  +       double bestQ = double.NegativeInfinity;

  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  -       var maxTheta = double.NegativeInfinity;
  -       int bestAction = -1;
  -       foreach (var a in Actions) {
  -         if (tries[a] == -1) continue; // skip disabled actions
  +       int aIdx = -1;
  +       foreach (var aInfo in myActionInfos) {
  +         aIdx++;
  +         if (aInfo.Disabled) continue;
  +
  +         var tries = aInfo.Tries;
  +         var sampleMean = aInfo.AvgReward;
  +         var sampleVariance = aInfo.RewardVariance;
  +
          double theta;
          if (compatibility) {
  -         if (tries[a] < 2) return a;
  -         var mu = sampleMean[a];
  -         var variance = sampleM2[a] / tries[a];
  +         if (tries < 2) return aIdx;
  +         var mu = sampleMean;
  +         var variance = sampleVariance;
            var stdDev = Math.Sqrt(variance);
            theta = Rand.RandNormal(random) * stdDev + mu;
  ...
            // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
  -         var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / priorVariance);
  -         var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries[a] * sampleMean[a] / rewardVariance);
  +         var posteriorVariance = 1.0 / (tries / rewardVariance + 1.0 / priorVariance);
  +         var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries * sampleMean / rewardVariance);
  ...
            // theta already represents the expected reward value => nothing else to do
          }
  -       if (theta > maxTheta) {
  -         maxTheta = theta;
  -         bestAction = a;
  +
  +       if (theta > bestQ) {
  +         bestQ = theta;
  +         bestAction = aIdx;
          }
        }
  -     Debug.Assert(Actions.Contains(bestAction));
  +     Debug.Assert(bestAction > -1);
        return bestAction;
      }

  -   public override void UpdateReward(int action, double reward) {
  -     Debug.Assert(Actions.Contains(action));
  -     tries[action]++;
  -     var delta = reward - sampleMean[action];
  -     sampleMean[action] += delta / tries[action];
  -     sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
  +   public IPolicyActionInfo CreateActionInfo() {
  +     return new MeanAndVariancePolicyActionInfo();
      }

  -   public override void DisableAction(int action) {
  -     base.DisableAction(action);
  -     sampleMean[action] = 0;
  -     sampleM2[action] = 0;
  -     tries[action] = -1;
  -   }
  -
  -   public override void Reset() {
  -     base.Reset();
  -     Array.Clear(sampleMean, 0, sampleMean.Length);
  -     Array.Clear(sampleM2, 0, sampleM2.Length);
  -     Array.Clear(tries, 0, tries.Length);
  -   }
  +   //public override void UpdateReward(int action, double reward) {
  +   //  Debug.Assert(Actions.Contains(action));
  +   //  tries[action]++;
  +   //  var delta = reward - sampleMean[action];
  +   //  sampleMean[action] += delta / tries[action];
  +   //  sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
  +   //}

  -   public override void PrintStats() {
  -     for (int i = 0; i < sampleMean.Length; i++) {
  -       if (tries[i] >= 0) {
  -         Console.Write(" {0,5:F2} {1}", sampleMean[i] / tries[i], tries[i]);
  -       } else {
  -         Console.Write("{0,5}", "");
  -       }
  -     }
  -     Console.WriteLine();
  -   }
      public override string ToString() {
        return "GaussianThompsonSamplingPolicy";
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class GenericThompsonSamplingPolicy : BanditPolicy {
  -     private readonly Random random;
  +   public class GenericThompsonSamplingPolicy : IPolicy {
        private readonly IModel model;

  -     public GenericThompsonSamplingPolicy(Random random, int numActions, IModel model)
  -       : base(numActions) {
  -       this.random = random;
  +     public GenericThompsonSamplingPolicy(IModel model) {
          this.model = model;
        }

  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  -       var maxR = double.NegativeInfinity;
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
          int bestAction = -1;
  -       var expRewards = model.SampleExpectedRewards(random);
  -       foreach (var a in Actions) {
  -         var r = expRewards[a];
  -         if (r > maxR) {
  -           maxR = r;
  -           bestAction = a;
  +       double bestQ = double.NegativeInfinity;
  +       var aIdx = -1;
  +       foreach (var aInfo in myActionInfos) {
  +         aIdx++;
  +         if (aInfo.Disabled) continue;
  +         //if (aInfo.Tries == 0) return aIdx;
  +         var q = aInfo.SampleExpectedReward(random);
  +         if (q > bestQ) {
  +           bestQ = q;
  +           bestAction = aIdx;
            }
          }
  +       Debug.Assert(bestAction > -1);
          return bestAction;
        }

  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -
  -       model.Update(action, reward);
  -     }
  -
  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       model.Disable(action);
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       model.Reset();
  -     }
  -
  -     public override void PrintStats() {
  -       model.PrintStats();
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new ModelPolicyActionInfo((IModel)model.Clone());
        }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class RandomPolicy : BanditPolicy {
  -     private readonly Random random;
  +   public class RandomPolicy : IPolicy {

  -     public RandomPolicy(Random random, int numActions)
  -       : base(numActions) {
  -       this.random = random;
  -     }
  -
  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  -       return Actions.SelectRandom(random);
  -     }
  -     public override void UpdateReward(int action, double reward) {
  -       // do nothing
  -     }
  -     public override void PrintStats() {
  -       Console.WriteLine("Random");
  -     }
        public override string ToString() {
          return "RandomPolicy";
        }
  +
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       return actionInfos
  +         .Select((a, i) => Tuple.Create(a, i))
  +         .Where(p => !p.Item1.Disabled)
  +         .SelectRandom(random).Item2;
  +     }
  +
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new EmptyPolicyActionInfo();
  +     }
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class UCB1Policy : BanditPolicy {
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  -     private int totalTries = 0;
  -     public UCB1Policy(int numActions)
  -       : base(numActions) {
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  -     }
  -
  -     public override int SelectAction() {
  +   public class UCB1Policy : IPolicy {
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
  -       foreach (var a in Actions) {
  -         if (tries[a] == 0) return a;
  -         var q = sumReward[a] / tries[a] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[a]);
  +       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
  +
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) continue;
  +         if (myActionInfos[a].Tries == 0) return a;
  +         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + Math.Sqrt((2 * Math.Log(totalTries)) / myActionInfos[a].Tries);
          if (q > bestQ) {
            bestQ = q;
  ...
          }
        }
  +       Debug.Assert(bestAction > -1);
        return bestAction;
      }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -       totalTries++;
  -       tries[action]++;
  -       sumReward[action] += reward;
  -     }
  -
  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       totalTries -= tries[action];
  -       tries[action] = -1;
  -       sumReward[action] = 0;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       totalTries = 0;
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -     }
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new DefaultPolicyActionInfo();
      }
      public override string ToString() {
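UCB1 scores each arm with its average reward plus the exploration bonus sqrt(2 ln n / n_a), where n is the total number of tries and n_a the tries of that arm. A tiny worked computation with made-up counts, to make the formula concrete:

  using System;

  class Ucb1ScoreSketch {
    static void Main() {
      // illustrative numbers: arm tried 10 times with mean reward 0.6, 100 tries in total
      double avgReward = 0.6; int tries = 10, totalTries = 100;
      double q = avgReward + Math.Sqrt(2 * Math.Log(totalTries) / tries);
      Console.WriteLine("UCB1 score: {0:F3}", q); // 0.6 + sqrt(2*4.605/10) ≈ 1.560
    }
  }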
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class UCB1TunedPolicy : BanditPolicy {
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  -     private readonly double[] sumSqrReward;
  -     private int totalTries = 0;
  -     public UCB1TunedPolicy(int numActions)
  -       : base(numActions) {
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  -       this.sumSqrReward = new double[numActions];
  -     }
  +   public class UCB1TunedPolicy : IPolicy {

  -     private double V(int arm) {
  -       var s = tries[arm];
  -       return sumSqrReward[arm] / s - Math.Pow(sumReward[arm] / s, 2) + Math.Sqrt(2 * Math.Log(totalTries) / s);
  -     }
  -
  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
  -       foreach (var a in Actions) {
  -         if (tries[a] == 0) return a;
  -         var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable
  +       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
  +
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) continue;
  +         if (myActionInfos[a].Tries == 0) return a;
  +
  +         var sumReward = myActionInfos[a].SumReward;
  +         var tries = myActionInfos[a].Tries;
  +
  +         var avgReward = sumReward / tries;
  +         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
          if (q > bestQ) {
            bestQ = q;
  ...
          }
        }
  +       Debug.Assert(bestAction > -1);
        return bestAction;
      }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -       totalTries++;
  -       tries[action]++;
  -       sumReward[action] += reward;
  -       sumSqrReward[action] += reward * reward;
  +
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new MeanAndVariancePolicyActionInfo();
      }

  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       totalTries -= tries[action];
  -       tries[action] = -1;
  -       sumReward[action] = 0;
  -       sumSqrReward[action] = 0;
  +     private double V(MeanAndVariancePolicyActionInfo actionInfo, int totalTries) {
  +       var s = actionInfo.Tries;
  +       return actionInfo.RewardVariance + Math.Sqrt(2 * Math.Log(totalTries) / s);
      }

  -     public override void Reset() {
  -       base.Reset();
  -       totalTries = 0;
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
  -     }
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
      public override string ToString() {
        return "UCB1TunedPolicy";
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
  -   public class UCBNormalPolicy : BanditPolicy {
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  -     private readonly double[] sumSqrReward;
  -     private int totalTries = 0;
  -     public UCBNormalPolicy(int numActions)
  -       : base(numActions) {
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  -       this.sumSqrReward = new double[numActions];
  -     }
  +   public class UCBNormalPolicy : IPolicy {

  -     public override int SelectAction() {
  -       Debug.Assert(Actions.Any());
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
  -       foreach (var a in Actions) {
  -         if (totalTries <= 1 || tries[a] <= 1 || tries[a] <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
  -         var avgReward = sumReward[a] / tries[a];
  -         var estVariance = 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]);
  +       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
  +
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) continue;
  +         if (totalTries <= 1 || myActionInfos[a].Tries <= 1 || myActionInfos[a].Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
  +
  +         var tries = myActionInfos[a].Tries;
  +         var avgReward = myActionInfos[a].AvgReward;
  +         var rewardVariance = myActionInfos[a].RewardVariance;
  +         var estVariance = 16 * rewardVariance * (Math.Log(totalTries - 1) / tries);
          if (estVariance < 0) estVariance = 0; // numerical problems
          var q = avgReward
  ...
          }
        }
  -       Debug.Assert(Actions.Contains(bestAction));
  +       Debug.Assert(bestAction > -1);
        return bestAction;
      }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -       totalTries++;
  -       tries[action]++;
  -       sumReward[action] += reward;
  -       sumSqrReward[action] += reward * reward;
  +
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new MeanAndVariancePolicyActionInfo();
      }

  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       totalTries -= tries[action];
  -       tries[action] = -1;
  -       sumReward[action] = 0;
  -       sumSqrReward[action] = 0;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       totalTries = 0;
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
  -     }
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
      public override string ToString() {
        return "UCBNormalPolicy";
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCTPolicy.cs
r11730 → r11732:

    namespace HeuristicLab.Algorithms.Bandits {
      /* Kocsis et al. Bandit based Monte-Carlo Planning */
  -   public class UCTPolicy : BanditPolicy {
  -     private readonly int[] tries;
  -     private readonly double[] sumReward;
  -     private int totalTries = 0;
  +   public class UCTPolicy : IPolicy {
        private readonly double c;

  -     public UCTPolicy(int numActions, double c = 1.0)
  -       : base(numActions) {
  -       this.tries = new int[numActions];
  -       this.sumReward = new double[numActions];
  +     public UCTPolicy(double c = 1.0) {
          this.c = c;
        }

  -     public override int SelectAction() {
  +     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
  +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
        int bestAction = -1;
        double bestQ = double.NegativeInfinity;
  -       foreach (var a in Actions) {
  -         if (tries[a] == 0) return a;
  -         var q = sumReward[a] / tries[a] + 2 * c * Math.Sqrt(Math.Log(totalTries) / tries[a]);
  +       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
  +
  +       for (int a = 0; a < myActionInfos.Length; a++) {
  +         if (myActionInfos[a].Disabled) continue;
  +         if (myActionInfos[a].Tries == 0) return a;
  +         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
          if (q > bestQ) {
            bestQ = q;
  ...
          }
        }
  +       Debug.Assert(bestAction > -1);
        return bestAction;
      }
  -     public override void UpdateReward(int action, double reward) {
  -       Debug.Assert(Actions.Contains(action));
  -       totalTries++;
  -       tries[action]++;
  -       sumReward[action] += reward;
  +
  +     public IPolicyActionInfo CreateActionInfo() {
  +       return new DefaultPolicyActionInfo();
      }

  -     public override void DisableAction(int action) {
  -       base.DisableAction(action);
  -       totalTries -= tries[action];
  -       tries[action] = -1;
  -       sumReward[action] = 0;
  -     }
  -
  -     public override void Reset() {
  -       base.Reset();
  -       totalTries = 0;
  -       Array.Clear(tries, 0, tries.Length);
  -       Array.Clear(sumReward, 0, sumReward.Length);
  -     }
  -     public override void PrintStats() {
  -       for (int i = 0; i < sumReward.Length; i++) {
  -         if (tries[i] >= 0) {
  -           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
  -         } else {
  -           Console.Write("{0,5}", "");
  -         }
  -       }
  -       Console.WriteLine();
  -     }
      public override string ToString() {
        return string.Format("UCTPolicy({0:F2})", c);
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs
r11730 → r11732:

      private readonly Random random;
      private readonly int contextLen;
  -   private readonly Func<Random, int, IPolicy> policyFactory;
  +   private readonly IPolicy policy;

  -   public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, Func<Random, int, IPolicy> policyFactory) {
  +   public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
        this.maxLen = maxLen;
        this.problem = problem;
        this.random = random;
        this.contextLen = contextLen;
  -     this.policyFactory = policyFactory;
  +     this.policy = policy;
      }
  ...
        for (int i = 0; i < maxIterations; i++) {
          var sentence = SampleSentence(problem.Grammar).ToString();
  -       var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
  +       var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
          DistributeReward(quality);
  ...

  -   private Dictionary<string, IPolicy> ntPolicy;
  +   private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
      private List<Tuple<string, int>> updateChain;

      private void InitPolicies(IGrammar grammar) {
  -     this.ntPolicy = new Dictionary<string, IPolicy>();
  +     this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
        this.updateChain = new List<Tuple<string, int>>();
      }
  ...
        var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
        lft = problem.Hash(lft);
  -     if (!ntPolicy.ContainsKey(lft)) {
  -       ntPolicy.Add(lft, policyFactory(random, g.GetAlternatives(nt).Count()));
  +     if (!contextActionInfos.ContainsKey(lft)) {
  +       contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
        }
  -     var selectedAltIdx = ntPolicy[lft].SelectAction();
  +     var selectedAltIdx = policy.SelectAction(random, contextActionInfos[lft]);
        selectedAlt = alts.ElementAt(selectedAltIdx);
        updateChain.Add(Tuple.Create(lft, selectedAltIdx));
  ...
        var lft = e.Item1;
        var action = e.Item2;
  -     ntPolicy[lft].UpdateReward(action, reward);
  +     contextActionInfos[lft][action].UpdateReward(reward);
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs
r11730 → r11732:

      private readonly Random random;
      private readonly IProblem problem;
  +   private readonly IPolicy policy;

  -   public AlternativesSampler(IProblem problem, int maxLen) {
  +   public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
        this.problem = problem;
        this.maxLen = maxLen;
  -     this.random = new Random(31415);
  +     this.random = new Random();
  +     this.policy = policy;
      }
  ...
        for (int i = 0; i < maxIterations; i++) {
          var sentence = SampleSentence(problem.Grammar).ToString();
  -       var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
  +       var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
          DistributeReward(quality);
  ...

  -   private Dictionary<char, IPolicy> ntPolicy;
  +   private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
      private List<Tuple<char, int>> updateChain;

      private void InitPolicies(IGrammar grammar) {
  -     this.ntPolicy = new Dictionary<char, IPolicy>();
  +     this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
        this.updateChain = new List<Tuple<char, int>>();
        foreach (var nt in grammar.NonTerminalSymbols) {
  -       ntPolicy.Add(nt, new EpsGreedyPolicy(random, grammar.GetAlternatives(nt).Count(), 0.1));
  +       ntActionInfos.Add(nt, grammar.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
        }
      }
  ...
      } else {
        // all alts are allowed => select using bandit policy
  -     var selectedAltIdx = ntPolicy[nt].SelectAction();
  +     var selectedAltIdx = policy.SelectAction(random, ntActionInfos[nt]);
        selectedAlt = alts.ElementAt(selectedAltIdx);
        updateChain.Add(Tuple.Create(nt, selectedAltIdx));
  ...
        var nt = e.Item1;
        var action = e.Item2;
  -     ntPolicy[nt].UpdateReward(action, reward);
  +     ntActionInfos[nt][action].UpdateReward(reward);
      }
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs
r11730 → r11732:

      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
        var sentence = sentenceEnumerator.Current.ToString();
  -     var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
  +     var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
        RaiseSolutionEvaluated(sentence, quality);
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs
r11730 → r11732:

      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
        var sentence = sentenceEnumerator.Current.ToString();
  -     var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
  +     var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
        RaiseSolutionEvaluated(sentence, quality);
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11727 → r11732:

      <Compile Include="AlternativesSampler.cs" />
      <Compile Include="AlternativesContextSampler.cs" />
  +   <Compile Include="ExhaustiveRandomFirstSearch.cs" />
      <Compile Include="MctsSampler.cs" />
      <Compile Include="ExhaustiveDepthFirstSearch.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
r11730 r11732
13 13      public int randomTries;
14 14      public int policyTries;
15         public IPolicy policy;
   15      public IPolicyActionInfo actionInfo;
16 16      public TreeNode[] children;
17 17      public bool done = false;
…
22 22
23 23      public override string ToString() {
24           return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, policy);
   24        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
25 25      }
26 26    }
…
34 34      private readonly Random random;
35 35      private readonly int randomTries;
36         private readonly Func<Random, int, IPolicy> policyFactory;
   36      private readonly IPolicy policy;
37 37
38         private List<Tuple<TreeNode, int>> updateChain;
   38      private List<TreeNode> updateChain;
39 39      private TreeNode rootNode;
40 40
…
42 42      public int treeSize;
43 43
44         public MctsSampler(IProblem problem, int maxLen, Random random) :
45           this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
   44      // public MctsSampler(IProblem problem, int maxLen, Random random) :
   45      //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
   46      //
   47      // }
46 48
47         }
48         public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, Func<Random, int, IPolicy> policyFactory) {
   49      public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
50 50        this.maxLen = maxLen;
51 51        this.problem = problem;
52 52        this.random = random;
53 53        this.randomTries = randomTries;
54           this.policyFactory = policyFactory;
   54        this.policy = policy;
55 55      }
56 56
…
60 60        for (int i = 0; !rootNode.done && i < maxIterations; i++) {
61 61          var sentence = SampleSentence(problem.Grammar).ToString();
62               var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
   62          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
63 63          Debug.Assert(quality >= 0 && quality <= 1.0);
64 64          DistributeReward(quality);
…
79 79        var n = rootNode;
80 80        Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
81           while (n.policy != null) {
   81        while (n.children != null) {
82 82          Console.WriteLine();
83 83          Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
…
90 90
91 91      private void InitPolicies(IGrammar grammar) {
92           this.updateChain = new List<Tuple<TreeNode, int>>();
   92        this.updateChain = new List<TreeNode>();
93 93
94 94        rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
   95        rootNode.actionInfo = policy.CreateActionInfo();
95 96        treeDepth = 0;
96 97        treeSize = 0;
…
108 109      TreeNode n = rootNode;
109 110      bool done = phrase.IsTerminal;
110             int selectedAltIdx = -1;
111 111      var curDepth = 0;
112 112      while (!done) {
113             char nt = phrase.FirstNonTerminal;
114
115             int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
116             Debug.Assert(maxLenOfReplacement > 0);
117
118             var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
    113        updateChain.Add(n);
119 114
120 115        if (n.randomTries < randomTries) {
121 116          n.randomTries++;
    117          treeDepth = Math.Max(treeDepth, curDepth);
    118          return g.CompleteSentenceRandomly(random, phrase, maxLen);
    119        } else {
    120          char nt = phrase.FirstNonTerminal;
122 121
123               treeDepth = Math.Max(treeDepth, curDepth);
    122          int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
    123          Debug.Assert(maxLenOfReplacement > 0);
124 124
125               return g.CompleteSentenceRandomly(random, phrase, maxLen);
126             } else if (n.randomTries == randomTries && n.policy == null) {
127               n.policy = policyFactory(random, alts.Count());
128               //n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
129               n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
    125          var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
130 126
131               treeSize += n.children.Length;
132             }
133             n.policyTries++;
134             // => select using bandit policy
135             selectedAltIdx = n.policy.SelectAction();
136             Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
    127          if (n.randomTries == randomTries && n.children == null) {
    128            n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
    129            //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
    130            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    131            treeSize += n.children.Length;
    132          }
    133          n.policyTries++;
    134          // => select using bandit policy
    135          int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
    136          Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
137 137
138             // replace nt with alt
139             phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
    138          // replace nt with alt
    139          phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
140 140
141             updateChain.Add(Tuple.Create(n, selectedAltIdx));
    141          curDepth++;
142 142
143             curDepth++;
    143          done = phrase.IsTerminal;
144 144
145             done = phrase.IsTerminal;
146             if (!done) {
147 145          // prepare for next iteration
148 146          n = n.children[selectedAltIdx];
149               Debug.Assert(!n.done);
150 147        }
151 148      } // while
152 149
    150      updateChain.Add(n);
    151
    152
153 153      // the last node is a leaf node (sentence is done), so we never need to visit this node again
154           n.children[selectedAltIdx].done = true;
    154      n.done = true;
    155      n.actionInfo.Disable();
155 156
156 157      treeDepth = Math.Max(treeDepth, curDepth);
…
163 164
164 165      foreach (var e in updateChain) {
165             var node = e.Item1;
166             var policy = node.policy;
167             var action = e.Item2;
168             //policy.UpdateReward(action, reward / updateChain.Count);
169             policy.UpdateReward(action, reward);
170
171             if (node.children[action].done) node.policy.DisableAction(action);
172             if (node.children.All(c => c.done)) node.done = true;
    166        var node = e;
    167        if (node.children != null && node.children.All(c => c.done)) {
    168          node.done = true;
    169          node.actionInfo.Disable();
    170        }
    171        if (!node.done) {
    172          node.actionInfo.UpdateReward(reward);
    173          //policy.UpdateReward(action, reward / updateChain.Count);
    174        }
173 175      }
174 176    }
-
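The refactoring above splits the former per-node IPolicy into a shared, stateless IPolicy plus per-node IPolicyActionInfo statistics: nodes now carry only data, and one policy instance serves the whole tree. A minimal sketch of how the two interfaces fit together, reconstructed from the four calls visible in this hunk (CreateActionInfo, SelectAction, UpdateReward, Disable); the actual declarations in IPolicy.cs and IPolicyActionInfo.cs may differ:

    using System;
    using System.Collections.Generic;

    // Sketch only, reconstructed from the calls used in this changeset;
    // the real interface files are not part of this diff.
    public interface IPolicyActionInfo {
      void UpdateReward(double reward); // fold one observed reward into the statistics
      void Disable();                   // mark the action as exhausted (subtree done)
    }

    public interface IPolicy {
      // pick an action index given only the per-action statistics
      int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
      IPolicyActionInfo CreateActionInfo(); // factory for matching statistics objects
    }

With this split, DistributeReward no longer needs the (node, action) tuples of the old update chain; it walks the visited nodes and updates each node's own action info.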
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs
r11730 r11732 25 25 for (int i = 0; i < maxIterations; i++) { 26 26 var sentence = CreateSentence(problem.Grammar).ToString(); 27 var quality = problem.Evaluate(sentence) / problem. GetBestKnownQuality(maxLen);27 var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen); 28 28 RaiseSolutionEvaluated(sentence, quality); 29 29 -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs
r11730 r11732 29 29 } 30 30 } 31 32 public static double RSq(IEnumerable<double> xs, IEnumerable<double> ys) { 33 // two pass implementation, but we don't care 34 var meanX = xs.Average(); 35 var meanY = ys.Average(); 36 37 var s = 0.0; 38 var ssX = 0.0; 39 var ssY = 0.0; 40 foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) { 41 s += (p.x - meanX) * (p.y - meanY); 42 ssX += Math.Pow(p.x - meanX, 2); 43 ssY += Math.Pow(p.y - meanY, 2); 44 } 45 46 if (s.IsAlmost(0)) return 0; 47 if (ssX.IsAlmost(0) || ssY.IsAlmost(0)) return 0; 48 return s * s / (ssX * ssY); 49 } 50 51 31 52 } 32 53 } -
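The RSq helper hoisted into HeuristicLab.Common (it previously lived inside SymbolicRegressionPoly10Problem, see the hunk further down) computes the squared Pearson correlation of two series in two passes and defines the degenerate zero-variance and zero-covariance cases as 0. A quick usage sketch with made-up values, assuming Extensions is the static class in this file:

    using System;
    using HeuristicLab.Common;

    class RSqDemo {
      static void Main() {
        var ys    = new double[] { 1, 2, 3, 4 };
        var yPred = new double[] { 2, 4, 6, 8 };      // perfectly linearly related
        Console.WriteLine(Extensions.RSq(ys, yPred)); // 1
        var flat  = new double[] { 5, 5, 5, 5 };      // zero variance => RSq defined as 0
        Console.WriteLine(Extensions.RSq(ys, flat));  // 0
      }
    }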
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Rand.cs
r11727 r11732
32 32    * For Gamma(a,b), scale the result by b.
33 33    */
34        private static double GammaRand(Random random, double a) {
   34     public static double GammaRand(Random random, double a) {
35 35    /* Algorithm:
36 36     * G. Marsaglia and W.W. Tsang, A simple method for generating gamma
-
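GammaRand is widened from private to public so that code outside Rand can draw Gamma variates directly. Per the comment kept in the source, GammaRand samples Gamma(a, 1) and a Gamma(a, b) draw is obtained by scaling; a small sketch (the usage is assumed, only the GammaRand signature is taken from the file):

    using System;
    using HeuristicLab.Common;

    class GammaDemo {
      static void Main() {
        var random = new Random(1234);
        double a = 2.0, b = 3.0;
        // Rand.GammaRand returns a Gamma(a, 1) draw; scale by b for Gamma(a, b)
        double g = b * Rand.GammaRand(random, a);
        Console.WriteLine(g);
      }
    }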
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj
r11730 r11732 39 39 </PropertyGroup> 40 40 <ItemGroup> 41 <Reference Include="HeuristicLab.Problems.Instances-3.3"> 42 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath> 43 </Reference> 44 <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3"> 45 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath> 46 </Reference> 41 47 <Reference Include="System" /> 42 48 <Reference Include="System.Core"> … … 57 63 </Choose> 58 64 <ItemGroup> 65 <Compile Include="TestSymbRegInstances.cs" /> 59 66 <Compile Include="TestSequence.cs" /> 60 67 <Compile Include="TestBanditPolicies.cs" /> … … 71 78 <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project> 72 79 <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name> 80 </ProjectReference> 81 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj"> 82 <Project>{17a7a380-86ce-482d-8d22-cbd70cc97f0d}</Project> 83 <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name> 73 84 </ProjectReference> 74 85 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj"> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
r11730 r11732 10 10 [TestClass] 11 11 public class TestBanditPolicies { 12 [TestMethod] 13 public void ComparePoliciesForGaussianUnknownVarianceBandit() { 14 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 15 var randSeed = 31415; 16 var nArms = 20; 17 18 // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01)); 19 // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01)); 20 // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01)); 21 // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01)); 22 Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1))); 23 Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1))); 24 Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 25 Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 26 Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy()); 27 Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy()); 28 29 } 12 30 13 31 … … 15 33 public void ComparePoliciesForBernoulliBandit() { 16 34 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 17 18 var globalRand = new Random(31415); 19 var seedForPolicy = globalRand.Next(); 35 var randSeed = 31415; 20 36 var nArms = 20; 21 37 //Console.WriteLine("Exp3 (gamma=0.01)"); … … 23 39 //Console.WriteLine("Exp3 (gamma=0.05)"); 24 40 //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 25 Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli( globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));26 Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli( globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));41 Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy()); 42 Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel())); 27 43 Console.WriteLine("Random"); 28 TestPolicyBernoulli( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));44 TestPolicyBernoulli(randSeed, nArms, new RandomPolicy()); 29 45 Console.WriteLine("UCB1"); 30 TestPolicyBernoulli( globalRand, nArms, new UCB1Policy(nArms));46 TestPolicyBernoulli(randSeed, nArms, new UCB1Policy()); 31 47 Console.WriteLine("UCB1Tuned"); 32 TestPolicyBernoulli( globalRand, nArms, new UCB1TunedPolicy(nArms));48 TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy()); 33 49 Console.WriteLine("UCB1Normal"); 34 TestPolicyBernoulli( globalRand, nArms, new UCBNormalPolicy(nArms));50 TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy()); 35 51 Console.WriteLine("Eps(0.01)"); 36 TestPolicyBernoulli( globalRand, nArms, new 
EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.01));52 TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01)); 37 53 Console.WriteLine("Eps(0.05)"); 38 TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.05));54 TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05)); 39 55 //Console.WriteLine("Eps(0.1)"); 40 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.1));56 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1)); 41 57 //Console.WriteLine("Eps(0.2)"); 42 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.2));58 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2)); 43 59 //Console.WriteLine("Eps(0.5)"); 44 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.5));45 Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,0.1));46 Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,0.5));47 Console.WriteLine("UCT(1) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,1));48 Console.WriteLine("UCT(2) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,2));49 Console.WriteLine("UCT(5) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,5));50 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,0.1));51 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,0.5));52 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,1));53 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,10));54 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,100));55 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.01));56 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.05));57 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.1));60 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5)); 61 Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1)); 62 Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5)); 63 Console.WriteLine("UCT(1) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1)); 64 Console.WriteLine("UCT(2) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2)); 65 Console.WriteLine("UCT(5) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5)); 66 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1)); 67 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5)); 68 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1)); 69 
Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10)); 70 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100)); 71 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01)); 72 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05)); 73 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1)); 58 74 59 75 // not applicable to bernoulli rewards … … 70 86 71 87 [TestMethod] 72 public void ComparePoliciesForNormalBandit() { 73 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 74 75 var globalRand = new Random(31415); 76 var seedForPolicy = globalRand.Next(); 77 var nArms = 20; 78 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 79 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 80 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 88 public void ComparePoliciesForGaussianBandit() { 89 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 90 91 var randSeed = 31415; 92 var nArms = 20; 93 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 94 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 95 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1))); 81 96 /* 82 Console.WriteLine("Random"); TestPolicyNormal( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));83 Console.WriteLine("UCB1"); TestPolicyNormal( globalRand, nArms, new UCB1Policy(nArms));84 Console.WriteLine("UCB1Tuned"); TestPolicyNormal( globalRand, nArms, new UCB1TunedPolicy(nArms));85 Console.WriteLine("UCB1Normal"); TestPolicyNormal( globalRand, nArms, new UCBNormalPolicy(nArms));97 Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 98 Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms)); 99 Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms)); 100 Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms)); 86 101 //Console.WriteLine("Exp3 (gamma=0.01)"); 87 //TestPolicyNormal( globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));102 //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01)); 88 103 //Console.WriteLine("Exp3 (gamma=0.05)"); 89 //TestPolicyNormal( globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));90 Console.WriteLine("Eps(0.01)"); TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));91 Console.WriteLine("Eps(0.05)"); TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 
nArms, 0.05));104 //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05)); 105 Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 106 Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 92 107 //Console.WriteLine("Eps(0.1)"); 93 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));108 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1)); 94 109 //Console.WriteLine("Eps(0.2)"); 95 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));110 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2)); 96 111 //Console.WriteLine("Eps(0.5)"); 97 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));98 Console.WriteLine("UCT(0.1)"); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 0.1));99 Console.WriteLine("UCT(0.5)"); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 0.5));100 Console.WriteLine("UCT(1) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 1));101 Console.WriteLine("UCT(2) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 2));102 Console.WriteLine("UCT(5) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 5));103 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));104 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));105 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));106 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));107 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));108 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));109 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));110 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));111 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));112 Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));113 Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));114 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));115 Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));116 Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));117 
Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));118 Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));119 Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));112 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 113 Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1)); 114 Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5)); 115 Console.WriteLine("UCT(1) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1)); 116 Console.WriteLine("UCT(2) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2)); 117 Console.WriteLine("UCT(5) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5)); 118 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1)); 119 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5)); 120 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 121 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 122 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 123 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01)); 124 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05)); 125 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1)); 126 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 127 Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05)); 128 Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1)); 129 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 130 Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05)); 131 Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1)); 132 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 133 Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05)); 134 Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1)); 120 135 */ 121 136 } … … 124 139 public void ComparePoliciesForGaussianMixtureBandit() { 125 140 CultureInfo.DefaultThreadCurrentCulture = 
CultureInfo.InvariantCulture; 126 127 var globalRand = new Random(31415); 128 var seedForPolicy = globalRand.Next(); 129 var nArms = 20; 130 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 131 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 132 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 141 var randSeed = 31415; 142 var nArms = 20; 143 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 144 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 145 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1))); 133 146 134 147 /* 135 Console.WriteLine("Random"); TestPolicyGaussianMixture( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));136 Console.WriteLine("UCB1"); TestPolicyGaussianMixture( globalRand, nArms, new UCB1Policy(nArms));137 Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture( globalRand, nArms, new UCB1TunedPolicy(nArms));138 Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture( globalRand, nArms, new UCBNormalPolicy(nArms));139 Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));140 Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));141 Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 1));142 Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 2));143 Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 5));144 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));145 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));146 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));147 148 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));149 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));150 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));151 Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));148 Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 149 Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms)); 150 
Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms)); 151 Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms)); 152 Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 153 Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 154 Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1)); 155 Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2)); 156 Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5)); 157 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 158 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 159 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 160 161 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 162 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 163 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 164 Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01)); 152 165 */ 153 166 } 154 167 155 168 156 private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) { 157 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions)); 158 } 159 private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) { 160 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions)); 161 } 162 private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) { 163 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions)); 164 } 165 166 167 private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) { 169 private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) { 170 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions)); 171 } 172 private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) { 173 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions)); 174 } 175 private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) { 176 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions)); 177 } 178 private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) { 179 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions)); 180 } 181 182 183 private void TestPolicy(int 
randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) { 168 184 var maxIt = 1E5; 169 var reps = 30; // independent runs185 var reps = 10; // independent runs 170 186 var regretForIteration = new Dictionary<int, List<double>>(); 171 187 var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>(); 172 188 var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>(); 189 var globalRandom = new Random(randSeed); 190 var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test 191 var policyRandom = new Random(globalRandom.Next()); 192 173 193 // calculate statistics 174 194 for (int r = 0; r < reps; r++) { 175 195 var nextLogStep = 1; 176 var b = banditFactory(new Random(globalRand.Next()), nArms); 177 policy.Reset(); 196 var b = banditFactory(banditRandom, nArms); 178 197 var totalRegret = 0.0; 179 198 var totalPullsOfSuboptimalArmsExp = 0.0; 180 199 var totalPullsOfSuboptimalArmsMax = 0.0; 200 var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray(); 181 201 for (int i = 0; i <= maxIt; i++) { 182 var selectedAction = policy.SelectAction( );202 var selectedAction = policy.SelectAction(policyRandom, actionInfos); 183 203 var reward = b.Pull(selectedAction); 184 policy.UpdateReward(selectedAction,reward);204 actionInfos[selectedAction].UpdateReward(reward); 185 205 186 206 // collect stats -
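The reworked TestPolicy harness reflects the same stateless-policy design: each repetition builds a fresh actionInfos array instead of calling policy.Reset(), and the bandit's random stream is split from the policy's so that every policy under test sees identical reward sequences (the diff's comment: bandits must produce the same rewards for each test). A self-contained illustration of that seeding pattern; the eps-greedy logic below is a stand-in, not the project's EpsGreedyPolicy:

    using System;
    using System.Linq;

    class SeedingDemo {
      const int Arms = 5, Iterations = 10000;
      static void Main() {
        var globalRandom = new Random(31415);
        var banditRandom = new Random(globalRandom.Next()); // reward stream: identical for every policy under test
        var policyRandom = new Random(globalRandom.Next()); // exploration stream: independent of the rewards
        var means = Enumerable.Range(0, Arms).Select(_ => banditRandom.NextDouble()).ToArray();
        var pulls = new int[Arms];
        var sums = new double[Arms];
        for (int i = 0; i < Iterations; i++) {
          int action;
          if (i < Arms) action = i;                          // pull each arm once
          else if (policyRandom.NextDouble() < 0.1)
            action = policyRandom.Next(Arms);                // explore
          else {                                             // exploit best mean so far
            action = 0;
            for (int a = 1; a < Arms; a++)
              if (sums[a] / pulls[a] > sums[action] / pulls[action]) action = a;
          }
          double reward = means[action] + 0.1 * (banditRandom.NextDouble() - 0.5);
          pulls[action]++;
          sums[action] += reward;
        }
        Console.WriteLine("most pulled arm: {0}, best arm: {1}",
          Array.IndexOf(pulls, pulls.Max()), Array.IndexOf(means, means.Max()));
      }
    }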
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs
r11727 r11732 20 20 private readonly ExpressionInterpreter interpreter = new ExpressionInterpreter(); 21 21 public EvenParityProblem() { 22 this.grammar = new Grammar (grammarString);22 this.grammar = new Grammar (grammarString); 23 23 } 24 24 25 public double GetBestKnownQuality(int maxLen) {25 public double BestKnownQuality(int maxLen) { 26 26 // for now only an upper bound is returned, ideally all fitness cases are predicted correctly 27 27 return Math.Pow(2, 4); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ExpressionInterpreter.cs
r11727 r11732 54 54 var r = 0.0; 55 55 r = Term(d); 56 while (CurSy() == '+' || CurSy() == '-' || CurSy() == '^') { 57 if (CurSy() == '+') { 56 var curSy = CurSy(); 57 while (curSy == '+' || curSy == '-' || curSy == '^') { 58 if (curSy == '+') { 58 59 NewSy(); 59 60 r += Expr(d); 60 } else if ( CurSy()== '-') {61 } else if (curSy == '-') { 61 62 NewSy(); 62 63 r -= Expr(d); 63 } else if (CurSy() == '^'){64 } else { 64 65 NewSy(); 65 66 var e = Expr(d); 66 67 r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y) 67 68 } 69 curSy = CurSy(); 68 70 } 69 71 return r; … … 73 75 var r = 0.0; 74 76 r = Fact(d); 75 while (CurSy() == '*' || CurSy() == '/') { 76 if (CurSy() == '*') { 77 var curSy = CurSy(); 78 while (curSy == '*' || curSy == '/') { 79 if (curSy == '*') { 77 80 NewSy(); 78 81 r *= Term(d); 79 } else if (CurSy() == '/'){82 } else { 80 83 NewSy(); 81 84 r /= Term(d); 82 85 } 86 curSy = CurSy(); 83 87 } 84 88 return r; … … 87 91 private double Fact(double[] d) { 88 92 double r = 0.0; 89 if (CurSy() == '!') { 93 var curSy = CurSy(); 94 if (curSy == '!') { 90 95 NewSy(); 91 96 r = Not(Expr(d)); 92 } else if ( CurSy()== '(') {97 } else if (curSy == '(') { 93 98 NewSy(); 94 99 r = Expr(d); 95 100 if (CurSy() != ')') throw new ArgumentException(); 96 101 NewSy(); 97 } else if (CurSy() >= 'a' && CurSy() <= 'z') { 98 int o = Convert.ToByte(CurSy()) - Convert.ToByte('a'); 102 } else /* if (curSy >= 'a' && curSy <= 'z') */ { 103 int o = (byte)curSy - (byte)'a'; 104 //int o = Convert.ToByte(CurSy()) - Convert.ToByte('a'); 99 105 if (o < 0 || o >= d.Length) throw new ArgumentException(); 100 106 r = d[o]; 101 107 NewSy(); 102 } else throw new ArgumentException(); 108 } 109 //} else throw new ArgumentException(); 103 110 return r; 104 111 } -
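Besides caching CurSy() in a local so the current symbol is fetched once per loop iteration, the interpreter keeps encoding booleans as 0/1 doubles; the '^' case computes xor through the identity in the comment: xor = (!x AND y) OR (x AND !y). A tiny check of that arithmetic encoding, assuming Not(x) = 1 - x (Not's body is outside this hunk):

    using System;

    // Verifies the arithmetic xor used by ExpressionInterpreter over 0/1 values.
    class XorDemo {
      static double Not(double x) { return 1.0 - x; }
      static double Xor(double x, double y) { return Not(x) * y + x * Not(y); }
      static void Main() {
        foreach (var x in new double[] { 0, 1 })
          foreach (var y in new double[] { 0, 1 })
            Console.WriteLine("{0} ^ {1} = {2}", x, y, Xor(x, y));
        // prints 0, 1, 1, 0 in the last column, matching boolean xor
      }
    }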
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs
r11730 r11732 35 35 this.rules = new Dictionary<char, List<Sequence>>(); 36 36 foreach (var r in orig.rules) 37 this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new Sequence(v)))); // clone sequences37 this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new ReadonlySequence(v)))); // clone sequences 38 38 this.terminalSymbols = new HashSet<char>(orig.terminalSymbols); 39 39 this.sentenceSymbol = orig.sentenceSymbol; 40 40 this.nonTerminalSymbols = new HashSet<char>(orig.nonTerminalSymbols); 41 41 this.maxPhraseLength = new Dictionary<Sequence, int>(); 42 foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new Sequence(p.Key), p.Value);42 foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new ReadonlySequence(p.Key), p.Value); 43 43 this.minPhraseLength = new Dictionary<Sequence, int>(); 44 foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new Sequence(p.Key), p.Value);44 foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new ReadonlySequence(p.Key), p.Value); 45 45 } 46 46 … … 54 54 foreach (var r in rules) { 55 55 if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<Sequence>()); 56 this.rules[r.Item1].Add(new Sequence(r.Item2)); // here we store an array of symbols for a phase56 this.rules[r.Item1].Add(new ReadonlySequence(r.Item2)); // here we store an array of symbols for a phase 57 57 } 58 58 59 59 CheckValidity(); 60 CalculatePh aseLengthBounds();60 CalculatePhraseLengthBounds(); 61 61 } 62 62 … … 76 76 } 77 77 78 private void CalculatePhaseLengthBounds() { 79 minPhraseLength.Clear(); 80 maxPhraseLength.Clear(); 78 private void CalculatePhraseLengthBounds() { 81 79 // cache phrase lengths 82 80 foreach (var nt in nonTerminalSymbols) { … … 84 82 var max = int.MinValue; 85 83 foreach (var alt in rules[nt].OrderBy(alt => alt.Length)) { 86 minPhraseLength[alt] =MinPhraseLength(alt);87 maxPhraseLength[alt] =MaxPhraseLength(alt);84 CalcAndSetMinPhraseLength(alt); 85 CalcAndSetMaxPhraseLength(alt); 88 86 89 87 min = Math.Min(min, minPhraseLength[alt]); 90 88 max = Math.Max(max, maxPhraseLength[alt]); 91 89 } 92 minPhraseLength[new Sequence(nt)] = min; 93 maxPhraseLength[new Sequence(nt)] = max; 94 } 95 } 96 90 minPhraseLength[new ReadonlySequence(nt)] = min; 91 maxPhraseLength[new ReadonlySequence(nt)] = max; 92 } 93 } 97 94 98 95 public IEnumerable<Sequence> GetAlternatives(char nt) { … … 108 105 } 109 106 110 // caches for this are build in construction of object 111 public int MinPhraseLength(Sequence phrase) { 107 #region population of minphraselength cache 108 private int CalcAndSetMinSymbolLength(char symb) { 109 if (IsTerminal(symb)) return 1; 110 else return Math.Min(short.MaxValue, rules[symb].Min(alt => CalcAndSetMinPhraseLength(alt))); // maximal allowed value is short.MaxValue 111 } 112 private int CalcAndSetMinPhraseLength(Sequence phrase) { 113 Debug.Assert(phrase is ReadonlySequence); 112 114 int l; 113 115 if (minPhraseLength.TryGetValue(phrase, out l)) return l; … … 116 118 117 119 foreach (var symb in phrase) { 118 if (IsNonTerminal(symb)) { 119 l += MinSymbolLength(symb); 120 } else { 121 l++; 122 } 120 l += CalcAndSetMinSymbolLength(symb); 123 121 } 124 122 … … 127 125 return l; 128 126 } 129 127 #endregion 128 129 // read only access to caches 130 private int MinSymbolLength(char symb) { 131 if (IsTerminal(symb)) return 1; 132 else return Math.Min(short.MaxValue, rules[symb].Min(alt => MinPhraseLength(alt))); // maximal allowed value is short.MaxValue 133 } 134 public int 
MinPhraseLength(Sequence phrase) { 135 var minLen = 0; 136 if (minPhraseLength.TryGetValue(phrase, out minLen)) return minLen; 137 foreach (var s in phrase) { 138 minLen += MinSymbolLength(s); 139 } 140 return Math.Min(short.MaxValue, minLen); // maximal allowed value is short.MaxValue 141 } 142 143 #region population of maxphraselength cache 144 private int CalcAndSetMaxSymbolLength(char symb) { 145 if (IsTerminal(symb)) return 1; 146 else return Math.Min(short.MaxValue, rules[symb].Max(alt => CalcAndSetMaxPhraseLength(alt))); // maximal allowed value is short.MaxValue 147 } 130 148 // caches for this are build in construction of object 131 public int MaxPhraseLength(Sequence phrase) { 149 private int CalcAndSetMaxPhraseLength(Sequence phrase) { 150 Debug.Assert(phrase is ReadonlySequence); 132 151 int l; 133 152 if (maxPhraseLength.TryGetValue(phrase, out l)) return l; … … 136 155 137 156 foreach (var symb in phrase) { 138 if (IsNonTerminal(symb)) { 139 l += MaxSymbolLength(symb); 140 } else { 141 l++; 142 } 157 l += CalcAndSetMaxSymbolLength(symb); 143 158 } 144 159 l = Math.Min(short.MaxValue, l); // maximal allowed value is short.MaxValue … … 146 161 return l; 147 162 } 148 149 private int MinSymbolLength(char nt) { 150 if (IsTerminal(nt)) return 1; 151 else return Math.Min(short.MaxValue, rules[nt].Min(alt => MinPhraseLength(alt))); // maximal allowed value is short.MaxValue 152 } 153 private int MaxSymbolLength(char nt) { 154 if (IsTerminal(nt)) return 1; 155 else return Math.Min(short.MaxValue, rules[nt].Max(alt => MaxPhraseLength(alt))); // maximal allowed value is short.MaxValue 163 #endregion 164 165 // read only access to caches 166 public int MaxSymbolLength(char symb) { 167 if (IsTerminal(symb)) return 1; 168 else return Math.Min(short.MaxValue, rules[symb].Max(alt => MaxPhraseLength(alt))); // maximal allowed value is short.MaxValue 169 } 170 public int MaxPhraseLength(Sequence phrase) { 171 var maxLen = 0; 172 if (maxPhraseLength.TryGetValue(phrase, out maxLen)) return maxLen; 173 foreach (var s in phrase) { 174 maxLen += MaxSymbolLength(s); 175 } 176 return Math.Min(short.MaxValue, maxLen); // maximal allowed value is short.MaxValue 156 177 } 157 178 … … 159 180 if (phrase.Length > maxLen) throw new ArgumentException(); 160 181 if (MinPhraseLength(phrase) > maxLen) throw new ArgumentException(); 161 bool done = phrase.IsTerminal; // terminal phrase means we are done 162 while (!done) { 182 while (!phrase.IsTerminal) { 163 183 char nt = phrase.FirstNonTerminal; 164 184 … … 173 193 phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt); 174 194 175 done = phrase.All(IsTerminal); // terminal phrase means we are done176 195 } 177 196 return phrase; -
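The Grammar rewrite separates cache population from lookup: the CalcAndSet* methods fill minPhraseLength/maxPhraseLength once during construction (over ReadonlySequence keys, checked via Debug.Assert), while the public MinPhraseLength/MaxPhraseLength become read-only and, on a cache miss, fall back to summing per-symbol bounds clamped to short.MaxValue, so recursive rules cannot overflow. A standalone sketch of this two-tier lookup with toy data (not the Grammar API itself):

    using System;
    using System.Collections.Generic;

    class MinLenCacheDemo {
      static readonly Dictionary<string, int> phraseCache = new Dictionary<string, int>();
      static readonly Dictionary<char, int> symbolMin = new Dictionary<char, int>();

      static void Main() {
        symbolMin['a'] = 1;    // terminal symbol
        symbolMin['S'] = 1;    // e.g. S -> a | aS: the shortest derivation is "a"
        phraseCache["aS"] = 2; // phrases known at construction time

        Console.WriteLine(MinPhraseLength("aS"));  // 2 (cache hit)
        Console.WriteLine(MinPhraseLength("aaS")); // 3 (fallback: 1 + 1 + 1)
      }

      static int MinPhraseLength(string phrase) {
        int len;
        if (phraseCache.TryGetValue(phrase, out len)) return len; // cache hit
        len = 0;
        foreach (var s in phrase) len += symbolMin[s];            // sum per-symbol bounds
        return Math.Min(short.MaxValue, len);                     // clamp like Grammar does
      }
    }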
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs
r11727 r11732 18 18 } 19 19 20 public double GetBestKnownQuality(int maxLen) {20 public double BestKnownQuality(int maxLen) { 21 21 // the whole sentence is a palindrome + each symbol occurs only once or twice 22 22 // for odd total length the number of different characters can be larger than len/2 (aba) -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj
r11730 r11732 46 46 <Compile Include="Grammar.cs" /> 47 47 <Compile Include="EvenParityProblem.cs" /> 48 <Compile Include="ReadonlySequence.cs" /> 48 49 <Compile Include="SentenceSetStatistics.cs" /> 49 50 <Compile Include="Sequence.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11727 r11732 6 6 namespace HeuristicLab.Problems.GrammaticalOptimization { 7 7 public interface IProblem { 8 double GetBestKnownQuality(int maxLen);8 double BestKnownQuality(int maxLen); 9 9 IGrammar Grammar { get; } 10 10 double Evaluate(string sentence); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs
r11727 r11732 18 18 } 19 19 20 public double GetBestKnownQuality(int maxLen) {20 public double BestKnownQuality(int maxLen) { 21 21 // the whole sentence is a palindrome 22 22 return maxLen; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs
r11727 r11732 19 19 } 20 20 21 public double GetBestKnownQuality(int maxLen) {21 public double BestKnownQuality(int maxLen) { 22 22 return maxLen - 1; 23 23 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs
r11727 r11732 17 17 } 18 18 19 public double GetBestKnownQuality(int maxLen) {19 public double BestKnownQuality(int maxLen) { 20 20 // for now only an upper bound is returned, ideally all fitness cases are predicted correctly 21 21 throw new NotImplementedException(); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs
r11727 r11732 19 19 } 20 20 21 public double GetBestKnownQuality(int maxLen) {21 public double BestKnownQuality(int maxLen) { 22 22 return maxLen; 23 23 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs
r11727 r11732 17 17 } 18 18 19 public double GetBestKnownQuality(int maxLen) {19 public double BestKnownQuality(int maxLen) { 20 20 // for now only an upper bound is returned, ideally all fitness cases are predicted correctly 21 21 throw new NotImplementedException(); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs
r11730 r11732 23 23 } 24 24 25 public double GetBestKnownQuality(int maxLen) {25 public double BestKnownQuality(int maxLen) { 26 26 // for now only an upper bound is returned, ideally all food pellets are discovered 27 27 return 89; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs
r11730 r11732
84 84     }
85 85
86         public void ReplaceAt(int position, int len, Sequence replacement) {
   86      public virtual void ReplaceAt(int position, int len, Sequence replacement) {
87 87        if (replacement == null) throw new ArgumentNullException();
88 88        if (len <= 0) throw new ArgumentException();
…
125 125
126 126    public Sequence Subsequence(int startIdx, int len) {
127           if (startIdx < 0 || len <= 0) throw new ArgumentException();
    127      if (startIdx < 0 || len < 0) throw new ArgumentException();
128 128      if (startIdx >= this.len) throw new ArgumentException();
129 129      if (startIdx + len > this.len) throw new ArgumentException();
130           var subsequence = new Sequence { len = len};
    130      var subsequence = new Sequence { len = len };
131 131
132 132      Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
-
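Making ReplaceAt virtual (together with the Subsequence guard now permitting len == 0) is what enables the new ReadonlySequence used for the grammar's cached phrases. A plausible shape for that subclass; ReadonlySequence.cs is added by this changeset but not shown here, so the constructor and override below are assumptions:

    using System;
    using HeuristicLab.Problems.GrammaticalOptimization;

    // Hypothetical sketch: the real ReadonlySequence.cs is not part of this diff.
    public class ReadonlySequence : Sequence {
      public ReadonlySequence(string s) : base(s) { } // string ctor on Sequence assumed

      // ReplaceAt is virtual now, so a read-only variant can reject mutation;
      // Grammar stores its cached phrases as ReadonlySequence instances.
      public override void ReplaceAt(int position, int len, Sequence replacement) {
        throw new NotSupportedException("ReadonlySequence must not be modified.");
      }
    }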
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs
r11730 r11732 56 56 } 57 57 58 public double GetBestKnownQuality(int maxLen) {58 public double BestKnownQuality(int maxLen) { 59 59 // for now only an upper bound is returned, ideally we have an R² of 1.0 60 60 // the optimal R² can only be reached for sentences of at least 23 symbols … … 67 67 68 68 public double Evaluate(string sentence) { 69 return RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());69 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray()); 70 70 } 71 71 72 private double RSq(IEnumerable<double> xs, IEnumerable<double> ys) {73 // two pass implementation, but we don't care74 var meanX = xs.Average();75 var meanY = ys.Average();76 77 var s = 0.0;78 var ssX = 0.0;79 var ssY = 0.0;80 foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {81 s += (p.x - meanX) * (p.y - meanY);82 ssX += Math.Pow(p.x - meanX, 2);83 ssY += Math.Pow(p.y - meanY, 2);84 }85 86 if (s.IsAlmost(0)) return 0;87 if (ssX.IsAlmost(0) || ssY.IsAlmost(0)) return 0;88 return s * s / (ssX * ssY);89 }90 72 91 73 -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj
r11727 r11732 57 57 <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name> 58 58 </ProjectReference> 59 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj"> 60 <Project>{17A7A380-86CE-482D-8D22-CBD70CC97F0D}</Project> 61 <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name> 62 </ProjectReference> 59 63 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj"> 60 64 <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project> -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11730 r11732 11 11 using HeuristicLab.Algorithms.GrammaticalOptimization; 12 12 using HeuristicLab.Problems.GrammaticalOptimization; 13 using HeuristicLab.Problems.GrammaticalOptimization.SymbReg; 13 14 14 15 namespace Main { … … 22 23 23 24 private static void RunGridTest() { 24 int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet25 // 25 int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet 26 //var globalRandom = new Random(31415); 26 27 var localRandSeed = 31415; 27 28 var reps = 20; 28 29 29 var polic yFactories = new Func<Random, int,IPolicy>[]30 var policies = new Func<IPolicy>[] 30 31 { 31 (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions), 32 (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions), 33 (rand, numActions) => new RandomPolicy(rand, numActions), 34 (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01), 35 (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05), 36 (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1), 37 (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2), 38 (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5), 39 (rand, numActions) => new UCTPolicy(numActions, 0.1), 40 (rand, numActions) => new UCTPolicy(numActions, 0.5), 41 (rand, numActions) => new UCTPolicy(numActions, 1), 42 (rand, numActions) => new UCTPolicy(numActions, 2), 43 (rand, numActions) => new UCTPolicy(numActions, 5), 44 (rand, numActions) => new UCTPolicy(numActions, 10), 45 (rand, numActions) => new UCB1Policy(numActions), 46 (rand, numActions) => new UCB1TunedPolicy(numActions), 47 (rand, numActions) => new UCBNormalPolicy(numActions), 48 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1), 49 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5), 50 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1), 51 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5), 52 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10), 53 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20), 54 (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100), 55 (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01), 56 (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05), 57 (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1), 58 (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2), 59 (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01), 60 (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05), 61 (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1), 62 (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2), 63 (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01), 64 (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05), 65 (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1), 66 (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2), 67 (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01), 68 (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05), 69 (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1), 70 (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2), 71 (rand, numActions) => 
new ThresholdAscentPolicy(numActions, 5000, 0.01), 72 (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01), 32 () => new GaussianThompsonSamplingPolicy(), 33 () => new GaussianThompsonSamplingPolicy(true), 34 () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)), 35 () => new BernoulliThompsonSamplingPolicy(), 36 () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)), 37 () => new RandomPolicy(), 38 () => new EpsGreedyPolicy(0.01), 39 () => new EpsGreedyPolicy(0.05), 40 () => new EpsGreedyPolicy(0.1), 41 () => new EpsGreedyPolicy(0.2), 42 () => new EpsGreedyPolicy(0.5), 43 () => new UCTPolicy(0.1), 44 () => new UCTPolicy(0.5), 45 () => new UCTPolicy(1), 46 () => new UCTPolicy(2), 47 () => new UCTPolicy( 5), 48 () => new UCTPolicy( 10), 49 () => new UCB1Policy(), 50 () => new UCB1TunedPolicy(), 51 () => new UCBNormalPolicy(), 52 () => new BoltzmannExplorationPolicy(0.1), 53 () => new BoltzmannExplorationPolicy(0.5), 54 () => new BoltzmannExplorationPolicy(1), 55 () => new BoltzmannExplorationPolicy(5), 56 () => new BoltzmannExplorationPolicy(10), 57 () => new BoltzmannExplorationPolicy(20), 58 () => new BoltzmannExplorationPolicy(100), 59 () => new ChernoffIntervalEstimationPolicy( 0.01), 60 () => new ChernoffIntervalEstimationPolicy( 0.05), 61 () => new ChernoffIntervalEstimationPolicy( 0.1), 62 () => new ChernoffIntervalEstimationPolicy( 0.2), 63 // (rand) => new ThresholdAscentPolicy(10, 0.01), 64 // (rand) => new ThresholdAscentPolicy(10, 0.05), 65 // (rand) => new ThresholdAscentPolicy(10, 0.1), 66 // (rand) => new ThresholdAscentPolicy(10, 0.2), 67 // (rand) => new ThresholdAscentPolicy(100, 0.01), 68 // (rand) => new ThresholdAscentPolicy(100, 0.05), 69 // (rand) => new ThresholdAscentPolicy(100, 0.1), 70 // (rand) => new ThresholdAscentPolicy(100, 0.2), 71 // (rand) => new ThresholdAscentPolicy(1000, 0.01), 72 // (rand) => new ThresholdAscentPolicy(1000, 0.05), 73 // (rand) => new ThresholdAscentPolicy(1000, 0.1), 74 // (rand) => new ThresholdAscentPolicy(1000, 0.2), 75 // (rand) => new ThresholdAscentPolicy(5000, 0.01), 76 // (rand) => new ThresholdAscentPolicy(10000, 0.01), 73 77 }; 74 78 75 var tasks = new List<Task>(); 76 foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) { 77 foreach (var policyFactory in policyFactories) { 78 var myPolicyFactory = policyFactory; 79 var myRandomTries = randomTries; 80 var localRand = new Random(localRandSeed); 81 var options = new ParallelOptions(); 82 options.MaxDegreeOfParallelism = 1; 83 Parallel.For(0, reps, options, (i) => { 84 //var t = Task.Run(() => { 85 Random myLocalRand; 86 lock (localRand) 87 myLocalRand = new Random(localRand.Next()); 88 89 //for (int i = 0; i < reps; i++) { 90 91 int iterations = 0; 92 var sw = new Stopwatch(); 93 var globalStatistics = new SentenceSetStatistics(); 94 95 var problem = new SymbolicRegressionPoly10Problem(); 96 //var problem = new SantaFeAntProblem(); 97 //var problem = new PalindromeProblem(); 98 //var problem = new HardPalindromeProblem(); 99 //var problem = new RoyalPairProblem(); 100 //var problem = new EvenParityProblem(); 101 var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory); 102 //var alg = new ExhaustiveBreadthFirstSearch(problem, 25); 103 //var alg = new AlternativesContextSampler(problem, 25); 104 105 alg.SolutionEvaluated += (sentence, quality) => { 106 iterations++; 107 globalStatistics.AddSentence(sentence, quality); 108 if (iterations % 10000 == 0) { 109 Console.WriteLine("{0,4} {1,7} 
{2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
110            }
111          };
112
113          sw.Start();
114
115          alg.Run(maxIterations);
116
117          sw.Stop();
118          //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
119          //}
120          //});
121          //tasks.Add(t);
122        });
    79   foreach (var problem in new Tuple<IProblem, int>[]
    80     {
    81       Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
    82       Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
    83     })
    84   foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
    85     foreach (var policy in policies) {
    86       var myRandomTries = randomTries;
    87       var localRand = new Random(localRandSeed);
    88       var options = new ParallelOptions();
    89       options.MaxDegreeOfParallelism = 1;
    90       Parallel.For(0, reps, options, (i) => {
    91         //var t = Task.Run(() => {
    92         Random myLocalRand;
    93         lock (localRand)
    94           myLocalRand = new Random(localRand.Next());
    95
    96         //for (int i = 0; i < reps; i++) {
    97
    98         int iterations = 0;
    99         var globalStatistics = new SentenceSetStatistics();
    100
    101        // var problem = new SymbolicRegressionPoly10Problem();
    102        // var problem = new SantaFeAntProblem();
    103        //var problem = new PalindromeProblem();
    104        //var problem = new HardPalindromeProblem();
    105        //var problem = new RoyalPairProblem();
    106        //var problem = new EvenParityProblem();
    107        var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
    108        //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
    109        //var alg = new AlternativesContextSampler(problem, 25);
    110
    111        alg.SolutionEvaluated += (sentence, quality) => {
    112          iterations++;
    113          globalStatistics.AddSentence(sentence, quality);
    114          if (iterations % 10000 == 0) {
    115            Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
    116          }
    117        };
    118
    119
    120        alg.Run(maxIterations);
    121
    122        //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
    123        //}
    124        //});
    125        //tasks.Add(t);
    126      });
    127    }
123 128  }
124        }
125 129  //Task.WaitAll(tasks.ToArray());
126 130  }
127 131
128 132  private static void RunDemo() {
    133    // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
    134    // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
    135    // TODO: separate value function from policy
    136    // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
    137    // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and a bandit info only has an update routine; each policy works only with its type of bandit info)
    138    // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
    139    // TODO: exhaustive search with priority list
129 140    // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
130 141    // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
…
133 144    // TODO: research thompson sampling for max bandit?
134 145    // TODO: extensive test of strategies for the k-armed max bandit
135         // TODO: verify TA implementation using example from the original paper
136         // TODO: reference HL.ProblemInstances and try on tower dataset
    146    // TODO: verify TA implementation using example from the original paper
137 147    // TODO: compare results for different policies also for the symb-reg problem
138 148    // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
…
144 154    // TODO: compare generating the shortest possible sentences when completing randomly vs. simply picking alternatives at random
145 155    // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
146
147
148 156    // TODO: constant optimization
    157
    158
    159  int maxIterations = 100000;
149 160  int iterations = 0;
150 161  var sw = new Stopwatch();
…
154 165  var random = new Random();
155 166
156       //var problem = new SymbolicRegressionPoly10Problem();
157       var problem = new SantaFeAntProblem();
    167  var problem = new SymbolicRegressionPoly10Problem();
    168  //var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
    169  //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
158 170  //var problem = new PalindromeProblem();
159 171  //var problem = new HardPalindromeProblem();
160 172  //var problem = new RoyalPairProblem();
161 173  //var problem = new EvenParityProblem();
162       //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
    174  var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) works well for Ant
163 175  //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
164 176  //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
165 177  //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
166 178  // var alg = new AlternativesSampler(problem, 17);
167       var alg = new RandomSearch(problem, random, 17);
    179  // var alg = new RandomSearch(problem, random, 17);
    180  // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
168 181
169 182  alg.FoundNewBestSolution += (sentence, quality) => {
170 183    bestQuality = quality;
171 184    bestSentence = sentence;
172         Console.WriteLine("{0, 10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
    185    Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
173 186  };
174 187  alg.SolutionEvaluated += (sentence, quality) => {
…
176 189    globalStatistics.AddSentence(sentence, quality);
177 190    if (iterations % 1000 == 0) {
178          //alg.PrintStats();
    191      alg.PrintStats();
179 192    }
180 193    if (iterations % 10000 == 0) {
181 194      //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
182 195      //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
183           Console.WriteLine(globalStatistics);
    196      Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
184 197    }
185 198  };