Changeset 11732


Ignore:
Timestamp:
01/07/15 09:21:46 (5 years ago)
Author:
gkronber
Message:

#2283: refactoring and bug fixes

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
16 added
45 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/GrammaticalOptimization.sln

    r11727 r11732  
    1313EndProject
    1414Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Common", "HeuristicLab.Common\HeuristicLab.Common.csproj", "{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}"
     15EndProject
     16Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Problems.GrammaticalOptimization.SymbReg", "HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj", "{17A7A380-86CE-482D-8D22-CBD70CC97F0D}"
    1517EndProject
    1618Global
     
    4446    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
    4547    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.Build.0 = Release|Any CPU
     48    {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
     49    {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Debug|Any CPU.Build.0 = Debug|Any CPU
     50    {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Release|Any CPU.ActiveCfg = Release|Any CPU
     51    {17A7A380-86CE-482D-8D22-CBD70CC97F0D}.Release|Any CPU.Build.0 = Release|Any CPU
    4652  EndGlobalSection
    4753  GlobalSection(SolutionProperties) = preSolution
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11730 r11732  
    3131  </PropertyGroup>
    3232  <ItemGroup>
     33    <Reference Include="ALGLIB-3.7.0">
     34      <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
     35    </Reference>
    3336    <Reference Include="System" />
    3437    <Reference Include="System.Core" />
     
    4245    <Compile Include="BanditHelper.cs" />
    4346    <Compile Include="Bandits\BernoulliBandit.cs" />
     47    <Compile Include="Bandits\GaussianBandit.cs" />
    4448    <Compile Include="Bandits\GaussianMixtureBandit.cs" />
    4549    <Compile Include="Bandits\IBandit.cs" />
    4650    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     51    <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
     52    <Compile Include="IPolicyActionInfo.cs" />
    4753    <Compile Include="Models\BernoulliModel.cs" />
    4854    <Compile Include="Models\GaussianModel.cs" />
    49     <Compile Include="Models\GaussianMixtureModel.cs" />
    5055    <Compile Include="Models\IModel.cs" />
    51     <Compile Include="Policies\BanditPolicy.cs" />
    52     <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs" />
    53     <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
    54     <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
    55     <Compile Include="Policies\GenericThompsonSamplingPolicy.cs" />
    56     <Compile Include="Policies\ThresholdAscentPolicy.cs" />
    57     <Compile Include="Policies\UCTPolicy.cs" />
    58     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
    59     <Compile Include="Policies\Exp3Policy.cs" />
    60     <Compile Include="Policies\EpsGreedyPolicy.cs" />
     56    <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs">
     57      <SubType>Code</SubType>
     58    </Compile>
     59    <Compile Include="Policies\BoltzmannExplorationPolicy.cs">
     60      <SubType>Code</SubType>
     61    </Compile>
     62    <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs">
     63      <SubType>Code</SubType>
     64    </Compile>
     65    <Compile Include="Policies\BernoulliPolicyActionInfo.cs" />
     66    <Compile Include="Policies\ModelPolicyActionInfo.cs" />
     67    <Compile Include="Policies\EpsGreedyPolicy.cs">
     68      <SubType>Code</SubType>
     69    </Compile>
     70    <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs">
     71      <SubType>Code</SubType>
     72    </Compile>
     73    <Compile Include="Policies\GenericThompsonSamplingPolicy.cs">
     74      <SubType>Code</SubType>
     75    </Compile>
     76    <Compile Include="Policies\MeanAndVariancePolicyActionInfo.cs" />
     77    <Compile Include="Policies\DefaultPolicyActionInfo.cs" />
     78    <Compile Include="Policies\EmptyPolicyActionInfo.cs" />
    6179    <Compile Include="Policies\RandomPolicy.cs" />
    6280    <Compile Include="Policies\UCB1Policy.cs" />
    63     <Compile Include="Policies\UCB1TunedPolicy.cs" />
    64     <Compile Include="Policies\UCBNormalPolicy.cs" />
    6581    <Compile Include="IPolicy.cs" />
     82    <Compile Include="Policies\UCB1TunedPolicy.cs">
     83      <SubType>Code</SubType>
     84    </Compile>
     85    <Compile Include="Policies\UCBNormalPolicy.cs">
     86      <SubType>Code</SubType>
     87    </Compile>
     88    <Compile Include="Policies\UCTPolicy.cs">
     89      <SubType>Code</SubType>
     90    </Compile>
    6691    <Compile Include="Properties\AssemblyInfo.cs" />
    6792  </ItemGroup>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

    r11730 r11732  
    88  // this interface represents a policy for reinforcement learning
    99  public interface IPolicy {
    10     IEnumerable<int> Actions { get; }
    11     int SelectAction(); // action selection ...
    12     void UpdateReward(int action, double reward); // ... and reward update are defined as usual
    13 
    14     // policies must also support disabling of potential actions
    15     // for instance if we know that an action in a state has a deterministic
    16     // reward we need to sample it only once
    17     // it is necessary to sample an action only once
    18     void DisableAction(int action);
    19 
    20     // reset causes the policy to be reinitialized to it's initial state (as after constructor-call)
    21     void Reset();
    22 
    23     void PrintStats();
     10    int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
     11    IPolicyActionInfo CreateActionInfo();
    2412  }
    2513}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

    r11730 r11732  
    99namespace HeuristicLab.Algorithms.Bandits.Models {
    1010  public class BernoulliModel : IModel {
    11     private readonly int numActions;
    12     private readonly int[] success;
    13     private readonly int[] failure;
     11    private int success;
     12    private int failure;
    1413
    1514    // parameters of beta prior distribution
     
    1716    private readonly double beta;
    1817
    19     public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
    20       this.numActions = numActions;
    21       this.success = new int[numActions];
    22       this.failure = new int[numActions];
     18    public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
    2319      this.alpha = alpha;
    2420      this.beta = beta;
    2521    }
    2622
    27 
    28     public double[] SampleExpectedRewards(Random random) {
     23    public double SampleExpectedReward(Random random) {
    2924      // sample bernoulli mean from beta prior
    30       var theta = new double[numActions];
    31       for (int a = 0; a < numActions; a++) {
    32         if (success[a] == -1)
    33           theta[a] = 0.0;
    34         else {
    35           theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
    36         }
    37       }
    38 
    39       // no need to sample we know the exact expected value
    40       // the expected value of a bernoulli variable is just theta
    41       return theta.Select(t => t).ToArray();
     25      return Rand.BetaRand(random, success + alpha, failure + beta);
    4226    }
    4327
    44     public void Update(int action, double reward) {
    45       const double EPSILON = 1E-6;
    46       Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
    47       if (Math.Abs(reward - 1.0) < EPSILON) {
    48         success[action]++;
     28    public void Update(double reward) {
     29      Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
     30      if (reward.IsAlmost(1.0)) {
     31        success++;
    4932      } else {
    50         failure[action]++;
     33        failure++;
    5134      }
    5235    }
    5336
    54     public void Disable(int action) {
    55       success[action] = -1;
    56     }
    57 
    5837    public void Reset() {
    59       Array.Clear(success, 0, numActions);
    60       Array.Clear(failure, 0, numActions);
     38      success = 0;
     39      failure = 0;
    6140    }
    6241
    6342    public void PrintStats() {
    64       for (int i = 0; i < numActions; i++) {
    65         Console.Write("{0:F2} ", success[i] / (double)failure[i]);
    66       }
     43      Console.Write("{0:F2} ", success / (double)failure);
     44    }
     45
     46    public object Clone() {
     47      return new BernoulliModel() { failure = this.failure, success = this.success };
    6748    }
    6849  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs

    r11730 r11732  
    11using System;
    2 using System.Collections.Generic;
    3 using System.Diagnostics;
    4 using System.Linq;
    5 using System.Text;
    6 using System.Threading.Tasks;
    72using HeuristicLab.Common;
    83
    94namespace HeuristicLab.Algorithms.Bandits.Models {
    10   // bayesian estimation of a Gaussian with unknown mean and known variance
     5  // bayesian estimation of a Gaussian with
     6  // 1) unknown mean and known variance
     7  // 2) unknown mean and unknown variance
    118  public class GaussianModel : IModel {
    12     private readonly int numActions;
    13     private readonly int[] tries;
    14     private readonly double[] sumRewards;
    15 
     9    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
    1610
    1711    // parameters of Gaussian prior for mean
     
    1913    private readonly double meanPriorVariance;
    2014
     15    private readonly bool knownVariance;
    2116    private readonly double rewardVariance = 0.1; // assumed know reward variance
    2217
    23     public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
    24       this.numActions = numActions;
    25       this.tries = new int[numActions];
    26       this.sumRewards = new double[numActions];
     18    // parameters of Gamma prior for precision (= inverse variance)
     19    private readonly int precisionPriorAlpha;
     20    private readonly double precisionPriorBeta;
     21
     22    // non-informative prior
     23    private const double priorK = 1.0;
     24
     25    // this constructor assumes the variance is known
     26    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
    2727      this.meanPriorMu = meanPriorMu;
    2828      this.meanPriorVariance = meanPriorVariance;
     29
     30      this.knownVariance = true;
     31      this.rewardVariance = rewardVariance;
     32    }
     33
     34    // this constructor assumes the variance is also unknown
     35    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89
     36    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
     37      this.meanPriorMu = meanPriorMu;
     38      this.meanPriorVariance = meanPriorVariance;
     39
     40      this.knownVariance = false;
     41      this.precisionPriorAlpha = precisionPriorAlpha;
     42      this.precisionPriorBeta = precisionPriorBeta;
    2943    }
    3044
    3145
    32     public double[] SampleExpectedRewards(Random random) {
     46    public double SampleExpectedReward(Random random) {
     47      if (knownVariance) {
     48        return SampleExpectedRewardKnownVariance(random);
     49      } else {
     50        return SampleExpectedRewardUnknownVariance(random);
     51      }
     52    }
     53
     54    private double SampleExpectedRewardKnownVariance(Random random) {
    3355      // expected values for reward
    34       var theta = new double[numActions];
     56      // calculate posterior mean and variance (for mean reward)
    3557
    36       for (int a = 0; a < numActions; a++) {
    37         if (tries[a] == -1) {
    38           theta[a] = double.NegativeInfinity; // disabled action
    39         } else {
    40           // calculate posterior mean and variance (for mean reward)
     58      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
     59      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
     60      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
    4161
    42           // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    43           var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
    44           var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
     62      // sample a mean from the posterior
     63      var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
     64      // theta already represents the expected reward value => nothing else to do
     65      return posteriorMeanSample;
    4566
    46           // sample a mean from the posterior
    47           theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
    48           // theta already represents the expected reward value => nothing else to do
    49         }
     67      // return 0.99-quantile value
     68      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
     69    }
     70
     71    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
     72    private double SampleExpectedRewardUnknownVariance(Random random) {
     73
     74      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
     75      var posteriorK = priorK + estimator.N;
     76      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
     77      double posteriorBeta;
     78      if (estimator.N > 0) {
     79        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
     80      } else {
     81        posteriorBeta = precisionPriorBeta;
    5082      }
    5183
     84      // sample from the posterior marginal for mu (expected value) equ. 91
     85      // p(µ|D) = T2αn (µ| µn, βn/(αnκn))
     86
     87      // sample from Tk distribution : http://stats.stackexchange.com/a/70270
     88      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
     89
     90      var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
    5291      return theta;
     92
     93      //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
     94      //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
    5395    }
    5496
    55     public void Update(int action, double reward) {
    56       sumRewards[action] += reward;
    57       tries[action]++;
    58     }
    5997
    60     public void Disable(int action) {
    61       tries[action] = -1;
    62       sumRewards[action] = 0.0;
     98    public void Update(double reward) {
     99      estimator.UpdateReward(reward);
    63100    }
    64101
    65102    public void Reset() {
    66       Array.Clear(tries, 0, numActions);
    67       Array.Clear(sumRewards, 0, numActions);
     103      estimator.Reset();
    68104    }
    69105
    70106    public void PrintStats() {
    71       for (int i = 0; i < numActions; i++) {
    72         Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
    73       }
     107      Console.Write("{0:F2} ", estimator.Avg);
     108    }
     109
     110    public object Clone() {
     111      if (knownVariance)
     112        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
     113      else
     114        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    74115    }
    75116  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/IModel.cs

    r11730 r11732  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
    8   public interface IModel {
    9     double[] SampleExpectedRewards(Random random);
    10     void Update(int action, double reward);
    11     void Disable(int action);
     8  // represents a model for the reward distribution (of an action given a state)
     9  public interface IModel : ICloneable {
     10    double SampleExpectedReward(Random random);
     11    void Update(double reward);
    1212    void Reset();
    1313    void PrintStats();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs

    r11730 r11732  
    77
    88namespace HeuristicLab.Algorithms.Bandits {
    9   public abstract class BanditPolicy : IPolicy {
     9  public abstract class BanditPolicy<TPolicyActionInfo> : IPolicy<TPolicyActionInfo> where TPolicyActionInfo : IPolicyActionInfo {
    1010    public IEnumerable<int> Actions { get; private set; }
    1111    private readonly int numInitialActions;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs

    r11730 r11732  
    88
    99namespace HeuristicLab.Algorithms.Bandits {
    10   public class BernoulliThompsonSamplingPolicy : BanditPolicy {
    11     private readonly Random random;
    12     private readonly int[] success;
    13     private readonly int[] failure;
    14 
     10  public class BernoulliThompsonSamplingPolicy : IPolicy {
    1511    // parameters of beta prior distribution
    1612    private readonly double alpha = 1.0;
    1713    private readonly double beta = 1.0;
    1814
    19     public BernoulliThompsonSamplingPolicy(Random random, int numActions)
    20       : base(numActions) {
    21       this.random = random;
    22       this.success = new int[numActions];
    23       this.failure = new int[numActions];
    24     }
     15    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     16      var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>(); // TODO: performance
     17      int bestAction = -1;
     18      double maxTheta = double.NegativeInfinity;
     19      var aIdx = -1;
    2520
    26     public override int SelectAction() {
    27       Debug.Assert(Actions.Any());
    28       var maxTheta = double.NegativeInfinity;
    29       int bestAction = -1;
    30       foreach (var a in Actions) {
    31         var theta = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
     21      foreach (var aInfo in myActionInfos) {
     22        aIdx++;
     23        if (aInfo.Disabled) continue;
     24        var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
    3225        if (theta > maxTheta) {
    3326          maxTheta = theta;
    34           bestAction = a;
     27          bestAction = aIdx;
    3528        }
    3629      }
     30      Debug.Assert(bestAction > -1);
    3731      return bestAction;
    3832    }
    3933
    40     public override void UpdateReward(int action, double reward) {
    41       Debug.Assert(Actions.Contains(action));
    42 
    43       if (reward > 0) success[action]++;
    44       else failure[action]++;
     34    public IPolicyActionInfo CreateActionInfo() {
     35      return new BernoulliPolicyActionInfo();
    4536    }
    4637
    47     public override void DisableAction(int action) {
    48       base.DisableAction(action);
    49       success[action] = -1;
    50     }
    51 
    52     public override void Reset() {
    53       base.Reset();
    54       Array.Clear(success, 0, success.Length);
    55       Array.Clear(failure, 0, failure.Length);
    56     }
    57 
    58     public override void PrintStats() {
    59       for (int i = 0; i < success.Length; i++) {
    60         if (success[i] >= 0) {
    61           Console.Write("{0,5:F2}", success[i] / failure[i]);
    62         } else {
    63           Console.Write("{0,5}", "");
    64         }
    65       }
    66       Console.WriteLine();
    67     }
    6838
    6939    public override string ToString() {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs

    r11730 r11732  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    89namespace HeuristicLab.Algorithms.Bandits {
    910  // also called softmax policy
    10   public class BoltzmannExplorationPolicy : BanditPolicy {
    11     private readonly Random random;
    12     private readonly double eps;
    13     private readonly int[] tries;
    14     private readonly double[] sumReward;
     11  public class BoltzmannExplorationPolicy : IPolicy {
    1512    private readonly double beta;
    1613
    17     public BoltzmannExplorationPolicy(Random random, int numActions, double beta)
    18       : base(numActions) {
     14    public BoltzmannExplorationPolicy(double beta) {
    1915      if (beta < 0) throw new ArgumentException();
    20       this.random = random;
    2116      this.beta = beta;
    22       this.tries = new int[numActions];
    23       this.sumReward = new double[numActions];
     17    }
     18    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     19      Debug.Assert(actionInfos.Any());
     20
     21      // select best
     22      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     23      Debug.Assert(myActionInfos.Any(a => !a.Disabled));
     24      double[] w = new double[myActionInfos.Length];
     25
     26      for (int a = 0; a < myActionInfos.Length; a++) {
     27        if (myActionInfos[a].Disabled) {
     28          w[a] = 0; continue;
     29        }
     30        if (myActionInfos[a].Tries == 0) return a;
     31        var sumReward = myActionInfos[a].SumReward;
     32        var tries = myActionInfos[a].Tries;
     33        var avgReward = sumReward / tries;
     34        w[a] = Math.Exp(beta * avgReward);
     35      }
     36
     37
     38      var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
     39      Debug.Assert(bestAction >= 0);
     40      Debug.Assert(bestAction < w.Length);
     41      Debug.Assert(!myActionInfos[bestAction].Disabled);
     42      return bestAction;
    2443    }
    2544
    26     public override int SelectAction() {
    27       Debug.Assert(Actions.Any());
    28       // select best
    29       var maxReward = double.NegativeInfinity;
    30       int bestAction = -1;
    31       if (Actions.Any(a => tries[a] == 0))
    32         return Actions.First(a => tries[a] == 0);
    33 
    34       var ts = Actions.Select(a => Math.Exp(beta * sumReward[a] / tries[a]));
    35       var r = random.NextDouble() * ts.Sum();
    36 
    37       var agg = 0.0;
    38       foreach (var p in Actions.Zip(ts, Tuple.Create)) {
    39         agg += p.Item2;
    40         if (agg >= r) return p.Item1;
    41       }
    42       throw new InvalidProgramException();
    43     }
    44     public override void UpdateReward(int action, double reward) {
    45       Debug.Assert(Actions.Contains(action));
    46 
    47       tries[action]++;
    48       sumReward[action] += reward;
    49     }
    50 
    51     public override void DisableAction(int action) {
    52       base.DisableAction(action);
    53       sumReward[action] = 0;
    54       tries[action] = -1;
    55     }
    56 
    57     public override void Reset() {
    58       base.Reset();
    59       Array.Clear(tries, 0, tries.Length);
    60       Array.Clear(sumReward, 0, sumReward.Length);
    61     }
    62 
    63     public override void PrintStats() {
    64       for (int i = 0; i < sumReward.Length; i++) {
    65         if (tries[i] >= 0) {
    66           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    67         } else {
    68           Console.Write("{0,5}", "");
    69         }
    70       }
    71       Console.WriteLine();
     45    public IPolicyActionInfo CreateActionInfo() {
     46      return new DefaultPolicyActionInfo();
    7247    }
    7348
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs

    r11730 r11732  
    1010International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
    1111
    12   public class ChernoffIntervalEstimationPolicy : BanditPolicy {
    13     private readonly int[] tries;
    14     private readonly double[] sumReward;
    15     private int totalTries = 0;
     12  public class ChernoffIntervalEstimationPolicy : IPolicy {
    1613    private readonly double delta;
    1714
    18     public ChernoffIntervalEstimationPolicy(int numActions, double delta = 0.01)
    19       : base(numActions) {
     15    public ChernoffIntervalEstimationPolicy(double delta = 0.01) {
    2016      this.delta = delta;
    21       this.tries = new int[numActions];
    22       this.sumReward = new double[numActions];
    2317    }
    24 
    25     public override int SelectAction() {
     18    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     19      Debug.Assert(actionInfos.Any());
     20      // select best
     21      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     22      int k = myActionInfos.Length;
     23      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2624      int bestAction = -1;
    2725      double bestQ = double.NegativeInfinity;
    28       double k = Actions.Count();
    29       Debug.Assert(k > 0);
    30       foreach (var a in Actions) {
    31         if (tries[a] == 0) return a;
     26      for (int a = 0; a < myActionInfos.Length; a++) {
     27        if (myActionInfos[a].Disabled) continue;
     28        if (myActionInfos[a].Tries == 0) return a;
     29
     30        var sumReward = myActionInfos[a].SumReward;
     31        var tries = myActionInfos[a].Tries;
     32
     33        var avgReward = sumReward / tries;
     34
    3235        // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
    3336        // var alpha = Math.Log(2 * totalTries * k / delta);
    3437        double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
    35         double mu = sumReward[a] / tries[a];
    36         var q = mu + (alpha + Math.Sqrt(2 * tries[a] * mu * alpha + alpha * alpha)) / tries[a];
     38        var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
    3739        if (q > bestQ) {
    3840          bestQ = q;
     
    4042        }
    4143      }
     44      Debug.Assert(bestAction >= 0);
    4245      return bestAction;
    4346    }
    44     public override void UpdateReward(int action, double reward) {
    45       Debug.Assert(Actions.Contains(action));
    46       totalTries++;
    47       tries[action]++;
    48       sumReward[action] += reward;
     47
     48    public IPolicyActionInfo CreateActionInfo() {
     49      return new DefaultPolicyActionInfo();
    4950    }
    5051
    51     public override void DisableAction(int action) {
    52       base.DisableAction(action);
    53       totalTries -= tries[action];
    54       tries[action] = -1;
    55       sumReward[action] = 0;
    56     }
    57 
    58     public override void Reset() {
    59       base.Reset();
    60       totalTries = 0;
    61       Array.Clear(tries, 0, tries.Length);
    62       Array.Clear(sumReward, 0, sumReward.Length);
    63     }
    64 
    65     public override void PrintStats() {
    66       for (int i = 0; i < sumReward.Length; i++) {
    67         if (tries[i] >= 0) {
    68           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    69         } else {
    70           Console.Write("{0,5}", "");
    71         }
    72       }
    73       Console.WriteLine();
    74     }
    7552    public override string ToString() {
    7653      return string.Format("ChernoffIntervalEstimationPolicy({0:F2})", delta);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

    r11730 r11732  
    77
    88namespace HeuristicLab.Algorithms.Bandits {
    9   public class EpsGreedyPolicy : BanditPolicy {
    10     private readonly Random random;
     9  public class EpsGreedyPolicy : IPolicy {
    1110    private readonly double eps;
    12     private readonly int[] tries;
    13     private readonly double[] sumReward;
    1411    private readonly RandomPolicy randomPolicy;
    1512
    16     public EpsGreedyPolicy(Random random, int numActions, double eps)
    17       : base(numActions) {
    18       this.random = random;
     13    public EpsGreedyPolicy(double eps) {
    1914      this.eps = eps;
    20       this.randomPolicy = new RandomPolicy(random, numActions);
    21       this.tries = new int[numActions];
    22       this.sumReward = new double[numActions];
     15      this.randomPolicy = new RandomPolicy();
    2316    }
    24 
    25     public override int SelectAction() {
    26       Debug.Assert(Actions.Any());
     17    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18      Debug.Assert(actionInfos.Any());
    2719      if (random.NextDouble() > eps) {
    2820        // select best
    29         var bestQ = double.NegativeInfinity;
     21        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    3022        int bestAction = -1;
    31         foreach (var a in Actions) {
    32           if (tries[a] == 0) return a;
    33           var q = sumReward[a] / tries[a];
    34           if (bestQ < q) {
     23        double bestQ = double.NegativeInfinity;
     24        int aIdx = -1;
     25        foreach (var aInfo in myActionInfos) {
     26
     27          aIdx++;
     28          if (aInfo.Disabled) continue;
     29          if (aInfo.Tries == 0) return aIdx;
     30
     31
     32          var avgReward = aInfo.SumReward / aInfo.Tries;         
     33          //var q = avgReward;
     34          var q = aInfo.MaxReward;
     35          if (q > bestQ) {
    3536            bestQ = q;
    36             bestAction = a;
     37            bestAction = aIdx;
    3738          }
    3839        }
     
    4142      } else {
    4243        // select random
    43         return randomPolicy.SelectAction();
     44        return randomPolicy.SelectAction(random, actionInfos);
    4445      }
    4546    }
    46     public override void UpdateReward(int action, double reward) {
    47       Debug.Assert(Actions.Contains(action));
    4847
    49       randomPolicy.UpdateReward(action, reward); // does nothing
    50       tries[action]++;
    51       sumReward[action] += reward;
     48    public IPolicyActionInfo CreateActionInfo() {
     49      return new DefaultPolicyActionInfo();
    5250    }
    5351
    54     public override void DisableAction(int action) {
    55       base.DisableAction(action);
    56       randomPolicy.DisableAction(action);
    57       sumReward[action] = 0;
    58       tries[action] = -1;
    59     }
    6052
    61     public override void Reset() {
    62       base.Reset();
    63       randomPolicy.Reset();
    64       Array.Clear(tries, 0, tries.Length);
    65       Array.Clear(sumReward, 0, sumReward.Length);
    66     }
    67     public override void PrintStats() {
    68       for (int i = 0; i < sumReward.Length; i++) {
    69         if (tries[i] >= 0) {
    70           Console.Write(" {0,5:F2} {1}", sumReward[i] / tries[i], tries[i]);
    71         } else {
    72           Console.Write("-", "");
    73         }
    74       }
    75       Console.WriteLine();
    76     }
    7753    public override string ToString() {
    7854      return string.Format("EpsGreedyPolicy({0:F2})", eps);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

    r11730 r11732  
    11using System;
     2using System.Collections.Generic;
    23using System.Diagnostics;
    34using System.Linq;
     
    56
    67namespace HeuristicLab.Algorithms.Bandits {
    7  
    8   public class GaussianThompsonSamplingPolicy : BanditPolicy {
    9     private readonly Random random;
    10     private readonly double[] sampleMean;
    11     private readonly double[] sampleM2;
    12     private readonly int[] tries;
     8
     9  public class GaussianThompsonSamplingPolicy : IPolicy {
    1310    private bool compatibility;
    1411
     
    2118
    2219
    23     public GaussianThompsonSamplingPolicy(Random random, int numActions, bool compatibility = false)
    24       : base(numActions) {
    25       this.random = random;
    26       this.sampleMean = new double[numActions];
    27       this.sampleM2 = new double[numActions];
    28       this.tries = new int[numActions];
     20    public GaussianThompsonSamplingPolicy(bool compatibility = false) {
    2921      this.compatibility = compatibility;
    3022    }
    3123
     24    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     25      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
     26      int bestAction = -1;
     27      double bestQ = double.NegativeInfinity;
    3228
    33     public override int SelectAction() {
    34       Debug.Assert(Actions.Any());
    35       var maxTheta = double.NegativeInfinity;
    36       int bestAction = -1;
    37       foreach (var a in Actions) {
    38         if(tries[a] == -1) continue; // skip disabled actions
     29      int aIdx = -1;
     30      foreach (var aInfo in myActionInfos) {
     31        aIdx++;
     32        if (aInfo.Disabled) continue;
     33
     34        var tries = aInfo.Tries;
     35        var sampleMean = aInfo.AvgReward;
     36        var sampleVariance = aInfo.RewardVariance;
     37
    3938        double theta;
    4039        if (compatibility) {
    41           if (tries[a] < 2) return a;
    42           var mu = sampleMean[a];
    43           var variance = sampleM2[a] / tries[a];
     40          if (tries < 2) return aIdx;
     41          var mu = sampleMean;
     42          var variance = sampleVariance;
    4443          var stdDev = Math.Sqrt(variance);
    4544          theta = Rand.RandNormal(random) * stdDev + mu;
     
    4847
    4948          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    50           var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / priorVariance);
    51           var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries[a] * sampleMean[a] / rewardVariance);
     49          var posteriorVariance = 1.0 / (tries / rewardVariance + 1.0 / priorVariance);
     50          var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries * sampleMean / rewardVariance);
    5251
    5352          // sample a mean from the posterior
     
    5655          // theta already represents the expected reward value => nothing else to do
    5756        }
    58         if (theta > maxTheta) {
    59           maxTheta = theta;
    60           bestAction = a;
     57
     58        if (theta > bestQ) {
     59          bestQ = theta;
     60          bestAction = aIdx;
    6161        }
    6262      }
    63       Debug.Assert(Actions.Contains(bestAction));
     63      Debug.Assert(bestAction > -1);
    6464      return bestAction;
    6565    }
    6666
    67     public override void UpdateReward(int action, double reward) {
    68       Debug.Assert(Actions.Contains(action));
    69       tries[action]++;
    70       var delta = reward - sampleMean[action];
    71       sampleMean[action] += delta / tries[action];
    72       sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
     67    public IPolicyActionInfo CreateActionInfo() {
     68      return new MeanAndVariancePolicyActionInfo();
    7369    }
    7470
    75     public override void DisableAction(int action) {
    76       base.DisableAction(action);
    77       sampleMean[action] = 0;
    78       sampleM2[action] = 0;
    79       tries[action] = -1;
    80     }
    8171
    82     public override void Reset() {
    83       base.Reset();
    84       Array.Clear(sampleMean, 0, sampleMean.Length);
    85       Array.Clear(sampleM2, 0, sampleM2.Length);
    86       Array.Clear(tries, 0, tries.Length);
    87     }
     72    //public override void UpdateReward(int action, double reward) {
     73    //  Debug.Assert(Actions.Contains(action));
     74    //  tries[action]++;
     75    //  var delta = reward - sampleMean[action];
     76    //  sampleMean[action] += delta / tries[action];
     77    //  sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
     78    //}
    8879
    89     public override void PrintStats() {
    90       for (int i = 0; i < sampleMean.Length; i++) {
    91         if (tries[i] >= 0) {
    92           Console.Write(" {0,5:F2} {1}", sampleMean[i] / tries[i], tries[i]);
    93         } else {
    94           Console.Write("{0,5}", "");
    95         }
    96       }
    97       Console.WriteLine();
    98     }
    9980    public override string ToString() {
    10081      return "GaussianThompsonSamplingPolicy";
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs

    r11730 r11732  
    88
    99namespace HeuristicLab.Algorithms.Bandits {
    10   public class GenericThompsonSamplingPolicy : BanditPolicy {
    11     private readonly Random random;
     10  public class GenericThompsonSamplingPolicy : IPolicy {
    1211    private readonly IModel model;
    1312
    14     public GenericThompsonSamplingPolicy(Random random, int numActions, IModel model)
    15       : base(numActions) {
    16       this.random = random;
     13    public GenericThompsonSamplingPolicy(IModel model) {
    1714      this.model = model;
    1815    }
    1916
    20     public override int SelectAction() {
    21       Debug.Assert(Actions.Any());
    22       var maxR = double.NegativeInfinity;
     17    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
    2319      int bestAction = -1;
    24       var expRewards = model.SampleExpectedRewards(random);
    25       foreach (var a in Actions) {
    26         var r = expRewards[a];
    27         if (r > maxR) {
    28           maxR = r;
    29           bestAction = a;
     20      double bestQ = double.NegativeInfinity;
     21      var aIdx = -1;
     22      foreach (var aInfo in myActionInfos) {
     23        aIdx++;
     24        if (aInfo.Disabled) continue;
     25        //if (aInfo.Tries == 0) return aIdx;
     26        var q = aInfo.SampleExpectedReward(random);
     27        if (q > bestQ) {
     28          bestQ = q;
     29          bestAction = aIdx;
    3030        }
    3131      }
     32      Debug.Assert(bestAction > -1);
    3233      return bestAction;
    3334    }
    3435
    35     public override void UpdateReward(int action, double reward) {
    36       Debug.Assert(Actions.Contains(action));
    37 
    38       model.Update(action, reward);
    39     }
    40 
    41     public override void DisableAction(int action) {
    42       base.DisableAction(action);
    43       model.Disable(action);
    44     }
    45 
    46     public override void Reset() {
    47       base.Reset();
    48       model.Reset();
    49     }
    50 
    51     public override void PrintStats() {
    52       model.PrintStats();
     36    public IPolicyActionInfo CreateActionInfo() {
     37      return new ModelPolicyActionInfo((IModel)model.Clone());
    5338    }
    5439
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs

    r11730 r11732  
    88
    99namespace HeuristicLab.Algorithms.Bandits {
    10   public class RandomPolicy : BanditPolicy {
    11     private readonly Random random;
     10  public class RandomPolicy : IPolicy {
    1211
    13     public RandomPolicy(Random random, int numActions)
    14       : base(numActions) {
    15       this.random = random;
    16     }
    17 
    18     public override int SelectAction() {
    19       Debug.Assert(Actions.Any());
    20       return Actions.SelectRandom(random);
    21     }
    22     public override void UpdateReward(int action, double reward) {
    23       // do nothing
    24     }
    25     public override void PrintStats() {
    26       Console.WriteLine("Random");
    27     }
    2812    public override string ToString() {
    2913      return "RandomPolicy";
    3014    }
     15
     16    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     17      return actionInfos
     18        .Select((a, i) => Tuple.Create(a, i))
     19        .Where(p => !p.Item1.Disabled)
     20        .SelectRandom(random).Item2;
     21    }
     22
     23    public IPolicyActionInfo CreateActionInfo() {
     24      return new EmptyPolicyActionInfo();
     25    }
    3126  }
    3227}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11730 r11732  
    77
    88namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1Policy : BanditPolicy {
    10     private readonly int[] tries;
    11     private readonly double[] sumReward;
    12     private int totalTries = 0;
    13     public UCB1Policy(int numActions)
    14       : base(numActions) {
    15       this.tries = new int[numActions];
    16       this.sumReward = new double[numActions];
    17     }
    18 
    19     public override int SelectAction() {
     9  public class UCB1Policy : IPolicy {
     10    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     11      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    2012      int bestAction = -1;
    2113      double bestQ = double.NegativeInfinity;
    22       foreach (var a in Actions) {
    23         if (tries[a] == 0) return a;
    24         var q = sumReward[a] / tries[a] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[a]);
     14      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     15
     16      for (int a = 0; a < myActionInfos.Length; a++) {
     17        if (myActionInfos[a].Disabled) continue;
     18        if (myActionInfos[a].Tries == 0) return a;
     19        var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + Math.Sqrt((2 * Math.Log(totalTries)) / myActionInfos[a].Tries);
    2520        if (q > bestQ) {
    2621          bestQ = q;
     
    2823        }
    2924      }
     25      Debug.Assert(bestAction > -1);
    3026      return bestAction;
    3127    }
    32     public override void UpdateReward(int action, double reward) {
    33       Debug.Assert(Actions.Contains(action));
    34       totalTries++;
    35       tries[action]++;
    36       sumReward[action] += reward;
    37     }
    3828
    39     public override void DisableAction(int action) {
    40       base.DisableAction(action);
    41       totalTries -= tries[action];
    42       tries[action] = -1;
    43       sumReward[action] = 0;
    44     }
    45 
    46     public override void Reset() {
    47       base.Reset();
    48       totalTries = 0;
    49       Array.Clear(tries, 0, tries.Length);
    50       Array.Clear(sumReward, 0, sumReward.Length);
    51     }
    52     public override void PrintStats() {
    53       for (int i = 0; i < sumReward.Length; i++) {
    54         if (tries[i] >= 0) {
    55           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    56         } else {
    57           Console.Write("{0,5}", "");
    58         }
    59       }
    60       Console.WriteLine();
     29    public IPolicyActionInfo CreateActionInfo() {
     30      return new DefaultPolicyActionInfo();
    6131    }
    6232    public override string ToString() {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11730 r11732  
    77
    88namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1TunedPolicy : BanditPolicy {
    10     private readonly int[] tries;
    11     private readonly double[] sumReward;
    12     private readonly double[] sumSqrReward;
    13     private int totalTries = 0;
    14     public UCB1TunedPolicy(int numActions)
    15       : base(numActions) {
    16       this.tries = new int[numActions];
    17       this.sumReward = new double[numActions];
    18       this.sumSqrReward = new double[numActions];
    19     }
     9  public class UCB1TunedPolicy : IPolicy {
    2010
    21     private double V(int arm) {
    22       var s = tries[arm];
    23       return sumSqrReward[arm] / s - Math.Pow(sumReward[arm] / s, 2) + Math.Sqrt(2 * Math.Log(totalTries) / s);
    24     }
    25 
    26 
    27     public override int SelectAction() {
    28       Debug.Assert(Actions.Any());
     11    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     12      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
    2913      int bestAction = -1;
    3014      double bestQ = double.NegativeInfinity;
    31       foreach (var a in Actions) {
    32         if (tries[a] == 0) return a;
    33         var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable
     15      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16
     17      for (int a = 0; a < myActionInfos.Length; a++) {
     18        if (myActionInfos[a].Disabled) continue;
     19        if (myActionInfos[a].Tries == 0) return a;
     20
     21        var sumReward = myActionInfos[a].SumReward;
     22        var tries = myActionInfos[a].Tries;
     23
     24        var avgReward = sumReward / tries;
     25        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
    3426        if (q > bestQ) {
    3527          bestQ = q;
     
    3729        }
    3830      }
     31      Debug.Assert(bestAction > -1);
    3932      return bestAction;
    4033    }
    41     public override void UpdateReward(int action, double reward) {
    42       Debug.Assert(Actions.Contains(action));
    43       totalTries++;
    44       tries[action]++;
    45       sumReward[action] += reward;
    46       sumSqrReward[action] += reward * reward;
     34
     35    public IPolicyActionInfo CreateActionInfo() {
     36      return new MeanAndVariancePolicyActionInfo();
    4737    }
    4838
    49     public override void DisableAction(int action) {
    50       base.DisableAction(action);
    51       totalTries -= tries[action];
    52       tries[action] = -1;
    53       sumReward[action] = 0;
    54       sumSqrReward[action] = 0;
     39    private double V(MeanAndVariancePolicyActionInfo actionInfo, int totalTries) {
     40      var s = actionInfo.Tries;
     41      return actionInfo.RewardVariance + Math.Sqrt(2 * Math.Log(totalTries) / s);
    5542    }
    5643
    57     public override void Reset() {
    58       base.Reset();
    59       totalTries = 0;
    60       Array.Clear(tries, 0, tries.Length);
    61       Array.Clear(sumReward, 0, sumReward.Length);
    62       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
    63     }
    64     public override void PrintStats() {
    65       for (int i = 0; i < sumReward.Length; i++) {
    66         if (tries[i] >= 0) {
    67           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    68         } else {
    69           Console.Write("{0,5}", "");
    70         }
    71       }
    72       Console.WriteLine();
    73     }
    7444    public override string ToString() {
    7545      return "UCB1TunedPolicy";
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs

    r11730 r11732  
    77
    88namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCBNormalPolicy : BanditPolicy {
    10     private readonly int[] tries;
    11     private readonly double[] sumReward;
    12     private readonly double[] sumSqrReward;
    13     private int totalTries = 0;
    14     public UCBNormalPolicy(int numActions)
    15       : base(numActions) {
    16       this.tries = new int[numActions];
    17       this.sumReward = new double[numActions];
    18       this.sumSqrReward = new double[numActions];
    19     }
     9  public class UCBNormalPolicy : IPolicy {
    2010
    21     public override int SelectAction() {
    22       Debug.Assert(Actions.Any());
     11    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     12      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
    2313      int bestAction = -1;
    2414      double bestQ = double.NegativeInfinity;
    25       foreach (var a in Actions) {
    26         if (totalTries <= 1 || tries[a] <= 1 || tries[a] <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
    27         var avgReward = sumReward[a] / tries[a];
    28         var estVariance = 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]);
     15      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16
     17      for (int a = 0; a < myActionInfos.Length; a++) {
     18        if (myActionInfos[a].Disabled) continue;
     19        if (totalTries <= 1 || myActionInfos[a].Tries <= 1 || myActionInfos[a].Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
     20
     21        var tries = myActionInfos[a].Tries;
     22        var avgReward = myActionInfos[a].AvgReward;
     23        var rewardVariance = myActionInfos[a].RewardVariance;
     24        var estVariance = 16 * rewardVariance * (Math.Log(totalTries - 1) / tries);
    2925        if (estVariance < 0) estVariance = 0; // numerical problems
    3026        var q = avgReward
     
    3531        }
    3632      }
    37       Debug.Assert(Actions.Contains(bestAction));
     33      Debug.Assert(bestAction > -1);
    3834      return bestAction;
    3935    }
    40     public override void UpdateReward(int action, double reward) {
    41       Debug.Assert(Actions.Contains(action));
    42       totalTries++;
    43       tries[action]++;
    44       sumReward[action] += reward;
    45       sumSqrReward[action] += reward * reward;
     36
     37    public IPolicyActionInfo CreateActionInfo() {
     38      return new MeanAndVariancePolicyActionInfo();
    4639    }
    4740
    48     public override void DisableAction(int action) {
    49       base.DisableAction(action);
    50       totalTries -= tries[action];
    51       tries[action] = -1;
    52       sumReward[action] = 0;
    53       sumSqrReward[action] = 0;
    54     }
    55 
    56     public override void Reset() {
    57       base.Reset();
    58       totalTries = 0;
    59       Array.Clear(tries, 0, tries.Length);
    60       Array.Clear(sumReward, 0, sumReward.Length);
    61       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
    62     }
    63     public override void PrintStats() {
    64       for (int i = 0; i < sumReward.Length; i++) {
    65         if (tries[i] >= 0) {
    66           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    67         } else {
    68           Console.Write("{0,5}", "");
    69         }
    70       }
    71       Console.WriteLine();
    72     }
    7341    public override string ToString() {
    7442      return "UCBNormalPolicy";
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCTPolicy.cs

    r11730 r11732  
    88namespace HeuristicLab.Algorithms.Bandits {
    99  /* Kocsis et al. Bandit based Monte-Carlo Planning */
    10   public class UCTPolicy : BanditPolicy {
    11     private readonly int[] tries;
    12     private readonly double[] sumReward;
    13     private int totalTries = 0;
     10  public class UCTPolicy : IPolicy {
    1411    private readonly double c;
    1512
    16     public UCTPolicy(int numActions, double c = 1.0)
    17       : base(numActions) {
    18       this.tries = new int[numActions];
    19       this.sumReward = new double[numActions];
     13    public UCTPolicy(double c = 1.0) {
    2014      this.c = c;
    2115    }
    2216
    23     public override int SelectAction() {
     17
     18    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     19      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    2420      int bestAction = -1;
    2521      double bestQ = double.NegativeInfinity;
    26       foreach (var a in Actions) {
    27         if (tries[a] == 0) return a;
    28         var q = sumReward[a] / tries[a] + 2 * c * Math.Sqrt(Math.Log(totalTries) / tries[a]);
     22      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     23
     24      for (int a = 0; a < myActionInfos.Length; a++) {
     25        if (myActionInfos[a].Disabled) continue;
     26        if (myActionInfos[a].Tries == 0) return a;
     27        var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
    2928        if (q > bestQ) {
    3029          bestQ = q;
     
    3231        }
    3332      }
     33      Debug.Assert(bestAction > -1);
    3434      return bestAction;
    3535    }
    36     public override void UpdateReward(int action, double reward) {
    37       Debug.Assert(Actions.Contains(action));
    38       totalTries++;
    39       tries[action]++;
    40       sumReward[action] += reward;
     36
     37    public IPolicyActionInfo CreateActionInfo() {
     38      return new DefaultPolicyActionInfo();
    4139    }
    4240
    43     public override void DisableAction(int action) {
    44       base.DisableAction(action);
    45       totalTries -= tries[action];
    46       tries[action] = -1;
    47       sumReward[action] = 0;
    48     }
    49 
    50     public override void Reset() {
    51       base.Reset();
    52       totalTries = 0;
    53       Array.Clear(tries, 0, tries.Length);
    54       Array.Clear(sumReward, 0, sumReward.Length);
    55     }
    56     public override void PrintStats() {
    57       for (int i = 0; i < sumReward.Length; i++) {
    58         if (tries[i] >= 0) {
    59           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
    60         } else {
    61           Console.Write("{0,5}", "");
    62         }
    63       }
    64       Console.WriteLine();
    65     }
    6641    public override string ToString() {
    6742      return string.Format("UCTPolicy({0:F2})", c);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs

    r11730 r11732  
    1717    private readonly Random random;
    1818    private readonly int contextLen;
    19     private readonly Func<Random, int, IPolicy> policyFactory;
     19    private readonly IPolicy policy;
    2020
    21     public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, Func<Random, int, IPolicy> policyFactory) {
     21    public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
    2222      this.maxLen = maxLen;
    2323      this.problem = problem;
    2424      this.random = random;
    2525      this.contextLen = contextLen;
    26       this.policyFactory = policyFactory;
     26      this.policy = policy;
    2727    }
    2828
     
    3232      for (int i = 0; i < maxIterations; i++) {
    3333        var sentence = SampleSentence(problem.Grammar).ToString();
    34         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     34        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    3535        DistributeReward(quality);
    3636
     
    4545
    4646
    47     private Dictionary<string, IPolicy> ntPolicy;
     47    private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
    4848    private List<Tuple<string, int>> updateChain;
    4949
    5050    private void InitPolicies(IGrammar grammar) {
    51       this.ntPolicy = new Dictionary<string, IPolicy>();
     51      this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
    5252      this.updateChain = new List<Tuple<string, int>>();
    5353    }
     
    8383          var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
    8484          lft = problem.Hash(lft);
    85           if (!ntPolicy.ContainsKey(lft)) {
    86             ntPolicy.Add(lft, policyFactory(random, g.GetAlternatives(nt).Count()));
     85          if (!contextActionInfos.ContainsKey(lft)) {
     86            contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
    8787          }
    88           var selectedAltIdx = ntPolicy[lft].SelectAction();
     88          var selectedAltIdx = policy.SelectAction(random, contextActionInfos[lft]);
    8989          selectedAlt = alts.ElementAt(selectedAltIdx);
    9090          updateChain.Add(Tuple.Create(lft, selectedAltIdx));
     
    103103        var lft = e.Item1;
    104104        var action = e.Item2;
    105         ntPolicy[lft].UpdateReward(action, reward);
     105        contextActionInfos[lft][action].UpdateReward(reward);
    106106      }
    107107    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs

    r11730 r11732  
    1616    private readonly Random random;
    1717    private readonly IProblem problem;
     18    private readonly IPolicy policy;
    1819
    19     public AlternativesSampler(IProblem problem, int maxLen) {
     20    public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
    2021      this.problem = problem;
    2122      this.maxLen = maxLen;
    22       this.random = new Random(31415);
     23      this.random = new Random();
     24      this.policy = policy;
    2325    }
    2426
     
    2830      for (int i = 0; i < maxIterations; i++) {
    2931        var sentence = SampleSentence(problem.Grammar).ToString();
    30         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     32        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    3133        DistributeReward(quality);
    3234
     
    4143
    4244
    43     private Dictionary<char, IPolicy> ntPolicy;
     45    private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
    4446    private List<Tuple<char, int>> updateChain;
    4547
    4648    private void InitPolicies(IGrammar grammar) {
    47       this.ntPolicy = new Dictionary<char, IPolicy>();
     49      this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
    4850      this.updateChain = new List<Tuple<char, int>>();
    4951      foreach (var nt in grammar.NonTerminalSymbols) {
    50         ntPolicy.Add(nt, new EpsGreedyPolicy(random, grammar.GetAlternatives(nt).Count(), 0.1));
     52        ntActionInfos.Add(nt, grammar.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
    5153      }
    5254    }
     
    7779        } else {
    7880          // all alts are allowed => select using bandit policy
    79           var selectedAltIdx = ntPolicy[nt].SelectAction();
     81          var selectedAltIdx = policy.SelectAction(random, ntActionInfos[nt]);
    8082          selectedAlt = alts.ElementAt(selectedAltIdx);
    8183          updateChain.Add(Tuple.Create(nt, selectedAltIdx));
     
    9597        var nt = e.Item1;
    9698        var action = e.Item2;
    97         ntPolicy[nt].UpdateReward(action, reward);
     99        ntActionInfos[nt][action].UpdateReward(reward);
    98100      }
    99101    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

    r11730 r11732  
    2626      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
    2727        var sentence = sentenceEnumerator.Current.ToString();
    28         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     28        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    2929        RaiseSolutionEvaluated(sentence, quality);
    3030
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs

    r11730 r11732  
    2626      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
    2727        var sentence = sentenceEnumerator.Current.ToString();
    28         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     28        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    2929        RaiseSolutionEvaluated(sentence, quality);
    3030
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11727 r11732  
    4545    <Compile Include="AlternativesSampler.cs" />
    4646    <Compile Include="AlternativesContextSampler.cs" />
     47    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
    4748    <Compile Include="MctsSampler.cs" />
    4849    <Compile Include="ExhaustiveDepthFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11730 r11732  
    1313      public int randomTries;
    1414      public int policyTries;
    15       public IPolicy policy;
     15      public IPolicyActionInfo actionInfo;
    1616      public TreeNode[] children;
    1717      public bool done = false;
     
    2222
    2323      public override string ToString() {
    24         return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, policy);
     24        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
    2525      }
    2626    }
     
    3434    private readonly Random random;
    3535    private readonly int randomTries;
    36     private readonly Func<Random, int, IPolicy> policyFactory;
     36    private readonly IPolicy policy;
    3737
    38     private List<Tuple<TreeNode, int>> updateChain;
     38    private List<TreeNode> updateChain;
    3939    private TreeNode rootNode;
    4040
     
    4242    public int treeSize;
    4343
    44     public MctsSampler(IProblem problem, int maxLen, Random random) :
    45       this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
     44    // public MctsSampler(IProblem problem, int maxLen, Random random) :
     45    //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
     46    //
     47    // }
    4648
    47     }
    48 
    49     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, Func<Random, int, IPolicy> policyFactory) {
     49    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
    5050      this.maxLen = maxLen;
    5151      this.problem = problem;
    5252      this.random = random;
    5353      this.randomTries = randomTries;
    54       this.policyFactory = policyFactory;
     54      this.policy = policy;
    5555    }
    5656
     
    6060      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
    6161        var sentence = SampleSentence(problem.Grammar).ToString();
    62         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     62        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    6363        Debug.Assert(quality >= 0 && quality <= 1.0);
    6464        DistributeReward(quality);
     
    7979      var n = rootNode;
    8080      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
    81       while (n.policy != null) {
     81      while (n.children != null) {
    8282        Console.WriteLine();
    8383        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
     
    9090
    9191    private void InitPolicies(IGrammar grammar) {
    92       this.updateChain = new List<Tuple<TreeNode, int>>();
     92      this.updateChain = new List<TreeNode>();
    9393
    9494      rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
     95      rootNode.actionInfo = policy.CreateActionInfo();
    9596      treeDepth = 0;
    9697      treeSize = 0;
     
    108109      TreeNode n = rootNode;
    109110      bool done = phrase.IsTerminal;
    110       int selectedAltIdx = -1;
    111111      var curDepth = 0;
    112112      while (!done) {
    113         char nt = phrase.FirstNonTerminal;
    114 
    115         int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
    116         Debug.Assert(maxLenOfReplacement > 0);
    117 
    118         var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
     113        updateChain.Add(n);
    119114
    120115        if (n.randomTries < randomTries) {
    121116          n.randomTries++;
     117          treeDepth = Math.Max(treeDepth, curDepth);
     118          return g.CompleteSentenceRandomly(random, phrase, maxLen);
     119        } else {
     120          char nt = phrase.FirstNonTerminal;
    122121
    123           treeDepth = Math.Max(treeDepth, curDepth);
     122          int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
     123          Debug.Assert(maxLenOfReplacement > 0);
    124124
    125           return g.CompleteSentenceRandomly(random, phrase, maxLen);
    126         } else if (n.randomTries == randomTries && n.policy == null) {
    127           n.policy = policyFactory(random, alts.Count());
    128           //n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
    129           n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
     125          var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
    130126
    131           treeSize += n.children.Length;
    132         }
    133         n.policyTries++;
    134         // => select using bandit policy
    135         selectedAltIdx = n.policy.SelectAction();
    136         Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
     127          if (n.randomTries == randomTries && n.children == null) {
     128            n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
     129            //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
     130            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
     131            treeSize += n.children.Length;
     132          }
     133          n.policyTries++;
     134          // => select using bandit policy
     135          int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
     136          Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
    137137
    138         // replace nt with alt
    139         phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
     138          // replace nt with alt
     139          phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
    140140
    141         updateChain.Add(Tuple.Create(n, selectedAltIdx));
     141          curDepth++;
    142142
    143         curDepth++;
     143          done = phrase.IsTerminal;
    144144
    145         done = phrase.IsTerminal;
    146         if (!done) {
    147145          // prepare for next iteration
    148146          n = n.children[selectedAltIdx];
    149           Debug.Assert(!n.done);
    150147        }
    151148      } // while
    152149
     150      updateChain.Add(n);
     151
     152
    153153      // the last node is a leaf node (sentence is done), so we never need to visit this node again
    154       n.children[selectedAltIdx].done = true;
     154      n.done = true;
     155      n.actionInfo.Disable();
    155156
    156157      treeDepth = Math.Max(treeDepth, curDepth);
     
    163164
    164165      foreach (var e in updateChain) {
    165         var node = e.Item1;
    166         var policy = node.policy;
    167         var action = e.Item2;
    168         //policy.UpdateReward(action, reward / updateChain.Count);
    169         policy.UpdateReward(action, reward);
    170 
    171         if (node.children[action].done) node.policy.DisableAction(action);
    172         if (node.children.All(c => c.done)) node.done = true;
     166        var node = e;
     167        if (node.children != null && node.children.All(c => c.done)) {
     168          node.done = true;
     169          node.actionInfo.Disable();
     170        }
     171        if (!node.done) {
     172          node.actionInfo.UpdateReward(reward);
     173          //policy.UpdateReward(action, reward / updateChain.Count);
     174        }
    173175      }
    174176    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs

    r11730 r11732  
    2525      for (int i = 0; i < maxIterations; i++) {
    2626        var sentence = CreateSentence(problem.Grammar).ToString();
    27         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
     27        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    2828        RaiseSolutionEvaluated(sentence, quality);
    2929
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

    r11730 r11732  
    2929      }
    3030    }
     31
     32    public static double RSq(IEnumerable<double> xs, IEnumerable<double> ys) {
     33      // two pass implementation, but we don't care
     34      var meanX = xs.Average();
     35      var meanY = ys.Average();
     36
     37      var s = 0.0;
     38      var ssX = 0.0;
     39      var ssY = 0.0;
     40      foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {
     41        s += (p.x - meanX) * (p.y - meanY);
     42        ssX += Math.Pow(p.x - meanX, 2);
     43        ssY += Math.Pow(p.y - meanY, 2);
     44      }
     45
     46      if (s.IsAlmost(0)) return 0;
     47      if (ssX.IsAlmost(0) || ssY.IsAlmost(0)) return 0;
     48      return s * s / (ssX * ssY);
     49    }
     50
     51
    3152  }
    3253}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Rand.cs

    r11727 r11732  
    3232     * For Gamma(a,b), scale the result by b.
    3333     */
    34     private static double GammaRand(Random random, double a) {
     34    public static double GammaRand(Random random, double a) {
    3535      /* Algorithm:
    3636       * G. Marsaglia and W.W. Tsang, A simple method for generating gamma
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj

    r11730 r11732  
    3939  </PropertyGroup>
    4040  <ItemGroup>
     41    <Reference Include="HeuristicLab.Problems.Instances-3.3">
     42      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
     43    </Reference>
     44    <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3">
     45      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath>
     46    </Reference>
    4147    <Reference Include="System" />
    4248    <Reference Include="System.Core">
     
    5763  </Choose>
    5864  <ItemGroup>
     65    <Compile Include="TestSymbRegInstances.cs" />
    5966    <Compile Include="TestSequence.cs" />
    6067    <Compile Include="TestBanditPolicies.cs" />
     
    7178      <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project>
    7279      <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
     80    </ProjectReference>
     81    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj">
     82      <Project>{17a7a380-86ce-482d-8d22-cbd70cc97f0d}</Project>
     83      <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name>
    7384    </ProjectReference>
    7485    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

    r11730 r11732  
    1010  [TestClass]
    1111  public class TestBanditPolicies {
     12    [TestMethod]
     13    public void ComparePoliciesForGaussianUnknownVarianceBandit() {
     14      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     15      var randSeed = 31415;
     16      var nArms = 20;
     17
     18      // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01));
     19      // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01));
     20      // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01));
     21      // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01));
     22      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1)));
     23      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1)));
     24      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     25      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     26      Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
     27      Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
     28
     29    }
    1230
    1331
     
    1533    public void ComparePoliciesForBernoulliBandit() {
    1634      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    17 
    18       var globalRand = new Random(31415);
    19       var seedForPolicy = globalRand.Next();
     35      var randSeed = 31415;
    2036      var nArms = 20;
    2137      //Console.WriteLine("Exp3 (gamma=0.01)");
     
    2339      //Console.WriteLine("Exp3 (gamma=0.05)");
    2440      //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
    25       Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    26       Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));
     41      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy());
     42      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel()));
    2743      Console.WriteLine("Random");
    28       TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     44      TestPolicyBernoulli(randSeed, nArms, new RandomPolicy());
    2945      Console.WriteLine("UCB1");
    30       TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
     46      TestPolicyBernoulli(randSeed, nArms, new UCB1Policy());
    3147      Console.WriteLine("UCB1Tuned");
    32       TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
     48      TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy());
    3349      Console.WriteLine("UCB1Normal");
    34       TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
     50      TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy());
    3551      Console.WriteLine("Eps(0.01)");
    36       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     52      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01));
    3753      Console.WriteLine("Eps(0.05)");
    38       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     54      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05));
    3955      //Console.WriteLine("Eps(0.1)");
    40       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     56      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1));
    4157      //Console.WriteLine("Eps(0.2)");
    42       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     58      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2));
    4359      //Console.WriteLine("Eps(0.5)");
    44       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    45       Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1));
    46       Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5));
    47       Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 1));
    48       Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2));
    49       Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5));
    50       Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
    51       Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
    52       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    53       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    54       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    55       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
    56       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
    57       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     60      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5));
     61      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1));
     62      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5));
     63      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1));
     64      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2));
     65      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5));
     66      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1));
     67      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5));
     68      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1));
     69      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10));
     70      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100));
     71      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01));
     72      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05));
     73      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1));
    5874
    5975      // not applicable to bernoulli rewards
     
    7086
    7187    [TestMethod]
    72     public void ComparePoliciesForNormalBandit() {
    73       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    74 
    75       var globalRand = new Random(31415);
    76       var seedForPolicy = globalRand.Next();
    77       var nArms = 20;
    78       Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
    79       Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    80       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     88    public void ComparePoliciesForGaussianBandit() {
     89      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     90
     91      var randSeed = 31415;
     92      var nArms = 20;
     93      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     94      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     95      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
    8196      /*
    82       Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    83       Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    84       Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    85       Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
     97      Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     98      Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms));
     99      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms));
     100      Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms));
    86101      //Console.WriteLine("Exp3 (gamma=0.01)");
    87       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
     102      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    88103      //Console.WriteLine("Exp3 (gamma=0.05)");
    89       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    90       Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    91       Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     104      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
     105      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     106      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    92107      //Console.WriteLine("Eps(0.1)");
    93       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     108      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    94109      //Console.WriteLine("Eps(0.2)");
    95       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     110      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    96111      //Console.WriteLine("Eps(0.5)");
    97       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    98       Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1));
    99       Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.5));
    100       Console.WriteLine("UCT(1)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1));
    101       Console.WriteLine("UCT(2)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2));
    102       Console.WriteLine("UCT(5)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5));
    103       Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
    104       Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
    105       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    106       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    107       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    108       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
    109       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
    110       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
    111       Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
    112       Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
    113       Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
    114       Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
    115       Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
    116       Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
    117       Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
    118       Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
    119       Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
     112      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
     113      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1));
     114      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5));
     115      Console.WriteLine("UCT(1)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1));
     116      Console.WriteLine("UCT(2)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2));
     117      Console.WriteLine("UCT(5)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5));
     118      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
     119      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
     120      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     121      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     122      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     123      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
     124      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
     125      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     126      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     127      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
     128      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
     129      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     130      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
     131      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
     132      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     133      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
     134      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
    120135       */
    121136    }
     
    124139    public void ComparePoliciesForGaussianMixtureBandit() {
    125140      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    126 
    127       var globalRand = new Random(31415);
    128       var seedForPolicy = globalRand.Next();
    129       var nArms = 20;
    130       Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
    131       Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    132       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     141      var randSeed = 31415;
     142      var nArms = 20;
     143      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     144      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     145      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
    133146
    134147      /*
    135       Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    136       Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms));
    137       Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms));
    138       Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms));
    139       Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    140       Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    141       Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1));
    142       Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2));
    143       Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5));
    144       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    145       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    146       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    147 
    148       Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
    149       Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
    150       Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
    151       Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
     148      Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     149      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms));
     150      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms));
     151      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms));
     152      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     153      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     154      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1));
     155      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2));
     156      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5));
     157      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     158      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     159      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     160
     161      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     162      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     163      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     164      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
    152165       */
    153166    }
    154167
    155168
    156     private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
    157       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
    158     }
    159     private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
    160       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
    161     }
    162     private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) {
    163       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
    164     }
    165 
    166 
    167     private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
     169    private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) {
     170      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
     171    }
     172    private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) {
     173      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
     174    }
     175    private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) {
     176      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
     177    }
     178    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) {
     179      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
     180    }
     181
     182
     183    private void TestPolicy(int randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
    168184      var maxIt = 1E5;
    169       var reps = 30; // independent runs
     185      var reps = 10; // independent runs
    170186      var regretForIteration = new Dictionary<int, List<double>>();
    171187      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
    172188      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
     189      var globalRandom = new Random(randSeed);
     190      var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
     191      var policyRandom = new Random(globalRandom.Next());
     192
    173193      // calculate statistics
    174194      for (int r = 0; r < reps; r++) {
    175195        var nextLogStep = 1;
    176         var b = banditFactory(new Random(globalRand.Next()), nArms);
    177         policy.Reset();
     196        var b = banditFactory(banditRandom, nArms);
    178197        var totalRegret = 0.0;
    179198        var totalPullsOfSuboptimalArmsExp = 0.0;
    180199        var totalPullsOfSuboptimalArmsMax = 0.0;
     200        var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
    181201        for (int i = 0; i <= maxIt; i++) {
    182           var selectedAction = policy.SelectAction();
     202          var selectedAction = policy.SelectAction(policyRandom, actionInfos);
    183203          var reward = b.Pull(selectedAction);
    184           policy.UpdateReward(selectedAction, reward);
     204          actionInfos[selectedAction].UpdateReward(reward);
    185205
    186206          // collect stats
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs

    r11727 r11732  
    2020    private readonly ExpressionInterpreter interpreter = new ExpressionInterpreter();
    2121    public EvenParityProblem() {
    22       this.grammar = new Grammar(grammarString);
     22      this.grammar = new Grammar (grammarString);
    2323    }
    2424
    25     public double GetBestKnownQuality(int maxLen) {
     25    public double BestKnownQuality(int maxLen) {
    2626      // for now only an upper bound is returned, ideally all fitness cases are predicted correctly
    2727      return Math.Pow(2, 4);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ExpressionInterpreter.cs

    r11727 r11732  
    5454      var r = 0.0;
    5555      r = Term(d);
    56       while (CurSy() == '+' || CurSy() == '-' || CurSy() == '^') {
    57         if (CurSy() == '+') {
     56      var curSy = CurSy();
     57      while (curSy == '+' || curSy == '-' || curSy == '^') {
     58        if (curSy == '+') {
    5859          NewSy();
    5960          r += Expr(d);
    60         } else if (CurSy() == '-') {
     61        } else if (curSy == '-') {
    6162          NewSy();
    6263          r -= Expr(d);
    63         } else if (CurSy() == '^') {
     64        } else {
    6465          NewSy();
    6566          var e = Expr(d);
    6667          r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
    6768        }
     69        curSy = CurSy();
    6870      }
    6971      return r;
     
    7375      var r = 0.0;
    7476      r = Fact(d);
    75       while (CurSy() == '*' || CurSy() == '/') {
    76         if (CurSy() == '*') {
     77      var curSy = CurSy();
     78      while (curSy == '*' || curSy == '/') {
     79        if (curSy == '*') {
    7780          NewSy();
    7881          r *= Term(d);
    79         } else if (CurSy() == '/') {
     82        } else {
    8083          NewSy();
    8184          r /= Term(d);
    8285        }
     86        curSy = CurSy();
    8387      }
    8488      return r;
     
    8791    private double Fact(double[] d) {
    8892      double r = 0.0;
    89       if (CurSy() == '!') {
     93      var curSy = CurSy();
     94      if (curSy == '!') {
    9095        NewSy();
    9196        r = Not(Expr(d));
    92       } else if (CurSy() == '(') {
     97      } else if (curSy == '(') {
    9398        NewSy();
    9499        r = Expr(d);
    95100        if (CurSy() != ')') throw new ArgumentException();
    96101        NewSy();
    97       } else if (CurSy() >= 'a' && CurSy() <= 'z') {
    98         int o = Convert.ToByte(CurSy()) - Convert.ToByte('a');
     102      } else /* if (curSy >= 'a' && curSy <= 'z') */ {
     103        int o = (byte)curSy - (byte)'a';
     104        //int o = Convert.ToByte(CurSy()) - Convert.ToByte('a');
    99105        if (o < 0 || o >= d.Length) throw new ArgumentException();
    100106        r = d[o];
    101107        NewSy();
    102       } else throw new ArgumentException();
     108      }
     109      //} else throw new ArgumentException();
    103110      return r;
    104111    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

    r11730 r11732  
    3535      this.rules = new Dictionary<char, List<Sequence>>();
    3636      foreach (var r in orig.rules)
    37         this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new Sequence(v)))); // clone sequences
     37        this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new ReadonlySequence(v)))); // clone sequences
    3838      this.terminalSymbols = new HashSet<char>(orig.terminalSymbols);
    3939      this.sentenceSymbol = orig.sentenceSymbol;
    4040      this.nonTerminalSymbols = new HashSet<char>(orig.nonTerminalSymbols);
    4141      this.maxPhraseLength = new Dictionary<Sequence, int>();
    42       foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new Sequence(p.Key), p.Value);
     42      foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new ReadonlySequence(p.Key), p.Value);
    4343      this.minPhraseLength = new Dictionary<Sequence, int>();
    44       foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new Sequence(p.Key), p.Value);
     44      foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new ReadonlySequence(p.Key), p.Value);
    4545    }
    4646
     
    5454      foreach (var r in rules) {
    5555        if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<Sequence>());
    56         this.rules[r.Item1].Add(new Sequence(r.Item2)); // here we store an array of symbols for a phase
     56        this.rules[r.Item1].Add(new ReadonlySequence(r.Item2)); // here we store an array of symbols for a phrase
    5757      }
    5858
    5959      CheckValidity();
    60       CalculatePhaseLengthBounds();
     60      CalculatePhraseLengthBounds();
    6161    }
    6262
     
    7676    }
    7777
    78     private void CalculatePhaseLengthBounds() {
    79       minPhraseLength.Clear();
    80       maxPhraseLength.Clear();
     78    private void CalculatePhraseLengthBounds() {
    8179      // cache phrase lengths
    8280      foreach (var nt in nonTerminalSymbols) {
     
    8482        var max = int.MinValue;
    8583        foreach (var alt in rules[nt].OrderBy(alt => alt.Length)) {
    86           minPhraseLength[alt] = MinPhraseLength(alt);
    87           maxPhraseLength[alt] = MaxPhraseLength(alt);
     84          CalcAndSetMinPhraseLength(alt);
     85          CalcAndSetMaxPhraseLength(alt);
    8886
    8987          min = Math.Min(min, minPhraseLength[alt]);
    9088          max = Math.Max(max, maxPhraseLength[alt]);
    9189        }
    92         minPhraseLength[new Sequence(nt)] = min;
    93         maxPhraseLength[new Sequence(nt)] = max;
    94       }
    95     }
    96 
     90        minPhraseLength[new ReadonlySequence(nt)] = min;
     91        maxPhraseLength[new ReadonlySequence(nt)] = max;
     92      }
     93    }
    9794
    9895    public IEnumerable<Sequence> GetAlternatives(char nt) {
     
    108105    }
    109106
    110     // caches for this are build in construction of object
    111     public int MinPhraseLength(Sequence phrase) {
     107    #region population of minphraselength cache
     108    private int CalcAndSetMinSymbolLength(char symb) {
     109      if (IsTerminal(symb)) return 1;
     110      else return Math.Min(short.MaxValue, rules[symb].Min(alt => CalcAndSetMinPhraseLength(alt))); // maximal allowed value is short.MaxValue
     111    }
     112    private int CalcAndSetMinPhraseLength(Sequence phrase) {
     113      Debug.Assert(phrase is ReadonlySequence);
    112114      int l;
    113115      if (minPhraseLength.TryGetValue(phrase, out l)) return l;
     
    116118
    117119      foreach (var symb in phrase) {
    118         if (IsNonTerminal(symb)) {
    119           l += MinSymbolLength(symb);
    120         } else {
    121           l++;
    122         }
     120        l += CalcAndSetMinSymbolLength(symb);
    123121      }
    124122
     
    127125      return l;
    128126    }
    129 
     127    #endregion
     128
     129    // read only access to caches
     130    private int MinSymbolLength(char symb) {
     131      if (IsTerminal(symb)) return 1;
     132      else return Math.Min(short.MaxValue, rules[symb].Min(alt => MinPhraseLength(alt))); // maximal allowed value is short.MaxValue
     133    }
     134    public int MinPhraseLength(Sequence phrase) {
     135      var minLen = 0;
     136      if (minPhraseLength.TryGetValue(phrase, out minLen)) return minLen;
     137      foreach (var s in phrase) {
     138        minLen += MinSymbolLength(s);
     139      }
     140      return Math.Min(short.MaxValue, minLen); // maximal allowed value is short.MaxValue
     141    }
     142
     143    #region population of maxphraselength cache
     144    private int CalcAndSetMaxSymbolLength(char symb) {
     145      if (IsTerminal(symb)) return 1;
     146      else return Math.Min(short.MaxValue, rules[symb].Max(alt => CalcAndSetMaxPhraseLength(alt))); // maximal allowed value is short.MaxValue
     147    }
    130148    // caches for this are built in the construction of the object
    131     public int MaxPhraseLength(Sequence phrase) {
     149    private int CalcAndSetMaxPhraseLength(Sequence phrase) {
     150      Debug.Assert(phrase is ReadonlySequence);
    132151      int l;
    133152      if (maxPhraseLength.TryGetValue(phrase, out l)) return l;
     
    136155
    137156      foreach (var symb in phrase) {
    138         if (IsNonTerminal(symb)) {
    139           l += MaxSymbolLength(symb);
    140         } else {
    141           l++;
    142         }
     157        l += CalcAndSetMaxSymbolLength(symb);
    143158      }
    144159      l = Math.Min(short.MaxValue, l); // maximal allowed value is short.MaxValue
     
    146161      return l;
    147162    }
    148 
    149     private int MinSymbolLength(char nt) {
    150       if (IsTerminal(nt)) return 1;
    151       else return Math.Min(short.MaxValue, rules[nt].Min(alt => MinPhraseLength(alt))); // maximal allowed value is short.MaxValue
    152     }
    153     private int MaxSymbolLength(char nt) {
    154       if (IsTerminal(nt)) return 1;
    155       else return Math.Min(short.MaxValue, rules[nt].Max(alt => MaxPhraseLength(alt))); // maximal allowed value is short.MaxValue
     163    #endregion
     164
     165    // read only access to caches
     166    public int MaxSymbolLength(char symb) {
     167      if (IsTerminal(symb)) return 1;
     168      else return Math.Min(short.MaxValue, rules[symb].Max(alt => MaxPhraseLength(alt))); // maximal allowed value is short.MaxValue
     169    }
     170    public int MaxPhraseLength(Sequence phrase) {
     171      var maxLen = 0;
     172      if (maxPhraseLength.TryGetValue(phrase, out maxLen)) return maxLen;
     173      foreach (var s in phrase) {
     174        maxLen += MaxSymbolLength(s);
     175      }
     176      return Math.Min(short.MaxValue, maxLen); // maximal allowed value is short.MaxValue
    156177    }
    157178
     
    159180      if (phrase.Length > maxLen) throw new ArgumentException();
    160181      if (MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    161       bool done = phrase.IsTerminal; // terminal phrase means we are done
    162       while (!done) {
     182      while (!phrase.IsTerminal) {
    163183        char nt = phrase.FirstNonTerminal;
    164184
     
    173193        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
    174194
    175         done = phrase.All(IsTerminal); // terminal phrase means we are done
    176195      }
    177196      return phrase;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

    r11727 r11732  
    1818    }
    1919
    20     public double GetBestKnownQuality(int maxLen) {
     20    public double BestKnownQuality(int maxLen) {
    2121      // the whole sentence is a palindrome + each symbol occurs only once or twice
    2222      // for odd total length the number of different characters can be larger than len/2 (aba)
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj

    r11730 r11732  
    4646    <Compile Include="Grammar.cs" />
    4747    <Compile Include="EvenParityProblem.cs" />
     48    <Compile Include="ReadonlySequence.cs" />
    4849    <Compile Include="SentenceSetStatistics.cs" />
    4950    <Compile Include="Sequence.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

    r11727 r11732  
    66namespace HeuristicLab.Problems.GrammaticalOptimization {
    77  public interface IProblem {
    8     double GetBestKnownQuality(int maxLen);
     8    double BestKnownQuality(int maxLen);
    99    IGrammar Grammar { get; }
    1010    double Evaluate(string sentence);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

    r11727 r11732  
    1818    }
    1919
    20     public double GetBestKnownQuality(int maxLen) {
     20    public double BestKnownQuality(int maxLen) {
    2121      // the whole sentence is a palindrome
    2222      return maxLen;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs

    r11727 r11732  
    1919    }
    2020
    21     public double GetBestKnownQuality(int maxLen) {
     21    public double BestKnownQuality(int maxLen) {
    2222      return maxLen - 1;
    2323    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs

    r11727 r11732  
    1717    }
    1818
    19     public double GetBestKnownQuality(int maxLen) {
     19    public double BestKnownQuality(int maxLen) {
    2020      // for now only an upper bound is returned, ideally all fitness cases are predicted correctly
    2121      throw new NotImplementedException();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs

    r11727 r11732  
    1919    }
    2020
    21     public double GetBestKnownQuality(int maxLen) {
     21    public double BestKnownQuality(int maxLen) {
    2222      return maxLen;
    2323    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs

    r11727 r11732  
    1717    }
    1818
    19     public double GetBestKnownQuality(int maxLen) {
     19    public double BestKnownQuality(int maxLen) {
    2020      // for now only an upper bound is returned, ideally all fitness cases are predicted correctly
    2121      throw new NotImplementedException();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

    r11730 r11732  
    2323    }
    2424
    25     public double GetBestKnownQuality(int maxLen) {
     25    public double BestKnownQuality(int maxLen) {
    2626      // for now only an upper bound is returned, ideally all food pellets are discovered
    2727      return 89;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs

    r11730 r11732  
    8484    }
    8585
    86     public void ReplaceAt(int position, int len, Sequence replacement) {
     86    public virtual void ReplaceAt(int position, int len, Sequence replacement) {
    8787      if (replacement == null) throw new ArgumentNullException();
    8888      if (len <= 0) throw new ArgumentException();
     
    125125
    126126    public Sequence Subsequence(int startIdx, int len) {
    127       if (startIdx < 0 || len <= 0) throw new ArgumentException();
     127      if (startIdx < 0 || len < 0) throw new ArgumentException();
    128128      if (startIdx >= this.len) throw new ArgumentException();
    129129      if (startIdx + len > this.len) throw new ArgumentException();
    130       var subsequence = new Sequence {len = len};
     130      var subsequence = new Sequence { len = len };
    131131
    132132      Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

    r11730 r11732  
    5656    }
    5757
    58     public double GetBestKnownQuality(int maxLen) {
     58    public double BestKnownQuality(int maxLen) {
    5959      // for now only an upper bound is returned, ideally we have an R² of 1.0
    6060      // the optimal R² can only be reached for sentences of at least 23 symbols
     
    6767
    6868    public double Evaluate(string sentence) {
    69       return RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
     69      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
    7070    }
    7171
    72     private double RSq(IEnumerable<double> xs, IEnumerable<double> ys) {
    73       // two pass implementation, but we don't care
    74       var meanX = xs.Average();
    75       var meanY = ys.Average();
    76 
    77       var s = 0.0;
    78       var ssX = 0.0;
    79       var ssY = 0.0;
    80       foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {
    81         s += (p.x - meanX) * (p.y - meanY);
    82         ssX += Math.Pow(p.x - meanX, 2);
    83         ssY += Math.Pow(p.y - meanY, 2);
    84       }
    85 
    86       if (s.IsAlmost(0)) return 0;
    87       if (ssX.IsAlmost(0) || ssY.IsAlmost(0)) return 0;
    88       return s * s / (ssX * ssY);
    89     }
    9072
    9173
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj

    r11727 r11732  
    5757      <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
    5858    </ProjectReference>
     59    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj">
     60      <Project>{17A7A380-86CE-482D-8D22-CBD70CC97F0D}</Project>
     61      <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name>
     62    </ProjectReference>
    5963    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
    6064      <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11730 r11732  
    1111using HeuristicLab.Algorithms.GrammaticalOptimization;
    1212using HeuristicLab.Problems.GrammaticalOptimization;
     13using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
    1314
    1415namespace Main {
     
    2223
    2324    private static void RunGridTest() {
    24       int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
    25       // var globalRandom = new Random(31415);
     25      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
     26      //var globalRandom = new Random(31415);
    2627      var localRandSeed = 31415;
    2728      var reps = 20;
    2829
    29       var policyFactories = new Func<Random, int, IPolicy>[]
     30      var policies = new Func<IPolicy>[]
    3031        {
    31           (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
    32           (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
    33           (rand, numActions) => new RandomPolicy(rand, numActions),
    34           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
    35           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
    36           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
    37           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
    38           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
    39           (rand, numActions) => new UCTPolicy(numActions, 0.1),
    40           (rand, numActions) => new UCTPolicy(numActions, 0.5),
    41           (rand, numActions) => new UCTPolicy(numActions, 1),
    42           (rand, numActions) => new UCTPolicy(numActions, 2),
    43           (rand, numActions) => new UCTPolicy(numActions, 5),
    44           (rand, numActions) => new UCTPolicy(numActions, 10),
    45           (rand, numActions) => new UCB1Policy(numActions),
    46           (rand, numActions) => new UCB1TunedPolicy(numActions),
    47           (rand, numActions) => new UCBNormalPolicy(numActions),
    48           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
    49           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
    50           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
    51           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
    52           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
    53           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
    54           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
    55           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
    56           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
    57           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
    58           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
    59           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
    60           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
    61           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
    62           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
    63           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
    64           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
    65           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
    66           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
    67           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
    68           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
    69           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
    70           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
    71           (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
    72           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
     32         () => new GaussianThompsonSamplingPolicy(),
     33         () => new GaussianThompsonSamplingPolicy(true),
     34         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
     35         () => new BernoulliThompsonSamplingPolicy(),
     36         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
     37         () => new RandomPolicy(),
     38         () => new EpsGreedyPolicy(0.01),
     39         () => new EpsGreedyPolicy(0.05),
     40         () => new EpsGreedyPolicy(0.1),
     41         () => new EpsGreedyPolicy(0.2),
     42         () => new EpsGreedyPolicy(0.5),
     43         () => new UCTPolicy(0.1),
     44         () => new UCTPolicy(0.5),
     45         () => new UCTPolicy(1),
     46         () => new UCTPolicy(2),
     47         () => new UCTPolicy( 5),
     48         () => new UCTPolicy( 10),
     49         () => new UCB1Policy(),
     50         () => new UCB1TunedPolicy(),
     51         () => new UCBNormalPolicy(),
     52         () => new BoltzmannExplorationPolicy(0.1),
     53         () => new BoltzmannExplorationPolicy(0.5),
     54         () => new BoltzmannExplorationPolicy(1),
     55         () => new BoltzmannExplorationPolicy(5),
     56         () => new BoltzmannExplorationPolicy(10),
     57         () => new BoltzmannExplorationPolicy(20),
     58         () => new BoltzmannExplorationPolicy(100),
     59         () => new ChernoffIntervalEstimationPolicy( 0.01),
     60         () => new ChernoffIntervalEstimationPolicy( 0.05),
     61         () => new ChernoffIntervalEstimationPolicy( 0.1),
     62         () => new ChernoffIntervalEstimationPolicy( 0.2),
     63         // (rand) => new ThresholdAscentPolicy(10, 0.01),
     64         // (rand) => new ThresholdAscentPolicy(10, 0.05),
     65         // (rand) => new ThresholdAscentPolicy(10, 0.1),
     66         // (rand) => new ThresholdAscentPolicy(10, 0.2),
     67         // (rand) => new ThresholdAscentPolicy(100, 0.01),
     68         // (rand) => new ThresholdAscentPolicy(100, 0.05),
     69         // (rand) => new ThresholdAscentPolicy(100, 0.1),
     70         // (rand) => new ThresholdAscentPolicy(100, 0.2),
     71         // (rand) => new ThresholdAscentPolicy(1000, 0.01),
     72         // (rand) => new ThresholdAscentPolicy(1000, 0.05),
     73         // (rand) => new ThresholdAscentPolicy(1000, 0.1),
     74         // (rand) => new ThresholdAscentPolicy(1000, 0.2),
     75         // (rand) => new ThresholdAscentPolicy(5000, 0.01),
     76         // (rand) => new ThresholdAscentPolicy(10000, 0.01),
    7377        };
    7478
    75       var tasks = new List<Task>();
    76       foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
    77         foreach (var policyFactory in policyFactories) {
    78           var myPolicyFactory = policyFactory;
    79           var myRandomTries = randomTries;
    80           var localRand = new Random(localRandSeed);
    81           var options = new ParallelOptions();
    82           options.MaxDegreeOfParallelism = 1;
    83           Parallel.For(0, reps, options, (i) => {
    84             //var t = Task.Run(() => {
    85             Random myLocalRand;
    86             lock (localRand)
    87               myLocalRand = new Random(localRand.Next());
    88 
    89             //for (int i = 0; i < reps; i++) {
    90 
    91             int iterations = 0;
    92             var sw = new Stopwatch();
    93             var globalStatistics = new SentenceSetStatistics();
    94 
    95             var problem = new SymbolicRegressionPoly10Problem();
    96             //var problem = new SantaFeAntProblem();
    97             //var problem = new PalindromeProblem();
    98             //var problem = new HardPalindromeProblem();
    99             //var problem = new RoyalPairProblem();
    100             //var problem = new EvenParityProblem();
    101             var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
    102             //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
    103             //var alg = new AlternativesContextSampler(problem, 25);
    104 
    105             alg.SolutionEvaluated += (sentence, quality) => {
    106               iterations++;
    107               globalStatistics.AddSentence(sentence, quality);
    108               if (iterations % 10000 == 0) {
    109                 Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
    110               }
    111             };
    112 
    113             sw.Start();
    114 
    115             alg.Run(maxIterations);
    116 
    117             sw.Stop();
    118             //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
    119             //}
    120             //});
    121             //tasks.Add(t);
    122           });
     79      foreach (var problem in new Tuple<IProblem, int>[]
     80        {
     81          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
     82          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
     83        })
     84        foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
     85          foreach (var policy in policies) {
     86            var myRandomTries = randomTries;
     87            var localRand = new Random(localRandSeed);
     88            var options = new ParallelOptions();
     89            options.MaxDegreeOfParallelism = 1;
     90            Parallel.For(0, reps, options, (i) => {
     91              //var t = Task.Run(() => {
     92              Random myLocalRand;
     93              lock (localRand)
     94                myLocalRand = new Random(localRand.Next());
     95
     96              //for (int i = 0; i < reps; i++) {
     97
     98              int iterations = 0;
     99              var globalStatistics = new SentenceSetStatistics();
     100
     101              // var problem = new SymbolicRegressionPoly10Problem();
     102              // var problem = new SantaFeAntProblem();
     103              //var problem = new PalindromeProblem();
     104              //var problem = new HardPalindromeProblem();
     105              //var problem = new RoyalPairProblem();
     106              //var problem = new EvenParityProblem();
     107              var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
     108              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
     109              //var alg = new AlternativesContextSampler(problem, 25);
     110
     111              alg.SolutionEvaluated += (sentence, quality) => {
     112                iterations++;
     113                globalStatistics.AddSentence(sentence, quality);
     114                if (iterations % 10000 == 0) {
     115                  Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
     116                }
     117              };
     118
     119
     120              alg.Run(maxIterations);
     121
     122              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
     123              //}
     124              //});
     125              //tasks.Add(t);
     126            });
     127          }
    123128        }
    124       }
    125129      //Task.WaitAll(tasks.ToArray());
    126130    }
    127131
    128132    private static void RunDemo() {
     133      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
     134      // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
     135      // TODO: separate value function from policy
     136      // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
     137      // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and bandit info only has an update routine, each policy works only with it's type of banditinfo)
     138      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
     139      // TODO: exhaustive search with priority list
    129140      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als die neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
    130141      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
     
    133144      // TODO: research thompson sampling for max bandit?
    134145      // TODO: ausführlicher test von strategien für k-armed max bandit
    135       // TODO: verify TA implementation using example from the original paper
    136       // TODO: reference HL.ProblemInstances and try on tower dataset
     146      // TODO: verify TA implementation using example from the original paper     
    137147      // TODO: compare results for different policies also for the symb-reg problem
    138148      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
     
    144154      // TODO: vergleich bei complete-randomly möglichst kurze sätze generieren vs. einfach zufällig alternativen wählen
    145155      // TODO: reward discounting (für veränderliche reward distributions über zeit). speziellen unit-test dafür erstellen
    146 
    147 
    148       int maxIterations = 10000000;
     156      // TODO: constant optimization
     157
     158
     159      int maxIterations = 100000;
    149160      int iterations = 0;
    150161      var sw = new Stopwatch();
     
    154165      var random = new Random();
    155166
    156       //var problem = new SymbolicRegressionPoly10Problem();
    157       var problem = new SantaFeAntProblem();
     167      var problem = new SymbolicRegressionPoly10Problem();
     168      //var problem = new SantaFeAntProblem(); // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
     169      //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
    158170      //var problem = new PalindromeProblem();
    159171      //var problem = new HardPalindromeProblem();
    160172      //var problem = new RoyalPairProblem();
    161173      //var problem = new EvenParityProblem();
    162       //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
     174      var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) works well for Ant
    163175      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
    164176      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
    165177      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
    166178      // var alg = new AlternativesSampler(problem, 17);
    167       var alg = new RandomSearch(problem, random, 17);
     179      // var alg = new RandomSearch(problem, random, 17);
     180      // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
    168181
    169182      alg.FoundNewBestSolution += (sentence, quality) => {
    170183        bestQuality = quality;
    171184        bestSentence = sentence;
    172         Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
     185        Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    173186      };
    174187      alg.SolutionEvaluated += (sentence, quality) => {
     
    176189        globalStatistics.AddSentence(sentence, quality);
    177190        if (iterations % 1000 == 0) {
    178           //alg.PrintStats();
     191          alg.PrintStats();
    179192        }
    180193        if (iterations % 10000 == 0) {
    181194          //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
    182195          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    183           Console.WriteLine(globalStatistics);
     196          Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    184197        }
    185198      };
Note: See TracChangeset for help on using the changeset viewer.