Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/07/15 09:21:46 (9 years ago)
Author:
gkronber
Message:

#2283: refactoring and bug fixes

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj

    r11730 r11732  
    3939  </PropertyGroup>
    4040  <ItemGroup>
     41    <Reference Include="HeuristicLab.Problems.Instances-3.3">
     42      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
     43    </Reference>
     44    <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3">
     45      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath>
     46    </Reference>
    4147    <Reference Include="System" />
    4248    <Reference Include="System.Core">
     
    5763  </Choose>
    5864  <ItemGroup>
     65    <Compile Include="TestSymbRegInstances.cs" />
    5966    <Compile Include="TestSequence.cs" />
    6067    <Compile Include="TestBanditPolicies.cs" />
     
    7178      <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project>
    7279      <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
     80    </ProjectReference>
     81    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj">
     82      <Project>{17a7a380-86ce-482d-8d22-cbd70cc97f0d}</Project>
     83      <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name>
    7384    </ProjectReference>
    7485    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

    r11730 r11732  
    1010  [TestClass]
    1111  public class TestBanditPolicies {
     12    [TestMethod]
     13    public void ComparePoliciesForGaussianUnknownVarianceBandit() {
     14      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     15      var randSeed = 31415;
     16      var nArms = 20;
     17
     18      // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01));
     19      // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01));
     20      // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01));
     21      // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01));
     22      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1)));
     23      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1)));
     24      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     25      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     26      Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
     27      Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
     28
     29    }
    1230
    1331
     
    1533    public void ComparePoliciesForBernoulliBandit() {
    1634      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    17 
    18       var globalRand = new Random(31415);
    19       var seedForPolicy = globalRand.Next();
     35      var randSeed = 31415;
    2036      var nArms = 20;
    2137      //Console.WriteLine("Exp3 (gamma=0.01)");
     
    2339      //Console.WriteLine("Exp3 (gamma=0.05)");
    2440      //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
    25       Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    26       Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));
     41      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy());
     42      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel()));
    2743      Console.WriteLine("Random");
    28       TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     44      TestPolicyBernoulli(randSeed, nArms, new RandomPolicy());
    2945      Console.WriteLine("UCB1");
    30       TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
     46      TestPolicyBernoulli(randSeed, nArms, new UCB1Policy());
    3147      Console.WriteLine("UCB1Tuned");
    32       TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
     48      TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy());
    3349      Console.WriteLine("UCB1Normal");
    34       TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
     50      TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy());
    3551      Console.WriteLine("Eps(0.01)");
    36       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     52      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01));
    3753      Console.WriteLine("Eps(0.05)");
    38       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     54      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05));
    3955      //Console.WriteLine("Eps(0.1)");
    40       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     56      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1));
    4157      //Console.WriteLine("Eps(0.2)");
    42       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     58      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2));
    4359      //Console.WriteLine("Eps(0.5)");
    44       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    45       Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1));
    46       Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5));
    47       Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 1));
    48       Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2));
    49       Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5));
    50       Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
    51       Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
    52       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    53       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    54       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    55       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
    56       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
    57       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     60      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5));
     61      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1));
     62      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5));
     63      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1));
     64      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2));
     65      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5));
     66      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1));
     67      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5));
     68      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1));
     69      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10));
     70      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100));
     71      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01));
     72      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05));
     73      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1));
    5874
    5975      // not applicable to bernoulli rewards
     
    7086
    7187    [TestMethod]
    72     public void ComparePoliciesForNormalBandit() {
    73       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    74 
    75       var globalRand = new Random(31415);
    76       var seedForPolicy = globalRand.Next();
    77       var nArms = 20;
    78       Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
    79       Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    80       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     88    public void ComparePoliciesForGaussianBandit() {
     89      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     90
     91      var randSeed = 31415;
     92      var nArms = 20;
     93      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     94      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     95      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
    8196      /*
    82       Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    83       Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    84       Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    85       Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
     97      Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     98      Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms));
     99      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms));
     100      Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms));
    86101      //Console.WriteLine("Exp3 (gamma=0.01)");
    87       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
     102      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    88103      //Console.WriteLine("Exp3 (gamma=0.05)");
    89       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    90       Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    91       Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     104      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
     105      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     106      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    92107      //Console.WriteLine("Eps(0.1)");
    93       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     108      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    94109      //Console.WriteLine("Eps(0.2)");
    95       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     110      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    96111      //Console.WriteLine("Eps(0.5)");
    97       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    98       Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1));
    99       Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.5));
    100       Console.WriteLine("UCT(1)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1));
    101       Console.WriteLine("UCT(2)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2));
    102       Console.WriteLine("UCT(5)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5));
    103       Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
    104       Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
    105       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    106       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    107       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    108       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
    109       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
    110       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
    111       Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
    112       Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
    113       Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
    114       Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
    115       Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
    116       Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
    117       Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
    118       Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
    119       Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
     112      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
     113      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1));
     114      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5));
     115      Console.WriteLine("UCT(1)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1));
     116      Console.WriteLine("UCT(2)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2));
     117      Console.WriteLine("UCT(5)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5));
     118      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
     119      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
     120      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     121      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     122      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     123      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
     124      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
     125      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     126      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     127      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
     128      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
     129      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     130      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
     131      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
     132      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     133      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
     134      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
    120135       */
    121136    }
     
    124139    public void ComparePoliciesForGaussianMixtureBandit() {
    125140      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    126 
    127       var globalRand = new Random(31415);
    128       var seedForPolicy = globalRand.Next();
    129       var nArms = 20;
    130       Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
    131       Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    132       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     141      var randSeed = 31415;
     142      var nArms = 20;
     143      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
     144      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     145      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
    133146
    134147      /*
    135       Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    136       Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms));
    137       Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms));
    138       Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms));
    139       Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    140       Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    141       Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1));
    142       Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2));
    143       Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5));
    144       Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
    145       Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
    146       Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
    147 
    148       Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
    149       Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
    150       Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
    151       Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
     148      Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     149      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms));
     150      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms));
     151      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms));
     152      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     153      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     154      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1));
     155      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2));
     156      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5));
     157      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     158      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     159      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     160
     161      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     162      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     163      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     164      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
    152165       */
    153166    }
    154167
    155168
    156     private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
    157       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
    158     }
    159     private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
    160       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
    161     }
    162     private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) {
    163       TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
    164     }
    165 
    166 
    167     private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
     169    private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) {
     170      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
     171    }
     172    private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) {
     173      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
     174    }
     175    private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) {
     176      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
     177    }
     178    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) {
     179      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
     180    }
     181
     182
     183    private void TestPolicy(int randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
    168184      var maxIt = 1E5;
    169       var reps = 30; // independent runs
     185      var reps = 10; // independent runs
    170186      var regretForIteration = new Dictionary<int, List<double>>();
    171187      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
    172188      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
     189      var globalRandom = new Random(randSeed);
     190      var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
     191      var policyRandom = new Random(globalRandom.Next());
     192
    173193      // calculate statistics
    174194      for (int r = 0; r < reps; r++) {
    175195        var nextLogStep = 1;
    176         var b = banditFactory(new Random(globalRand.Next()), nArms);
    177         policy.Reset();
     196        var b = banditFactory(banditRandom, nArms);
    178197        var totalRegret = 0.0;
    179198        var totalPullsOfSuboptimalArmsExp = 0.0;
    180199        var totalPullsOfSuboptimalArmsMax = 0.0;
     200        var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
    181201        for (int i = 0; i <= maxIt; i++) {
    182           var selectedAction = policy.SelectAction();
     202          var selectedAction = policy.SelectAction(policyRandom, actionInfos);
    183203          var reward = b.Pull(selectedAction);
    184           policy.UpdateReward(selectedAction, reward);
     204          actionInfos[selectedAction].UpdateReward(reward);
    185205
    186206          // collect stats
Note: See TracChangeset for help on using the changeset viewer.