Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/02/15 16:08:21 (9 years ago)
Author:
gkronber
Message:

#2283: several major extensions for grammatical optimization

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

    r11727 r11730  
    44using System.Globalization;
    55using HeuristicLab.Algorithms.Bandits;
     6using HeuristicLab.Algorithms.Bandits.Models;
    67using Microsoft.VisualStudio.TestTools.UnitTesting;
    78
     
    910  [TestClass]
    1011  public class TestBanditPolicies {
     12
     13
    1114    [TestMethod]
    1215    public void ComparePoliciesForBernoulliBandit() {
    13       System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
     16      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     17
    1418      var globalRand = new Random(31415);
    1519      var seedForPolicy = globalRand.Next();
    16       var nArms = 10;
     20      var nArms = 20;
    1721      //Console.WriteLine("Exp3 (gamma=0.01)");
    1822      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
    1923      //Console.WriteLine("Exp3 (gamma=0.05)");
    2024      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
    21       Console.WriteLine("Thompson (Bernoulli)");
    22       TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
     25      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
     26      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));
    2327      Console.WriteLine("Random");
    2428      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     
    3943      //Console.WriteLine("Eps(0.5)");
    4044      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    41     }
     45      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1));
     46      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5));
     47      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 1));
     48      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2));
     49      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5));
     50      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
     51      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
     52      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     53      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     54      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     55      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
     56      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
     57      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     58
     59      // not applicable to bernoulli rewards
     60      //Console.WriteLine("ThresholdAscent(10, 0.01)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     61      //Console.WriteLine("ThresholdAscent(10, 0.05)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
     62      //Console.WriteLine("ThresholdAscent(10, 0.1)   "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
     63      //Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     64      //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
     65      //Console.WriteLine("ThresholdAscent(100, 0.1)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
     66      //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     67      //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
     68      //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
     69    }
     70
    4271    [TestMethod]
    4372    public void ComparePoliciesForNormalBandit() {
    44       System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
     73      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     74
    4575      var globalRand = new Random(31415);
    4676      var seedForPolicy = globalRand.Next();
    47       var nArms = 10;
    48       Console.WriteLine("Thompson (Gaussian)");
    49       TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    50       Console.WriteLine("Random");
    51       TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    52       Console.WriteLine("UCB1");
    53       TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    54       Console.WriteLine("UCB1Tuned");
    55       TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    56       Console.WriteLine("UCB1Normal");
    57       TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
     77      var nArms = 20;
     78      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
     79      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
     80      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     81      /*
     82      Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     83      Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
     84      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
     85      Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
    5886      //Console.WriteLine("Exp3 (gamma=0.01)");
    5987      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    6088      //Console.WriteLine("Exp3 (gamma=0.05)");
    6189      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    62       Console.WriteLine("Eps(0.01)");
    63       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    64       Console.WriteLine("Eps(0.05)");
    65       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     90      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     91      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    6692      //Console.WriteLine("Eps(0.1)");
    6793      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     
    7096      //Console.WriteLine("Eps(0.5)");
    7197      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    72     }
     98      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1));
     99      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.5));
     100      Console.WriteLine("UCT(1)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1));
     101      Console.WriteLine("UCT(2)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2));
     102      Console.WriteLine("UCT(5)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5));
     103      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
     104      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
     105      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     106      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     107      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     108      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
     109      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
     110      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     111      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     112      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
     113      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
     114      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     115      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
     116      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
     117      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     118      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
     119      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
     120       */
     121    }
     122
     123    [TestMethod]
     124    public void ComparePoliciesForGaussianMixtureBandit() {
     125      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     126
     127      var globalRand = new Random(31415);
     128      var seedForPolicy = globalRand.Next();
     129      var nArms = 20;
     130      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
     131      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
     132      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
     133
     134      /*
     135      Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     136      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms));
     137      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms));
     138      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms));
     139      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
     140      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     141      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1));
     142      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2));
     143      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5));
     144      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
     145      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
     146      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
     147
     148      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
     149      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
     150      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
     151      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
     152       */
     153    }
     154
    73155
    74156    private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
    75       var maxIt = 1E6;
    76       var reps = 10; // 10 independent runs
    77       var avgRegretForIteration = new Dictionary<int, double>();
     157      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
     158    }
     159    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
     160      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
     161    }
     162    private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) {
     163      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
     164    }
     165
     166
     167    private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
     168      var maxIt = 1E5;
     169      var reps = 30; // independent runs
     170      var regretForIteration = new Dictionary<int, List<double>>();
     171      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
     172      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
    78173      // calculate statistics
    79174      for (int r = 0; r < reps; r++) {
    80175        var nextLogStep = 1;
    81         var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
     176        var b = banditFactory(new Random(globalRand.Next()), nArms);
    82177        policy.Reset();
    83178        var totalRegret = 0.0;
    84 
     179        var totalPullsOfSuboptimalArmsExp = 0.0;
     180        var totalPullsOfSuboptimalArmsMax = 0.0;
    85181        for (int i = 0; i <= maxIt; i++) {
    86182          var selectedAction = policy.SelectAction();
    87183          var reward = b.Pull(selectedAction);
     184          policy.UpdateReward(selectedAction, reward);
     185
     186          // collect stats
     187          if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++;
     188          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
    88189          totalRegret += b.OptimalExpectedReward - reward;
    89           policy.UpdateReward(selectedAction, reward);
     190
    90191          if (i == nextLogStep) {
    91             nextLogStep *= 10;
    92             if (!avgRegretForIteration.ContainsKey(i)) {
    93               avgRegretForIteration.Add(i, 0.0);
     192            nextLogStep *= 2;
     193            if (!regretForIteration.ContainsKey(i)) {
     194              regretForIteration.Add(i, new List<double>());
    94195            }
    95             avgRegretForIteration[i] += totalRegret / i;
     196            regretForIteration[i].Add(totalRegret / i);
     197
     198            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
     199              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
     200            }
     201            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
     202
     203            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
     204              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
     205            }
     206            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
    96207          }
    97208        }
    98209      }
    99210      // print
    100       foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
    101         Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
    102       }
    103     }
    104     private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
    105       var maxIt = 1E6;
    106       var reps = 10; // 10 independent runs
    107       var avgRegretForIteration = new Dictionary<int, double>();
    108       // calculate statistics
    109       for (int r = 0; r < reps; r++) {
    110         var nextLogStep = 1;
    111         var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
    112         policy.Reset();
    113         var totalRegret = 0.0;
    114 
    115         for (int i = 0; i <= maxIt; i++) {
    116           var selectedAction = policy.SelectAction();
    117           var reward = b.Pull(selectedAction);
    118           totalRegret += b.OptimalExpectedReward - reward;
    119           policy.UpdateReward(selectedAction, reward);
    120           if (i == nextLogStep) {
    121             nextLogStep *= 10;
    122             if (!avgRegretForIteration.ContainsKey(i)) {
    123               avgRegretForIteration.Add(i, 0.0);
    124             }
    125             avgRegretForIteration[i] += totalRegret / i;
    126           }
    127         }
    128       }
    129       // print
    130       foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
    131         Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
     211      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
     212        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
     213          p,
     214          regretForIteration[p].Average(),
     215          regretForIteration[p].Min(),
     216          regretForIteration[p].Max(),
     217          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
     218          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
     219          );
    132220      }
    133221    }
Note: See TracChangeset for help on using the changeset viewer.