Changeset 11730 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
- Timestamp: 01/02/15 16:08:21 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
r11727 r11730 4 4 using System.Globalization; 5 5 using HeuristicLab.Algorithms.Bandits; 6 using HeuristicLab.Algorithms.Bandits.Models; 6 7 using Microsoft.VisualStudio.TestTools.UnitTesting; 7 8 … … 9 10 [TestClass] 10 11 public class TestBanditPolicies { 12 13 11 14 [TestMethod] 12 15 public void ComparePoliciesForBernoulliBandit() { 13 System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture; 16 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 17 14 18 var globalRand = new Random(31415); 15 19 var seedForPolicy = globalRand.Next(); 16 var nArms = 10;20 var nArms = 20; 17 21 //Console.WriteLine("Exp3 (gamma=0.01)"); 18 22 //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 19 23 //Console.WriteLine("Exp3 (gamma=0.05)"); 20 24 //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 21 Console.WriteLine("Thompson (Bernoulli)"); 22 TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));25 Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 26 Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms))); 23 27 Console.WriteLine("Random"); 24 28 TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); … … 39 43 //Console.WriteLine("Eps(0.5)"); 40 44 //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 41 } 45 Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1)); 46 Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5)); 47 Console.WriteLine("UCT(1) "); TestPolicyBernoulli(globalRand, nArms, new 
UCTPolicy(nArms, 1)); 48 Console.WriteLine("UCT(2) "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2)); 49 Console.WriteLine("UCT(5) "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5)); 50 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1)); 51 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5)); 52 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 53 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 54 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 55 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01)); 56 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05)); 57 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1)); 58 59 // not applicable to bernoulli rewards 60 //Console.WriteLine("ThresholdAscent(10, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 61 //Console.WriteLine("ThresholdAscent(10, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05)); 62 //Console.WriteLine("ThresholdAscent(10, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1)); 63 //Console.WriteLine("ThresholdAscent(100, 0.01) "); 
TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 64 //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05)); 65 //Console.WriteLine("ThresholdAscent(100, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1)); 66 //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 67 //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05)); 68 //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1)); 69 } 70 42 71 [TestMethod] 43 72 public void ComparePoliciesForNormalBandit() { 44 System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture; 73 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 74 45 75 var globalRand = new Random(31415); 46 76 var seedForPolicy = globalRand.Next(); 47 var nArms = 10; 48 Console.WriteLine("Thompson (Gaussian)"); 49 TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 50 Console.WriteLine("Random"); 51 TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 52 Console.WriteLine("UCB1"); 53 TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms)); 54 Console.WriteLine("UCB1Tuned"); 55 TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms)); 56 Console.WriteLine("UCB1Normal"); 57 TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms)); 77 var nArms = 20; 78 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 79 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new 
GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 80 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 81 /* 82 Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 83 Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms)); 84 Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms)); 85 Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms)); 58 86 //Console.WriteLine("Exp3 (gamma=0.01)"); 59 87 //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01)); 60 88 //Console.WriteLine("Exp3 (gamma=0.05)"); 61 89 //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05)); 62 Console.WriteLine("Eps(0.01)"); 63 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 64 Console.WriteLine("Eps(0.05)"); 65 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 90 Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 91 Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 66 92 //Console.WriteLine("Eps(0.1)"); 67 93 //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1)); … … 70 96 //Console.WriteLine("Eps(0.5)"); 71 97 //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 72 } 98 Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1)); 99 Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 
0.5)); 100 Console.WriteLine("UCT(1) "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1)); 101 Console.WriteLine("UCT(2) "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2)); 102 Console.WriteLine("UCT(5) "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5)); 103 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1)); 104 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5)); 105 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 106 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 107 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 108 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01)); 109 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05)); 110 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1)); 111 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 112 Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05)); 113 Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1)); 114 Console.WriteLine("ThresholdAscent(100,0.01) "); 
TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 115 Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05)); 116 Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1)); 117 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 118 Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05)); 119 Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1)); 120 */ 121 } 122 123 [TestMethod] 124 public void ComparePoliciesForGaussianMixtureBandit() { 125 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 126 127 var globalRand = new Random(31415); 128 var seedForPolicy = globalRand.Next(); 129 var nArms = 20; 130 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 131 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 132 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 133 134 /* 135 Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 136 Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms)); 137 Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms)); 138 Console.WriteLine("UCB1Normal"); 
TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms)); 139 Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 140 Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 141 Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1)); 142 Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2)); 143 Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5)); 144 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 145 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 146 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 147 148 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 149 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 150 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 151 Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01)); 152 */ 153 } 154 73 155 74 156 private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) { 75 var maxIt = 1E6; 76 var reps = 10; // 10 independent runs 77 var avgRegretForIteration = new Dictionary<int, double>(); 
157 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions)); 158 } 159 private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) { 160 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions)); 161 } 162 private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) { 163 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions)); 164 } 165 166 167 private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) { 168 var maxIt = 1E5; 169 var reps = 30; // independent runs 170 var regretForIteration = new Dictionary<int, List<double>>(); 171 var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>(); 172 var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>(); 78 173 // calculate statistics 79 174 for (int r = 0; r < reps; r++) { 80 175 var nextLogStep = 1; 81 var b = new BernoulliBandit(new Random(globalRand.Next()), 10);176 var b = banditFactory(new Random(globalRand.Next()), nArms); 82 177 policy.Reset(); 83 178 var totalRegret = 0.0; 84 179 var totalPullsOfSuboptimalArmsExp = 0.0; 180 var totalPullsOfSuboptimalArmsMax = 0.0; 85 181 for (int i = 0; i <= maxIt; i++) { 86 182 var selectedAction = policy.SelectAction(); 87 183 var reward = b.Pull(selectedAction); 184 policy.UpdateReward(selectedAction, reward); 185 186 // collect stats 187 if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++; 188 if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++; 88 189 totalRegret += b.OptimalExpectedReward - reward; 89 policy.UpdateReward(selectedAction, reward); 190 90 191 if (i == nextLogStep) { 91 nextLogStep *= 10;92 if (! 
avgRegretForIteration.ContainsKey(i)) {93 avgRegretForIteration.Add(i, 0.0);192 nextLogStep *= 2; 193 if (!regretForIteration.ContainsKey(i)) { 194 regretForIteration.Add(i, new List<double>()); 94 195 } 95 avgRegretForIteration[i] += totalRegret / i; 196 regretForIteration[i].Add(totalRegret / i); 197 198 if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) { 199 numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0); 200 } 201 numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp; 202 203 if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) { 204 numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0); 205 } 206 numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax; 96 207 } 97 208 } 98 209 } 99 210 // print 100 foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) { 101 Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret 102 } 103 } 104 private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) { 105 var maxIt = 1E6; 106 var reps = 10; // 10 independent runs 107 var avgRegretForIteration = new Dictionary<int, double>(); 108 // calculate statistics 109 for (int r = 0; r < reps; r++) { 110 var nextLogStep = 1; 111 var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10); 112 policy.Reset(); 113 var totalRegret = 0.0; 114 115 for (int i = 0; i <= maxIt; i++) { 116 var selectedAction = policy.SelectAction(); 117 var reward = b.Pull(selectedAction); 118 totalRegret += b.OptimalExpectedReward - reward; 119 policy.UpdateReward(selectedAction, reward); 120 if (i == nextLogStep) { 121 nextLogStep *= 10; 122 if (!avgRegretForIteration.ContainsKey(i)) { 123 avgRegretForIteration.Add(i, 0.0); 124 } 125 avgRegretForIteration[i] += totalRegret / i; 126 } 127 } 128 } 129 // print 130 foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) { 131 Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. 
regret 211 foreach (var p in regretForIteration.Keys.OrderBy(k => k)) { 212 Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}", 213 p, 214 regretForIteration[p].Average(), 215 regretForIteration[p].Min(), 216 regretForIteration[p].Max(), 217 numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps, 218 numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps 219 ); 132 220 } 133 221 }
Note: See TracChangeset for help on using the changeset viewer.