Changeset 12893 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs
- Timestamp: 08/24/15 13:56:27 (9 years ago)
- File: 1 edited
Legend:
- unchanged lines are prefixed with a space
- added lines are prefixed with +
- removed lines are prefixed with -
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs
--- branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs (r12876)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs (r12893)
@@ -37,7 +37,16 @@
       var randSeed = 31415;
       TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit1(randSeed, new SingleArmPolicy(1));
       TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
       TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1000));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(100));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(2));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.5));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.1));
       TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
-      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.05));
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.01));
     }
@@ -48,8 +57,77 @@
       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var randSeed = 31415;
-      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
-      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
-      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
-      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
-      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+      //TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(0));
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(1));
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(2));
+      // TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 100));
+      // TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1000));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(100));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(2));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.5));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.1));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.05));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.01));
+      //TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+    }
+
+    [TestMethod]
+    // my own test case for ExtremeHunter
+    // using truncated normal distributions
+    public void ComparePoliciesExtremeBandits3() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit3(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(0));
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(1));
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(2));
+      TestPolicyExtremeBandit3(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit3(randSeed, new UCB1Policy(3));
+      TestPolicyExtremeBandit3(randSeed, new EpsGreedyPolicy(0.1));
+    }
+
+    [TestMethod]
+    // a unit test to experiment with bandit policies for completing a GP sentence
+    public void ComparePoliciesSentenceCompletionProblem() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+
+
+      Func<Random, IBandit> sentenceCompletionBanditFactory = (banditRandom) => {
+        var problem = new SymbolicRegressionPoly10Problem();
+        return new SentenceBandit(banditRandom, problem, "a*b+c*d+e*f+E", 23);
+      };
+
+      // ignore number of arms
+
+      // var b = sentenceCompletionBanditFactory(new Random());
+      // all reference policies (always pulling one arm)
+      // for (int i = 0; i < b.NumArms; i++) {
+      //   TestPolicy(randSeed, new SingleArmPolicy(i), sentenceCompletionBanditFactory);
+      // }
+
+      // for the completion of a*b+c*d+e*f+a*g*i+E the arms 12, 15, and 19 are optimal
+      TestPolicy(randSeed, new SingleArmPolicy(12), sentenceCompletionBanditFactory);
+
+      TestPolicy(randSeed, new RandomPolicy(), sentenceCompletionBanditFactory);
+
+      TestPolicy(randSeed, new ExtremeHunterPolicy(), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new ExtremeHunterPolicy(D: 0.5), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(3), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(1), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(0.5), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new ThresholdAscentPolicy(), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new EpsGreedyPolicy(0.1), sentenceCompletionBanditFactory);
     }
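The SingleArmPolicy(i) calls introduced above serve as fixed-arm reference policies (always pulling arm i), which calibrates how much the adaptive policies actually gain. Its implementation is not shown in this changeset; the following is only a minimal sketch of such a policy against the surface used in this test file (CreateActionInfo, SelectAction, and ToString for the log output). The member signatures and the EmptyPolicyActionInfo type are assumptions, not code from the branch.

using System;
using System.Collections.Generic;

// Sketch of a fixed-arm baseline policy; the interface details are assumed.
public class SingleArmPolicy : IBanditPolicy {
  private readonly int armIdx;
  public SingleArmPolicy(int armIdx) { this.armIdx = armIdx; }

  // Ignores the random source and all collected statistics: always pulls armIdx.
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    return armIdx;
  }

  // No statistics are needed; EmptyPolicyActionInfo is a hypothetical no-op holder.
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new EmptyPolicyActionInfo();
  }

  public override string ToString() { return string.Format("SingleArmPolicy({0})", armIdx); }
}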
@@ -206,26 +284,34 @@
 
     private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new BernoulliBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new TruncatedNormalBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new GaussianMixtureBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+      TestPolicy(randSeed, policy, (banditRandom) => new GaussianBandit(banditRandom, nArms, 0, 10));
     }
 
     private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
-      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 }));
     }
     private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
-      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
-    }
-
-
-    private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-      var maxIt = 1E4;
+      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1));
+    }
+    private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, policy, (banditRandom) => new Bandit(banditRandom, new IModel[]
+        {
+          new GammaModel(10, 1), // exp=10, var=10
+          new GammaModel(6, 2),  // exp=12, var=24
+          new GammaModel(3, 3),  // exp= 9, var=27
+        }, 1, 2));
+    }
+
+
+    private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
+      var maxIt = 1E5;
       var reps = 30; // independent runs
       //var regretForIteration = new Dictionary<int, List<double>>();
@@ -240,23 +326,23 @@
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
-        var b = banditFactory(banditRandom, nArms);
-        var totalRegret = 0.0;
-        var totalPullsOfSuboptimalArmsExp = 0.0;
-        var totalPullsOfSuboptimalArmsMax = 0.0;
-        var bestReward = double.NegativeInfinity;
-        var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
-        for (int i = 0; i <= maxIt; i++) {
+        var b = banditFactory(banditRandom);
+        var totalReward = 0.0;
+        int totalPullsOfOptimalArmExp = 0;
+        int totalPullsOfOptimalArmMax = 0;
+        var maxReward = double.NegativeInfinity;
+        var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();
+        for (int i = 0; i <= maxIt + 1; i++) {
           var selectedAction = policy.SelectAction(policyRandom, actionInfos);
           var reward = b.Pull(selectedAction);
 
 
           // collect stats
-          if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++;
-          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
-          totalRegret += b.OptimalExpectedReward - reward;
-          bestReward = Math.Max(bestReward, reward);
-
-          if (i + 1 == nextLogStep) {
-            nextLogStep += 100;
+          if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
+          if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
+          totalReward += reward;
+          maxReward = Math.Max(maxReward, reward);
+
+          if (i == nextLogStep) {
+            nextLogStep += 500;
             //if (!regretForIteration.ContainsKey(i)) {
             //  regretForIteration.Add(i, new List<double>());
@@ -279,6 +365,6 @@
             //bestRewardForIteration[i].Add(bestReward);
             Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
-              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
-              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
+              policy, i, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
+              totalReward / i, totalPullsOfOptimalArmExp / (double)i, totalPullsOfOptimalArmMax / (double)i);
           }
         }
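The ParetoBandit rewards in TestPolicyExtremeBandit1/2 are heavy-tailed, which is the regime ExtremeHunter targets; presumably the double[] holds one Pareto tail index alpha per arm. Smaller alpha means a heavier tail, so the alpha = 1.1 arm yields by far the largest maxima even though its variance is infinite and its sample mean converges slowly. A standalone sketch, not repository code, illustrating this via inverse-CDF sampling with an assumed scale of x_m = 1:

using System;
using System.Linq;

// Inverse-CDF sampling from Pareto(alpha) with x_m = 1: X = (1 - U)^(-1/alpha).
// Compares the tail indices used in TestPolicyExtremeBandit1.
class ParetoTailDemo {
  static double SamplePareto(Random rand, double alpha) {
    return Math.Pow(1.0 - rand.NextDouble(), -1.0 / alpha);
  }

  static void Main() {
    var rand = new Random(31415);
    foreach (var alpha in new[] { 5.0, 1.1, 2.0 }) {
      var samples = Enumerable.Range(0, 100000)
                              .Select(_ => SamplePareto(rand, alpha))
                              .ToArray();
      // E[X] = alpha/(alpha-1) for alpha > 1; the maximum grows fastest for small alpha
      Console.WriteLine("alpha={0}: empirical mean={1:F2}, max={2:F0}",
        alpha, samples.Average(), samples.Max());
    }
  }
}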
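The exp/var comments on the GammaModel arms in the new TestPolicyExtremeBandit3 helper are consistent with a shape/scale parameterization of the gamma distribution, where E[X] = k*theta and Var[X] = k*theta^2; that GammaModel(k, theta) takes its arguments in this order is an assumption here. A standalone check of the commented values:

using System;

// Verifies the exp/var comments, assuming GammaModel(k, theta) = Gamma(shape k, scale theta):
// E[X] = k * theta, Var[X] = k * theta^2.
class GammaMomentsCheck {
  static void Main() {
    var arms = new[] { (k: 10.0, theta: 1.0), (k: 6.0, theta: 2.0), (k: 3.0, theta: 3.0) };
    foreach (var (k, theta) in arms) {
      // prints exp=10, var=10; exp=12, var=24; exp=9, var=27, matching the comments
      Console.WriteLine("GammaModel({0},{1}): exp={2}, var={3}", k, theta, k * theta, k * theta * theta);
    }
  }
}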