- Timestamp: 08/17/15 19:13:19 (9 years ago)
- Files: 1 edited
Legend:
- Unmodified (no prefix)
- Added (+)
- Removed (-)
Index: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs
===================================================================
--- branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs (revision 11745)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs (revision 12876)
@@ -17,15 +17,41 @@
       var nArms = 20;

-      // ThresholdAscent only works for rewards in [0..1] so far
-
-      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
-      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
-      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
-      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
-      Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
-      Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
-
-    }
-
+      // some of the policies are specific to rewards in [0..1], e.g. Treshold Ascent or UCB1
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
+      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
+    }
+
+    [TestMethod]
+    // test case I as described in Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits1() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+    }
+
+    [TestMethod]
+    // test case II as described in Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits2() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+    }

     [TestMethod]
@@ -189,14 +215,22 @@
     }
     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
+      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+    }
+
+    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+    }
+    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
     }


     private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-      var maxIt = 1E5;
-      var reps = 10; // independent runs
-      var regretForIteration = new Dictionary<int, List<double>>();
-      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
-      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      var maxIt = 1E4;
+      var reps = 30; // independent runs
+      //var regretForIteration = new Dictionary<int, List<double>>();
+      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      //var bestRewardForIteration = new Dictionary<int, List<double>>();
       var globalRandom = new Random(randSeed);
       var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
@@ -210,4 +244,5 @@
         var totalPullsOfSuboptimalArmsExp = 0.0;
         var totalPullsOfSuboptimalArmsMax = 0.0;
+        var bestReward = double.NegativeInfinity;
         var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
         for (int i = 0; i <= maxIt; i++) {
@@ -220,35 +255,45 @@
           if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
           totalRegret += b.OptimalExpectedReward - reward;
-
-          if (i == nextLogStep) {
-            nextLogStep *= 2;
-            if (!regretForIteration.ContainsKey(i)) {
-              regretForIteration.Add(i, new List<double>());
-            }
-            regretForIteration[i].Add(totalRegret / i);
-
-            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
-
-            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+          bestReward = Math.Max(bestReward, reward);
+
+          if (i + 1 == nextLogStep) {
+            nextLogStep += 100;
+            //if (!regretForIteration.ContainsKey(i)) {
+            //  regretForIteration.Add(i, new List<double>());
+            //}
+            //regretForIteration[i].Add(totalRegret / i);
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+            //
+            //if (!bestRewardForIteration.ContainsKey(i)) {
+            //  bestRewardForIteration.Add(i, new List<double>());
+            //}
+            //bestRewardForIteration[i].Add(bestReward);
+            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
+              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
+              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
           }
         }
       }
       // print
-      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
-        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
-          p,
-          regretForIteration[p].Average(),
-          regretForIteration[p].Min(),
-          regretForIteration[p].Max(),
-          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
-          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
-        );
-      }
+      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
+      //    p,
+      //    regretForIteration[p].Average(),
+      //    regretForIteration[p].Min(),
+      //    regretForIteration[p].Max(),
+      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
+      //    string.Join(" ", bestRewardForIteration[p])
+      //  );
+      //}
     }

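For context on the new ComparePoliciesExtremeBandits tests: the ParetoBandit test cases use heavy-tailed Pareto reward distributions (tail indices {5, 1.1, 2} and {1.5, 1.1, 3}), where the quantity of interest is the largest observed reward rather than the average. The following sketch is only an illustration of how such heavy-tailed rewards can be generated via inverse-transform sampling; it is not taken from the repository, and the names ParetoSampleDemo and SamplePareto are made up for the example.

using System;

// Illustrative only (not part of the changeset): inverse-transform sampling from a
// Pareto(alpha, xm) distribution. If U ~ Uniform(0,1), then xm / U^(1/alpha) is
// Pareto-distributed with tail index alpha; smaller alpha means heavier tails.
public static class ParetoSampleDemo {
  public static double SamplePareto(Random rand, double alpha, double xm = 1.0) {
    var u = rand.NextDouble();
    if (u <= double.Epsilon) u = double.Epsilon; // avoid division by zero
    return xm / Math.Pow(u, 1.0 / alpha);
  }

  public static void Main() {
    var rand = new Random(31415);
    // tail indices as in test case I: { 5, 1.1, 2 }
    foreach (var alpha in new[] { 5.0, 1.1, 2.0 }) {
      var max = double.NegativeInfinity;
      for (int i = 0; i < 10000; i++)
        max = Math.Max(max, SamplePareto(rand, alpha));
      Console.WriteLine("alpha = {0}: max of 10000 samples = {1:F1}", alpha, max);
    }
  }
}

Smaller tail indices produce heavier tails, so over many pulls the maximum observed reward is typically dominated by the arm with the smallest alpha, which is the regime the extreme-bandit policies target.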