Changeset 11710
- Timestamp: 12/21/14 09:19:54 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization
- Files: 7 added, 1 deleted, 3 edited
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/EpsGreedyPolicy.cs
r11708 → r11710:

   var maxReward = double.NegativeInfinity;
   int bestAction = -1;
-  int curAction = -1;
-  foreach (var avgReward in sumReward.Zip(tries, (r, t) => r / (t + 1))) { // prevent division by zero
-    curAction++;
+  for (int i = 0; i < NumActions; i++) {
+    if (tries[i] == 0) return i;
+    var avgReward = sumReward[i] / tries[i];
     if (maxReward < avgReward) {
       maxReward = avgReward;
-      bestAction = curAction;
+      bestAction = i;
     }
   }
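The rewrite drops the (t + 1) workaround, which biased every empirical mean downward, in favor of an explicit "try every arm once" rule: an untried arm is returned immediately, so sumReward[i] / tries[i] is only evaluated once tries[i] > 0. For context, the enclosing ε-greedy action selection presumably wraps this greedy scan in an exploration branch. A minimal sketch, assuming fields random, eps, tries, sumReward, and NumActions with the obvious meanings (a hypothetical reconstruction, not the committed code):

public int SelectAction() {
  // explore with probability eps: pick an arm uniformly at random
  if (random.NextDouble() < eps) {
    return random.Next(NumActions);
  }
  // exploit: pick the arm with the highest empirical mean reward,
  // playing every untried arm once first
  var maxReward = double.NegativeInfinity;
  int bestAction = -1;
  for (int i = 0; i < NumActions; i++) {
    if (tries[i] == 0) return i;
    var avgReward = sumReward[i] / tries[i];
    if (maxReward < avgReward) {
      maxReward = avgReward;
      bestAction = i;
    }
  }
  return bestAction;
}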
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11708 → r11710:

   </ItemGroup>
   <ItemGroup>
-    <Compile Include="Bandit.cs" />
+    <Compile Include="BernoulliBandit.cs" />
     <Compile Include="BanditPolicy.cs" />
+    <Compile Include="TruncatedNormalBandit.cs" />
+    <Compile Include="UCBNormalPolicy.cs" />
+    <Compile Include="UCB1TunedPolicy.cs" />
+    <Compile Include="UCB1Policy.cs" />
     <Compile Include="EpsGreedyPolicy.cs" />
     <Compile Include="IPolicy.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="RandomPolicy.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <Folder Include="Bandits\" />
+    <Folder Include="Policies\" />
   </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
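The project now compiles two bandit models (BernoulliBandit replacing Bandit.cs, plus TruncatedNormalBandit) and three UCB policies alongside the existing random and ε-greedy ones. The classic UCB1 rule (Auer et al., 2002) selects the arm maximizing the empirical mean plus the exploration bonus sqrt(2 ln n / n_i), where n is the total number of pulls and n_i the pulls of arm i. A sketch of that rule, assuming the same tries/sumReward bookkeeping as EpsGreedyPolicy (the UCB1Policy.cs committed here may differ):

public int SelectAction() {
  int totalTries = 0;
  for (int i = 0; i < NumActions; i++) totalTries += tries[i];
  var bestQ = double.NegativeInfinity;
  int bestAction = -1;
  for (int i = 0; i < NumActions; i++) {
    if (tries[i] == 0) return i; // play every arm once before comparing bounds
    // empirical mean plus a bonus that shrinks as the arm is tried more often
    var q = sumReward[i] / tries[i]
          + Math.Sqrt(2.0 * Math.Log(totalTries) / tries[i]);
    if (q > bestQ) {
      bestQ = q;
      bestAction = i;
    }
  }
  return bestAction;
}

UCB1-Tuned replaces the constant 2 in the bonus with a variance-based estimate, and UCB1-Normal is the variant of the same paper for normally distributed rewards; both follow the same argmax pattern.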
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
r11708 → r11710:

   public class TestBanditPolicies {
     [TestMethod]
-    public void ComparePolicies() {
+    public void ComparePoliciesForBernoulliBandit() {
       System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
…
       var nArms = 10;
       Console.WriteLine("Random");
-      TestPolicy(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      Console.WriteLine("UCB1");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(10));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(10));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(10));
       Console.WriteLine("Eps(0.01)");
-      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
       Console.WriteLine("Eps(0.05)");
-      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
       Console.WriteLine("Eps(0.1)");
-      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
       Console.WriteLine("Eps(0.2)");
-      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
       Console.WriteLine("Eps(0.5)");
-      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+    }
+
+    [TestMethod]
+    public void ComparePoliciesForNormalBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      var globalRand = new Random(31415);
+      var seedForPolicy = globalRand.Next();
+      var nArms = 10;
+      Console.WriteLine("Random");
+      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      Console.WriteLine("UCB1");
+      TestPolicyNormal(globalRand, nArms, new UCB1Policy(10));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(10));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(10));
+      Console.WriteLine("Eps(0.01)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
+      Console.WriteLine("Eps(0.05)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
+      Console.WriteLine("Eps(0.1)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
+      Console.WriteLine("Eps(0.2)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
+      Console.WriteLine("Eps(0.5)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
     }

-    private void TestPolicy(Random globalRand, int nArms, IPolicy policy) {
+    private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
       var maxIt = 1E6;
       var reps = 10; // 10 independent runs
…
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
-        var b = new Bandit(new Random(globalRand.Next()), 10);
+        var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
         policy.Reset();
         var totalRegret = 0.0;
…
       }
     }
+
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      // calculate statistics
+      for (int r = 0; r < reps; r++) {
+        var nextLogStep = 1;
+        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
+        policy.Reset();
+        var totalRegret = 0.0;
+
+        for (int i = 0; i <= maxIt; i++) {
+          var selectedAction = policy.SelectAction();
+          var reward = b.Pull(selectedAction);
+          totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
+          if (i == nextLogStep) {
+            nextLogStep *= 10;
+            if (!avgRegretForIteration.ContainsKey(i)) {
+              avgRegretForIteration.Add(i, 0.0);
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+          }
+        }
+      }
+      // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      }
+    }
   }
 }
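The harness pulls an arm maxIt = 1E6 times per run, accumulates regret as OptimalExpectedReward minus the received reward, and records the average per-step regret (totalRegret / i) at iterations 1, 10, 100, …, averaging over the 10 repetitions at print time. Both TestPolicyBernoulli and TestPolicyNormal need only two members from the bandit: Pull(arm) and OptimalExpectedReward. A hypothetical minimal BernoulliBandit with that shape, for illustration only (the class added in this changeset may differ):

using System;

public class BernoulliBandit {
  private readonly Random random;
  private readonly double[] expReward; // per-arm success probability

  public double OptimalExpectedReward { get; private set; }

  public BernoulliBandit(Random random, int nArms) {
    this.random = random;
    expReward = new double[nArms];
    for (int i = 0; i < nArms; i++) {
      expReward[i] = random.NextDouble(); // arm means drawn uniformly at random
      OptimalExpectedReward = Math.Max(OptimalExpectedReward, expReward[i]);
    }
  }

  // Bernoulli reward: 1 with the arm's success probability, otherwise 0
  public double Pull(int arm) {
    return random.NextDouble() < expReward[arm] ? 1.0 : 0.0;
  }
}

A TruncatedNormalBandit presumably exposes the same two members, drawing rewards from a normal distribution truncated to a fixed range instead.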