- Timestamp: 08/17/15 19:13:19 (9 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr
- Files: 6 added, 16 edited
Legend:
- unchanged (context) lines have no prefix
- lines prefixed with + were added
- lines prefixed with - were removed
- … marks omitted lines
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/GrammaticalOptimization.sln
r12099 → r12876

  Microsoft Visual Studio Solution File, Format Version 12.00
- # Visual Studio 2013
- VisualStudioVersion = 12.0.31101.0
- MinimumVisualStudioVersion = 10.0.40219.1
+ # Visual Studio 2012
  Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Problems.GrammaticalOptimization", "HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj", "{CB9DCCF6-667E-4A13-B82D-DBD6B45A045E}"
  EndProject
  …
  ProjectSection(SolutionItems) = preProject
    local.testsettings = local.testsettings
+   Performance1.psess = Performance1.psess
  EndProjectSection
  EndProject
  …
  HideSolutionNode = FALSE
  EndGlobalSection
+ GlobalSection(Performance) = preSolution
+   HasPerformanceSessions = true
+ EndGlobalSection
  EndGlobal
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs
r12290 → r12876

  MaxReward = Math.Max(MaxReward, reward);
  var delta = reward - avgValue;
- //var alpha = 0.01;
- var alpha = Math.Max(1.0/Tries, 0.01);
+ double alpha = 1.0 / Tries;
  avgValue = avgValue + alpha * delta;
  }
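With alpha fixed to 1.0 / Tries the update above is the plain incremental sample mean; the previous floor of 0.01 turned it into an exponentially weighted average that keeps discounting old rewards. A minimal standalone sketch of the equivalence (class and variable names are illustrative, not part of the branch):

    using System;
    using System.Linq;

    // Sketch: with alpha = 1/n the incremental update
    //   avg <- avg + alpha * (reward - avg)
    // reproduces the exact sample mean of all rewards seen so far.
    class RunningMeanSketch {
      static void Main() {
        var rewards = new[] { 0.2, 0.7, 0.4, 0.9 };
        double avg = 0.0;
        int tries = 0;
        foreach (var r in rewards) {
          tries++;
          double alpha = 1.0 / tries;  // exact running mean; a constant alpha would forget old rewards
          avg += alpha * (r - avg);
        }
        Console.WriteLine("incremental mean = {0}, LINQ mean = {1}", avg, rewards.Average());
      }
    }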
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11851 → r12876

  </PropertyGroup>
  <ItemGroup>
+   <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+     <SpecificVersion>False</SpecificVersion>
+     <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
+   </Reference>
    <Reference Include="System" />
    <Reference Include="System.Core" />
  …
  <ItemGroup>
    <Compile Include="ActionInfos\BernoulliPolicyActionInfo.cs" />
+   <Compile Include="ActionInfos\ExtremeHunterActionInfo.cs" />
    <Compile Include="ActionInfos\DefaultPolicyActionInfo.cs" />
    <Compile Include="ActionInfos\MeanAndVariancePolicyActionInfo.cs" />
  …
    <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
    <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
+   <Compile Include="Policies\IntervalEstimationPolicy.cs" />
+   <Compile Include="Policies\ExtremeHunterPolicy.cs" />
    <Compile Include="Policies\EpsGreedyPolicy.cs" />
    <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
r11806 → r12876

  namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    public class ActiveLearningPolicy : IBanditPolicy {
+     public double MaxReward { get; private set; }
+     public ActiveLearningPolicy(double maxReward = 1.0) {
+       this.MaxReward = maxReward;
+     }
      public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
  …
        q = aInfo.SumReward / aInfo.Tries;
        var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
-       u = q + 0.5 * b;
-       l = q - 0.5 * b;
+       u = q + MaxReward * b;
+       l = q - MaxReward * b;
      }
      bestActions.Add(aIdx);
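The new MaxReward parameter scales the Hoeffding-style half-width b, so the interval [l, u] stays meaningful when rewards are not confined to [0, 1]. A self-contained sketch of that interval computation, with purely illustrative values (none of these numbers come from the changeset):

    using System;

    // Sketch of the confidence interval used by the policy above, assuming rewards in [0, maxReward]:
    //   b = sqrt( ln(2 * k * totalTries / delta) / (2 * tries) )
    //   upper = mean + maxReward * b,  lower = mean - maxReward * b
    class ConfidenceIntervalSketch {
      static void Main() {
        double maxReward = 10.0, delta = 0.1;      // illustrative values
        int k = 20, totalTries = 1000, tries = 50;
        double mean = 4.2;                         // empirical mean reward of one arm

        double b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * tries));
        Console.WriteLine("[{0:F2}, {1:F2}]", mean - maxReward * b, mean + maxReward * b);
      }
    }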
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs
r11806 → r12876

  var avgReward = aInfo.SumReward / aInfo.Tries;

- // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
+ // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
  // var alpha = Math.Log(2 * totalTries * k / delta);
  double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r11806 → r12876

  // policy for k-armed bandit (see Auer et al. 2002)
  public class UCB1Policy : IBanditPolicy {
+   public double MaxReward { get; private set; }
+   public UCB1Policy(double maxReward = 1.0) {
+     this.MaxReward = maxReward;
+   }
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
  …
      } else {
-       q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
+       q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
      }
      if (q > bestQ) {
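UCB1 as described by Auer et al. (2002) assumes rewards in [0, 1]; multiplying the exploration term by MaxReward, as the hunk above does, rescales the bonus for wider reward ranges. A self-contained sketch of the selection rule (plain arrays instead of the branch's IBanditPolicyActionInfo objects):

    using System;
    using System.Linq;

    // Sketch of UCB1 arm selection: empirical mean plus an exploration bonus that
    // grows with the total number of pulls and shrinks with the arm's own pulls.
    class Ucb1Sketch {
      static int SelectArm(double[] sumReward, int[] tries, double maxReward) {
        int totalTries = tries.Sum();
        int best = -1;
        double bestQ = double.NegativeInfinity;
        for (int a = 0; a < tries.Length; a++) {
          if (tries[a] == 0) return a; // try every arm once before using the bound
          double q = sumReward[a] / tries[a]
                   + maxReward * Math.Sqrt(2.0 * Math.Log(totalTries) / tries[a]);
          if (q > bestQ) { bestQ = q; best = a; }
        }
        return best;
      }

      static void Main() {
        var sum = new[] { 12.0, 30.0, 5.0 };
        var tries = new[] { 10, 20, 3 };
        Console.WriteLine("selected arm: {0}", SelectArm(sum, tries, maxReward: 10.0));
      }
    }

Setting maxReward = 1.0 recovers the textbook UCB1 bound.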
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
r11832 → r12876

  namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    // policy for k-armed bandit (see Auer et al. 2002)
+   // specific to Bernoulli distributed rewards
    public class UCB1TunedPolicy : IBanditPolicy {
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs
r12290 → r12876

  var children = n.children;
  if (children == null || !children.Any()) break;
- var values = children.Select(ch => policy.GetValue(ch.phrase));
- var maxValue = values.Max();
- if (maxValue == 0) maxValue = 1.0;
- if (double.IsPositiveInfinity(maxValue)) maxValue = double.MaxValue;
+ var triesEnumerable = children.Select(ch => policy.GetTries(ch.phrase));
+ double maxTries = triesEnumerable.Where(v => !double.IsInfinity(v)).DefaultIfEmpty(1).Max();
+ maxTries = Math.Max(maxTries, 1.0);
  // write phrases
  foreach (var ch in children) {
-   SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+   SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
    Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
  }
  …
  // write values
  foreach (var ch in children) {
-   SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
-   Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
+   SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
+   if (!double.IsInfinity(policy.GetValue(ch.phrase)))
+     Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
+   else
+     Console.Write(" Inf ");
  }
  Console.WriteLine();
  …
  // write tries
  foreach (var ch in children) {
-   SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+   SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
    Console.Write(" {0,4}", policy.GetTries(ch.phrase));
  }
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Common/HeuristicLab.Common.csproj
r11902 → r12876

  <Compile Include="ExpressionExtender.cs" />
  <Compile Include="Extensions.cs" />
+ <Compile Include="Heap.cs" />
  <Compile Include="MostRecentlyUsedCache.cs" />
  <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/GaussianBandit.cs
r11849 → r12876

  public int OptimalExpectedRewardArm { get; private set; }
  public int OptimalMaximalRewardArm { get; private set; }
+ public double MaxReward { get; private set; }
+ public double MinReward { get; private set; }
  private readonly Random random;
  private readonly double[] exp;
  private readonly double[] stdDev;
- public GaussianBandit(Random random, int nArms) {
+ public GaussianBandit(Random random, int nArms, double minReward = double.NegativeInfinity, double maxReward = double.PositiveInfinity) {
+   this.MaxReward = maxReward;
+   this.MinReward = minReward;
    this.random = random;
    this.NumArms = nArms;
  …
    OptimalExpectedRewardArm = i;
  }
- var q = alglib.invnormaldistribution(0.99) * stdDev[i] + exp[i];
+ var q = alglib.invnormaldistribution(0.999) * stdDev[i] + exp[i];
  if (q > bestQ) {
    bestQ = q;
  …
  }

- // pulling an arm results in a truncated normally distributed reward
- // with mean expReward[i] and std.dev 0.1
+ // pulling an arm results in a normally distributed reward
+ // with mean expReward[i] and std.dev
  public double Pull(int arm) {
-   var z = Rand.RandNormal(random);
-   var x = z * stdDev[arm] + exp[arm];
+   double x;
+   do {
+     var z = Rand.RandNormal(random);
+     x = z * stdDev[arm] + exp[arm];
+   } while (x <= MinReward || x > MaxReward);
    return x;
  }
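The new Pull() keeps drawing from the untruncated normal until the sample falls into (MinReward, MaxReward]; with the default bounds of ±infinity this reduces to a single draw. A hedged sketch of the same rejection-sampling idea, with the branch's Rand.RandNormal replaced by a Box-Muller draw (the helper names here are hypothetical):

    using System;

    // Sketch: sample a reward from N(mean, stdDev) truncated to (minReward, maxReward]
    // by rejection, i.e. redraw until the sample lies inside the interval.
    class TruncatedGaussianSketch {
      static readonly Random random = new Random(1234);

      static double NextStandardNormal() {
        // Box-Muller transform for a standard normal sample
        double u1 = 1.0 - random.NextDouble();
        double u2 = random.NextDouble();
        return Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
      }

      static double Pull(double mean, double stdDev, double minReward, double maxReward) {
        double x;
        do {
          x = NextStandardNormal() * stdDev + mean;
        } while (x <= minReward || x > maxReward);
        return x;
      }

      static void Main() {
        Console.WriteLine(Pull(mean: 5.0, stdDev: 2.0, minReward: 0.0, maxReward: 10.0));
      }
    }

Rejection sampling stays cheap as long as the interval covers most of the probability mass; bounds that are tight relative to stdDev would make the loop iterate many times per pull.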
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/HeuristicLab.Problems.Bandits.csproj
r11981 → r12876

  <Compile Include="BanditHelper.cs" />
  <Compile Include="BernoulliBandit.cs" />
+ <Compile Include="ParetoBandit.cs" />
  <Compile Include="GaussianBandit.cs" />
  <Compile Include="GaussianMixtureBandit.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/IBandit.cs
r11849 → r12876

  int OptimalMaximalRewardArm { get; } // arm which is optimal for optimization of maximal reward

- double Pull(int arm); // pulling an arm returns a regret
+ double Pull(int arm); // pulling an arm returns a reward
  }
}
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj
r12391 → r12876

  </Reference>
  <Reference Include="HeuristicLab.Random-3.3">
-   <HintPath>..\..\..\..\..\Program Files\HeuristicLab 3.3\HeuristicLab.Random-3.3.dll</HintPath>
+   <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Random-3.3.dll</HintPath>
  </Reference>
  <Reference Include="System" />
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs
r12290 → r12876

  private const string grammarString = @"
  G(A):
- A -> l | r | m | ?(A)(A) | lA | rA | mA
+ A -> l | r | m | ?(A)(A) | lA | rA | mA |?(A)(A)A
  ";
  // for tree-based GP in HL we need a different grammar for the same language
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Main/Program.cs
r12298 → r12876

  // var alg = new SequentialSearch(problem, 25, random, 0,
  //   new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy()));
- var policy = new GenericPolicy(problem);
+ //var policy = new GenericPolicy(problem);
+ //var policy = new GenericGrammarPolicy(problem, new ExtremeHunterPolicy());
+ //var policy = new GenericGrammarPolicy(problem, new UCB1Policy());
+ //var policy = new GenericGrammarPolicy(problem, new ActiveLearningPolicy(0.1));
+ var policy = new GenericGrammarPolicy(problem, new ExtremeHunterPolicy(1.0E-2, 1E-2, 1));
  var alg = new SequentialSearch(problem, 23, random, 0,
    policy);
  …
  Console.SetCursorPosition(0, 0);
  Console.WriteLine(iterations);
- WriteAlleleStatistics();
+ //WriteAlleleStatistics();
  Console.WriteLine(globalStatistics.BestSentenceQuality);
  Console.WriteLine(globalStatistics.BestSentence);
  Console.WriteLine(globalStatistics);
- //alg.PrintStats();
- policy.PrintStats();
+ alg.PrintStats();
+ //policy.PrintStats();
  //ResetAlleleStatistics();
  }
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs
r11745 → r12876

  var nArms = 20;

- // ThresholdAscent only works for rewards in [0..1] so far
-
- Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
- Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
- Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
- Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
- Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
- Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
-
- }
-
+ // some of the policies are specific to rewards in [0..1], e.g. Treshold Ascent or UCB1
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
+ //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
+ TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
+ }
+
+ [TestMethod]
+ // test case I as described in Extreme Bandits paper
+ public void ComparePoliciesExtremeBandits1() {
+   CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+   var randSeed = 31415;
+   TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+   TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
+   TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+   TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
+   // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+ }
+
+ [TestMethod]
+ // test case II as described in Extreme Bandits paper
+ public void ComparePoliciesExtremeBandits2() {
+   CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+   var randSeed = 31415;
+   TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+   TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+   TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+   TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+   // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+ }

  [TestMethod]
  …
  }
  private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-   TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
+   TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+ }
+
+ private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
+   TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+ }
+ private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
+   TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
  }


  private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-   var maxIt = 1E5;
-   var reps = 10; // independent runs
-   var regretForIteration = new Dictionary<int, List<double>>();
-   var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
-   var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+   var maxIt = 1E4;
+   var reps = 30; // independent runs
+   //var regretForIteration = new Dictionary<int, List<double>>();
+   //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+   //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+   //var bestRewardForIteration = new Dictionary<int, List<double>>();
    var globalRandom = new Random(randSeed);
    var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
  …
    var totalPullsOfSuboptimalArmsExp = 0.0;
    var totalPullsOfSuboptimalArmsMax = 0.0;
+   var bestReward = double.NegativeInfinity;
    var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
    for (int i = 0; i <= maxIt; i++) {
  …
      if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
      totalRegret += b.OptimalExpectedReward - reward;
-
-     if (i == nextLogStep) {
-       nextLogStep *= 2;
-       if (!regretForIteration.ContainsKey(i)) {
-         regretForIteration.Add(i, new List<double>());
-       }
-       regretForIteration[i].Add(totalRegret / i);
-
-       if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
-         numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
-       }
-       numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
-
-       if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
-         numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
-       }
-       numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+     bestReward = Math.Max(bestReward, reward);
+
+     if (i + 1 == nextLogStep) {
+       nextLogStep += 100;
+       //if (!regretForIteration.ContainsKey(i)) {
+       //  regretForIteration.Add(i, new List<double>());
+       //}
+       //regretForIteration[i].Add(totalRegret / i);
+       //
+       //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+       //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+       //}
+       //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+       //
+       //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+       //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+       //}
+       //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+       //
+       //if (!bestRewardForIteration.ContainsKey(i)) {
+       //  bestRewardForIteration.Add(i, new List<double>());
+       //}
+       //bestRewardForIteration[i].Add(bestReward);
+       Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
+         policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
+         totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
      }
    }
  }
  // print
- foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
-   Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
-     p,
-     regretForIteration[p].Average(),
-     regretForIteration[p].Min(),
-     regretForIteration[p].Max(),
-     numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
-     numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
-   );
- }
+ //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+ //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
+ //    p,
+ //    regretForIteration[p].Average(),
+ //    regretForIteration[p].Min(),
+ //    regretForIteration[p].Max(),
+ //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+ //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
+ //    string.Join(" ", bestRewardForIteration[p])
+ //  );
+ //}
  }
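ParetoBandit.cs is one of the six added files and its diff is not shown in this changeset, so the following is only a plausible sketch of how Pareto-tailed arms can be sampled via the inverse CDF, using the shape parameters { 5, 1.1, 2 } from test case I above; the second array in test case II presumably mixes in a point mass at zero, which this sketch does not model. Smaller shape values mean heavier tails, so arm 1 (alpha = 1.1) occasionally yields the extremely large rewards that extreme-bandit policies such as ExtremeHunterPolicy are designed to find.

    using System;

    // Plausible sketch only: Pareto(alpha) rewards with scale 1, drawn by inverting the CDF.
    class ParetoArmSketch {
      static readonly Random random = new Random(31415);

      static double PullParetoArm(double alpha) {
        double u = 1.0 - random.NextDouble(); // u in (0, 1]
        return Math.Pow(u, -1.0 / alpha);     // inverse CDF of a Pareto(alpha) with scale 1
      }

      static void Main() {
        var alphas = new double[] { 5, 1.1, 2 };
        for (int arm = 0; arm < alphas.Length; arm++)
          Console.WriteLine("arm {0} (alpha = {1}): sample reward {2:F2}", arm, alphas[arm], PullParetoArm(alphas[arm]));
      }
    }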