Changeset 12876


Ignore:
Timestamp:
08/17/15 19:13:19 (5 years ago)
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr
Files:
6 added
16 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/GrammaticalOptimization.sln

    r12099 r12876  
    11
    22Microsoft Visual Studio Solution File, Format Version 12.00
    3 # Visual Studio 2013
    4 VisualStudioVersion = 12.0.31101.0
    5 MinimumVisualStudioVersion = 10.0.40219.1
     3# Visual Studio 2012
    64Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Problems.GrammaticalOptimization", "HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj", "{CB9DCCF6-667E-4A13-B82D-DBD6B45A045E}"
    75EndProject
     
    6664  ProjectSection(SolutionItems) = preProject
    6765    local.testsettings = local.testsettings
     66    Performance1.psess = Performance1.psess
    6867  EndProjectSection
    6968EndProject
     
    122121    HideSolutionNode = FALSE
    123122  EndGlobalSection
     123  GlobalSection(Performance) = preSolution
     124    HasPerformanceSessions = true
     125  EndGlobalSection
    124126EndGlobal
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs

    r12290 r12876  
    2727      MaxReward = Math.Max(MaxReward, reward);
    2828      var delta = reward - avgValue;
    29       //var alpha = 0.01;
    30       var alpha = Math.Max(1.0/Tries, 0.01);
     29      double alpha = 1.0 / Tries;
    3130      avgValue = avgValue + alpha * delta;
    3231    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11851 r12876  
    3131  </PropertyGroup>
    3232  <ItemGroup>
     33    <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     34      <SpecificVersion>False</SpecificVersion>
     35      <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
     36    </Reference>
    3337    <Reference Include="System" />
    3438    <Reference Include="System.Core" />
     
    3640  <ItemGroup>
    3741    <Compile Include="ActionInfos\BernoulliPolicyActionInfo.cs" />
     42    <Compile Include="ActionInfos\ExtremeHunterActionInfo.cs" />
    3843    <Compile Include="ActionInfos\DefaultPolicyActionInfo.cs" />
    3944    <Compile Include="ActionInfos\MeanAndVariancePolicyActionInfo.cs" />
     
    4550    <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
    4651    <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
     52    <Compile Include="Policies\IntervalEstimationPolicy.cs" />
     53    <Compile Include="Policies\ExtremeHunterPolicy.cs" />
    4754    <Compile Include="Policies\EpsGreedyPolicy.cs" />
    4855    <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs

    r11806 r12876  
    99namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    1010  public class ActiveLearningPolicy : IBanditPolicy {
     11    public double MaxReward { get; private set; }
     12    public ActiveLearningPolicy(double maxReward = 1.0) {
     13      this.MaxReward = maxReward;
     14    }
    1115    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1216      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     
    2933          q = aInfo.SumReward / aInfo.Tries;
    3034          var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
    31           u = q + 0.5 * b;
    32           l = q - 0.5 * b;
     35          u = q + MaxReward * b;
     36          l = q - MaxReward * b;
    3337        }
    3438        bestActions.Add(aIdx);
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs

    r11806 r12876  
    3535          var avgReward = aInfo.SumReward / aInfo.Tries;
    3636
    37           // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
     37          // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
    3838          // var alpha = Math.Log(2 * totalTries * k / delta);
    3939          double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11806 r12876  
    1010  // policy for k-armed bandit (see Auer et al. 2002)
    1111  public class UCB1Policy : IBanditPolicy {
     12    public double MaxReward { get; private set; }
     13    public UCB1Policy(double maxReward = 1.0) {
     14      this.MaxReward = maxReward;
     15    }
    1216    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1317      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     
    2428        } else {
    2529
    26           q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
     30          q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    2731        }
    2832        if (q > bestQ) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11832 r12876  
    99namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    1010  // policy for k-armed bandit (see Auer et al. 2002)
     11  // specific to Bernoulli distributed rewards
    1112  public class UCB1TunedPolicy : IBanditPolicy {
    1213
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs

    r12290 r12876  
    201201        var children = n.children;
    202202        if (children == null || !children.Any()) break;
    203         var values = children.Select(ch => policy.GetValue(ch.phrase));
    204         var maxValue = values.Max();
    205         if (maxValue == 0) maxValue = 1.0;
    206         if (double.IsPositiveInfinity(maxValue)) maxValue = double.MaxValue;
    207 
     203        var triesEnumerable = children.Select(ch => policy.GetTries(ch.phrase));
     204        double maxTries = triesEnumerable.Where(v => !double.IsInfinity(v)).DefaultIfEmpty(1).Max();
     205        maxTries = Math.Max(maxTries, 1.0);
    208206        // write phrases
    209207        foreach (var ch in children) {
    210           SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
     208          SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
    211209          Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
    212210        }
     
    215213        // write values
    216214        foreach (var ch in children) {
    217           SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
    218           Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
     215          SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
     216          if (!double.IsInfinity(policy.GetValue(ch.phrase)))
     217            Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
     218          else
     219            Console.Write(" Inf ");
    219220        }
    220221        Console.WriteLine();
     
    222223        // write tries
    223224        foreach (var ch in children) {
    224           SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
     225          SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
    225226          Console.Write(" {0,4}", policy.GetTries(ch.phrase));
    226227        }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Common/HeuristicLab.Common.csproj

    r11902 r12876  
    3838    <Compile Include="ExpressionExtender.cs" />
    3939    <Compile Include="Extensions.cs" />
     40    <Compile Include="Heap.cs" />
    4041    <Compile Include="MostRecentlyUsedCache.cs" />
    4142    <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/GaussianBandit.cs

    r11849 r12876  
    1212    public int OptimalExpectedRewardArm { get; private set; }
    1313    public int OptimalMaximalRewardArm { get; private set; }
    14 
     14    public double MaxReward { get; private set; }
     15    public double MinReward { get; private set; }
    1516    private readonly Random random;
    1617    private readonly double[] exp;
    1718    private readonly double[] stdDev;
    18     public GaussianBandit(Random random, int nArms) {
     19    public GaussianBandit(Random random, int nArms, double minReward = double.NegativeInfinity, double maxReward = double.PositiveInfinity) {
     20      this.MaxReward = maxReward;
     21      this.MinReward = minReward;
    1922      this.random = random;
    2023      this.NumArms = nArms;
     
    3134          OptimalExpectedRewardArm = i;
    3235        }
    33         var q = alglib.invnormaldistribution(0.99) * stdDev[i] + exp[i];
     36        var q = alglib.invnormaldistribution(0.999) * stdDev[i] + exp[i];
    3437        if (q > bestQ) {
    3538          bestQ = q;
     
    3942    }
    4043
    41     // pulling an arm results in a truncated normally distributed reward
    42     // with mean expReward[i] and std.dev 0.1
     44    // pulling an arm results in a normally distributed reward with mean exp[arm] and std.dev stdDev[arm],
     45    // truncated to the interval (MinReward, MaxReward] by rejection sampling
    4346    public double Pull(int arm) {
    44       var z = Rand.RandNormal(random);
    45       var x = z * stdDev[arm] + exp[arm];
     47      double x;
     48      do {
     49        var z = Rand.RandNormal(random);
     50        x = z * stdDev[arm] + exp[arm];
     51      } while (x <= MinReward || x > MaxReward);
    4652      return x;
    4753    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/HeuristicLab.Problems.Bandits.csproj

    r11981 r12876  
    4141    <Compile Include="BanditHelper.cs" />
    4242    <Compile Include="BernoulliBandit.cs" />
     43    <Compile Include="ParetoBandit.cs" />
    4344    <Compile Include="GaussianBandit.cs" />
    4445    <Compile Include="GaussianMixtureBandit.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/IBandit.cs

    r11849 r12876  
    1212    int OptimalMaximalRewardArm { get; } // arm which is optimal for optimization of maximal reward
    1313
    14     double Pull(int arm); // pulling an arm returns a regret
     14    double Pull(int arm); // pulling an arm returns a reward
    1515  }
    1616}
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj

    r12391 r12876  
    4747    </Reference>
    4848    <Reference Include="HeuristicLab.Random-3.3">
    49       <HintPath>..\..\..\..\..\Program Files\HeuristicLab 3.3\HeuristicLab.Random-3.3.dll</HintPath>
     49      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Random-3.3.dll</HintPath>
    5050    </Reference>
    5151    <Reference Include="System" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs

    r12290 r12876  
    1313    private const string grammarString = @"
    1414G(A):
    15 A -> l | r | m | ?(A)(A) | lA | rA | mA
     15A -> l | r | m | ?(A)(A) | lA | rA | mA |?(A)(A)A
    1616";
    1717    // for tree-based GP in HL we need a different grammar for the same language
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Main/Program.cs

    r12298 r12876  
    5757        // var alg = new SequentialSearch(problem, 25, random, 0,
    5858        //   new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy()));
    59         var policy = new GenericPolicy(problem);
     59        //var policy = new GenericPolicy(problem);
     60        //var policy = new GenericGrammarPolicy(problem, new ExtremeHunterPolicy());
     61        //var policy = new GenericGrammarPolicy(problem, new UCB1Policy());
     62        //var policy = new GenericGrammarPolicy(problem, new ActiveLearningPolicy(0.1));
     63        var policy = new GenericGrammarPolicy(problem, new ExtremeHunterPolicy(1.0E-2, 1E-2, 1));
    6064        var alg = new SequentialSearch(problem, 23, random, 0,
    6165          policy);
     
    7882            Console.SetCursorPosition(0, 0);
    7983            Console.WriteLine(iterations);
    80             WriteAlleleStatistics();
     84            //WriteAlleleStatistics();
    8185            Console.WriteLine(globalStatistics.BestSentenceQuality);
    8286            Console.WriteLine(globalStatistics.BestSentence);
    8387            Console.WriteLine(globalStatistics);
    84             //alg.PrintStats();
    85             policy.PrintStats();
     88            alg.PrintStats();
     89            //policy.PrintStats();
    8690            //ResetAlleleStatistics();
    8791          }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs

    r11745 r12876  
    1717      var nArms = 20;
    1818
    19       // ThresholdAscent only works for rewards in [0..1] so far
    20 
    21       Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
    22       Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
    23       Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
    24       Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
    25       Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
    26       Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
    27 
    28     }
    29 
     19      // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
     20      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
     21      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
     22      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
     23      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
     24      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
     25      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
     26      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
     27      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
     28      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
     29      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
     30      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
     31    }
     32
     33    [TestMethod]
     34    // test case I as described in Extreme Bandits paper
     35    public void ComparePoliciesExtremeBandits1() {
     36      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     37      var randSeed = 31415;
     38      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
     39      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
     40      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
     41      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
     42      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
     43    }
     44
     45    [TestMethod]
     46    // test case II as described in Extreme Bandits paper
     47    public void ComparePoliciesExtremeBandits2() {
     48      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     49      var randSeed = 31415;
     50      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
     51      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
     52      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
     53      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
     54      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
     55    }
    3056
    3157    [TestMethod]
     
    189215    }
    190216    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
    191       TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
     217      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
     218    }
     219
     220    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
     221      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
     222    }
     223    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
     224      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
    192225    }
    193226
    194227
    195228    private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
    196       var maxIt = 1E5;
    197       var reps = 10; // independent runs
    198       var regretForIteration = new Dictionary<int, List<double>>();
    199       var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
    200       var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
     229      var maxIt = 1E4;
     230      var reps = 30; // independent runs
     231      //var regretForIteration = new Dictionary<int, List<double>>();
     232      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
     233      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
     234      //var bestRewardForIteration = new Dictionary<int, List<double>>();
    201235      var globalRandom = new Random(randSeed);
    202236      var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
     
    210244        var totalPullsOfSuboptimalArmsExp = 0.0;
    211245        var totalPullsOfSuboptimalArmsMax = 0.0;
     246        var bestReward = double.NegativeInfinity;
    212247        var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
    213248        for (int i = 0; i <= maxIt; i++) {
     
    220255          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
    221256          totalRegret += b.OptimalExpectedReward - reward;
    222 
    223           if (i == nextLogStep) {
    224             nextLogStep *= 2;
    225             if (!regretForIteration.ContainsKey(i)) {
    226               regretForIteration.Add(i, new List<double>());
    227             }
    228             regretForIteration[i].Add(totalRegret / i);
    229 
    230             if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
    231               numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
    232             }
    233             numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
    234 
    235             if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
    236               numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
    237             }
    238             numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
     257          bestReward = Math.Max(bestReward, reward);
     258
     259          if (i + 1 == nextLogStep) {
     260            nextLogStep += 100;
     261            //if (!regretForIteration.ContainsKey(i)) {
     262            //  regretForIteration.Add(i, new List<double>());
     263            //}
     264            //regretForIteration[i].Add(totalRegret / i);
     265            //
     266            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
     267            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
     268            //}
     269            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
     270            //
     271            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
     272            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
     273            //}
     274            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
     275            //
     276            //if (!bestRewardForIteration.ContainsKey(i)) {
     277            //  bestRewardForIteration.Add(i, new List<double>());
     278            //}
     279            //bestRewardForIteration[i].Add(bestReward);
     280            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
     281              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
     282              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
    239283          }
    240284        }
    241285      }
    242286      // print
    243       foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
    244         Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
    245           p,
    246           regretForIteration[p].Average(),
    247           regretForIteration[p].Min(),
    248           regretForIteration[p].Max(),
    249           numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
    250           numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
    251           );
    252       }
     287      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
     288      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
     289      //    p,
     290      //    regretForIteration[p].Average(),
     291      //    regretForIteration[p].Min(),
     292      //    regretForIteration[p].Max(),
     293      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
     294      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
     295      //    string.Join(" ", bestRewardForIteration[p])
     296      //    );
     297      //}
    253298    }
    254299
Note: See TracChangeset for help on using the changeset viewer.