1  using System;


2  using System.Collections.Generic;


3  using System.Linq;


4  using System.Text;


5  using System.Threading.Tasks;


6  using HeuristicLab.Common;


7 


namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// A multi-armed bandit whose arms pay a truncated normally distributed reward in [0, 1].
  /// Expected rewards per arm are drawn i.i.d. uniformly from [0, 0.7) at construction time.
  /// </summary>
  public class TruncatedNormalBandit : IBandit {
    /// <summary>Number of arms of this bandit.</summary>
    public int NumArms { get; private set; }

    /// <summary>Expected reward of the best arm, for calculating regret.</summary>
    public double OptimalExpectedReward { get; private set; }

    /// <summary>Index of the arm with the highest expected reward.</summary>
    public int OptimalExpectedRewardArm { get; private set; }

    // The arm with the highest expected reward also has the highest probability
    // of returning a reward of 1.0, so both notions of "best arm" coincide here.
    public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }

    private readonly Random random;
    private readonly double[] expReward;

    /// <summary>
    /// Creates a bandit with <paramref name="nArms"/> arms; expected rewards of arms
    /// are i.i.d. and uniformly distributed in [0, 0.7).
    /// </summary>
    /// <param name="random">Source of randomness used for arm setup and for pulls.</param>
    /// <param name="nArms">Number of arms.</param>
    public TruncatedNormalBandit(Random random, int nArms) {
      this.random = random;
      this.NumArms = nArms;
      expReward = new double[nArms];
      OptimalExpectedReward = double.NegativeInfinity;
      // Track the best arm while sampling the expected rewards.
      for (int i = 0; i < nArms; i++) {
        expReward[i] = random.NextDouble() * 0.7;
        if (expReward[i] > OptimalExpectedReward) {
          OptimalExpectedReward = expReward[i];
          OptimalExpectedRewardArm = i;
        }
      }
    }

    /// <summary>
    /// Pulling an arm results in a truncated normally distributed reward with
    /// mean expReward[arm] and std.dev. 0.1, truncated to [0, 1] by rejection sampling.
    /// </summary>
    /// <param name="arm">Index of the arm to pull (0 .. NumArms-1).</param>
    /// <returns>A reward in [0, 1].</returns>
    public double Pull(int arm) {
      double x;
      do {
        var z = Rand.RandNormal(random); // standard normal sample (project helper)
        x = z * 0.1 + expReward[arm];
      }
      // BUG FIX: original condition read "x < 0  x > 1" — the '||' operator was
      // missing, which does not compile; reject samples outside [0, 1].
      while (x < 0 || x > 1);
      return x;
    }
  }
}

