using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Helper routines for empirically inspecting the arms of an <see cref="IBandit"/>.
  /// </summary>
  class BanditHelper {
    // Rewards at or above this value are counted as "upper range" hits when
    // choosing the best arm for maximum reward.
    // NOTE(review): presumably rewards are expected to lie in [0, 1] - confirm against IBandit implementations.
    private const double UpperRewardThreshold = 0.95;

    /// <summary>
    /// Pulls every arm of <paramref name="bandit"/> <paramref name="nSamples"/> times and reports
    /// (a) the arm with the highest summed reward together with its average reward, and
    /// (b) the arm that produced the most rewards at or above 0.95.
    /// Ties are resolved in favor of the arm with the lowest index.
    /// </summary>
    /// <param name="random">Not used by this method; kept so that existing callers keep compiling.</param>
    /// <param name="bandit">The bandit whose arms <c>0 .. NumArms - 1</c> are pulled.</param>
    /// <param name="nSamples">Number of pulls per arm. For values less than or equal to zero no arm is pulled and all outputs keep their defaults.</param>
    /// <param name="expRewardEst">Average reward (sum / <paramref name="nSamples"/>) of the arm reported in <paramref name="bestArmForExpReward"/>; 0.0 if nothing was sampled.</param>
    /// <param name="bestArmForExpReward">Index of the arm with the largest summed reward; 0 if nothing was sampled.</param>
    /// <param name="bestArmForMaxReward">Index of the arm with the most pulls rewarded at or above 0.95; 0 if no pull reached that threshold.</param>
    public static void SampleArms(Random random, IBandit bandit, int nSamples, out double expRewardEst, out int bestArmForExpReward, out int bestArmForMaxReward) {
      bestArmForExpReward = 0;
      bestArmForMaxReward = 0;
      expRewardEst = 0.0;

      // Without samples there is nothing to estimate; returning here also
      // prevents a 0.0 / 0 division below.
      if (nSamples <= 0) {
        return;
      }

      // Start below every possible sum so that an arm is selected even when
      // all summed rewards are negative (a start value of 0.0 would ignore such arms).
      var bestSumReward = double.NegativeInfinity;
      var bestUpperPercCount = 0;

      for (int a = 0; a < bandit.NumArms; a++) {
        var sumReward = 0.0;
        var upperPercCount = 0;
        for (int i = 0; i < nSamples; i++) {
          var reward = bandit.Pull(a);
          sumReward += reward;
          if (reward >= UpperRewardThreshold) {
            upperPercCount++;
          }
        }

        // Strict comparison: on equal sums the earlier arm is kept.
        if (sumReward > bestSumReward) {
          bestSumReward = sumReward;
          bestArmForExpReward = a;
          expRewardEst = bestSumReward / nSamples;
        }

        // Strict comparison: an arm without any high reward never replaces the default arm 0.
        if (upperPercCount > bestUpperPercCount) {
          bestUpperPercCount = upperPercCount;
          bestArmForMaxReward = a;
        }
      }
    }
  }
}