using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Problems.GrammaticalOptimization;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// A bandit whose arms are the grammar alternatives for the first non-terminal
  /// symbol of an incomplete phrase. Pulling an arm applies the corresponding
  /// replacement, completes the phrase randomly and returns the quality of the
  /// resulting sentence as evaluated by the problem.
  /// </summary>
  public class SentenceBandit : IBandit {
    /// <summary>Number of arms, i.e. the number of alternatives for the first non-terminal.</summary>
    public int NumArms { get; private set; }

    // NOTE: the two "optimal arm" properties are never assigned by this class
    // and therefore keep their default value of 0.
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }

    private readonly System.Random random;
    private readonly IProblem problem;
    private readonly IGrammar grammar;
    private readonly int maxLen;
    private readonly ReadonlySequence incompletePhrase;
    private readonly Sequence[] replacements;

    /// <summary>
    /// Creates a bandit for the given incomplete phrase.
    /// </summary>
    /// <param name="random">Random number generator used to complete sentences in <see cref="Pull"/>.</param>
    /// <param name="problem">Problem that supplies the grammar and evaluates completed sentences.</param>
    /// <param name="incompletePhrase">Phrase whose first non-terminal symbol is expanded by the arms.</param>
    /// <param name="maxLen">Maximum length passed to the grammar when completing a sentence randomly.</param>
    /// <exception cref="ArgumentNullException">
    /// If <paramref name="random"/>, <paramref name="problem"/> or <paramref name="incompletePhrase"/> is null.
    /// </exception>
    public SentenceBandit(System.Random random, IProblem problem, string incompletePhrase, int maxLen = 200) {
      // fail early with a descriptive exception instead of a NullReferenceException later on
      if (random == null) throw new ArgumentNullException("random");
      if (problem == null) throw new ArgumentNullException("problem");
      if (incompletePhrase == null) throw new ArgumentNullException("incompletePhrase");

      this.random = random;
      this.incompletePhrase = new ReadonlySequence(incompletePhrase);
      this.problem = problem;
      this.grammar = problem.Grammar;
      this.maxLen = maxLen;

      // one arm for each alternative of the first non-terminal symbol in the phrase
      this.replacements = grammar.GetNonTerminalAlternatives(this.incompletePhrase.FirstNonTerminal).ToArray();
      NumArms = replacements.Length;
    }

    /// <summary>
    /// Pulls the given arm: replaces the first non-terminal of the incomplete phrase
    /// with the alternative selected by <paramref name="arm"/>, completes the sentence
    /// randomly and returns the value the problem assigns to the completed sentence.
    /// </summary>
    /// <param name="arm">Index of the alternative to apply; must be in [0, NumArms).</param>
    /// <returns>The result of evaluating the randomly completed sentence.</returns>
    /// <exception cref="ArgumentOutOfRangeException">If <paramref name="arm"/> is not a valid arm index.</exception>
    public double Pull(int arm) {
      if (arm < 0 || arm >= replacements.Length)
        throw new ArgumentOutOfRangeException("arm", arm, "The arm index must be non-negative and smaller than NumArms.");

      // make the selected replacement on a fresh copy of the phrase ...
      var s = new Sequence(incompletePhrase);
      s.ReplaceAt(s.FirstNonTerminalIndex, 1, replacements[arm]);

      // ... and complete randomly to evaluate
      var completeSentence = grammar.CompleteSentenceRandomly(random, s, maxLen);
      return problem.Evaluate(completeSentence.ToString());
    }
  }
}