1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 | using HeuristicLab.Problems.GrammaticalOptimization;
7 |
8 | namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Bandit whose arms are the grammar alternatives for the first non-terminal
/// of an incomplete phrase. Pulling an arm applies that alternative, completes
/// the phrase randomly and returns the problem's evaluation of the result.
/// </summary>
public class SentenceBandit : IBandit {
  /// <summary>Number of arms, i.e. number of alternatives found for the first non-terminal.</summary>
  public int NumArms { get; private set; }

  // NOTE(review): the two properties below are never assigned in this class and
  // therefore keep their default value 0; presumably they exist only to satisfy IBandit.
  public int OptimalExpectedRewardArm { get; private set; }
  public int OptimalMaximalRewardArm { get; private set; }

  private readonly System.Random random;
  private readonly IProblem problem;
  private readonly IGrammar grammar;
  private readonly int maxLen;
  private readonly ReadonlySequence incompletePhrase;
  private readonly Sequence[] replacements;

  /// <summary>
  /// Creates a bandit over the alternatives of the first non-terminal in
  /// <paramref name="incompletePhrase"/>, using the grammar of <paramref name="problem"/>.
  /// </summary>
  /// <param name="random">Random number generator handed to the grammar when completing sentences.</param>
  /// <param name="problem">Problem that supplies the grammar and evaluates completed sentences.</param>
  /// <param name="incompletePhrase">Phrase whose first non-terminal is expanded by the arms.</param>
  /// <param name="maxLen">Passed to the grammar's random completion (presumably the maximal sentence length).</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="random"/>, <paramref name="problem"/> or <paramref name="incompletePhrase"/> is null.
  /// </exception>
  public SentenceBandit(System.Random random, IProblem problem, string incompletePhrase, int maxLen = 200) {
    // Fail fast with a named argument instead of a NullReferenceException here or later in Pull.
    if (random == null) throw new ArgumentNullException("random");
    if (problem == null) throw new ArgumentNullException("problem");
    if (incompletePhrase == null) throw new ArgumentNullException("incompletePhrase");

    this.random = random;
    this.incompletePhrase = new ReadonlySequence(incompletePhrase);
    this.problem = problem;
    this.grammar = problem.Grammar;
    this.maxLen = maxLen;

    // One arm per alternative of the first non-terminal in the phrase.
    this.replacements = grammar.GetNonTerminalAlternatives(this.incompletePhrase.FirstNonTerminal).ToArray();
    NumArms = replacements.Length;
  }

  /// <summary>
  /// Applies the alternative selected by <paramref name="arm"/> to the first
  /// non-terminal of the incomplete phrase, lets the grammar complete the
  /// sentence randomly and returns the problem's evaluation of that sentence.
  /// </summary>
  /// <param name="arm">Index of the alternative to apply; must be in [0, NumArms).</param>
  /// <returns>The value returned by the problem's Evaluate for the completed sentence.</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="arm"/> is not a valid arm index.</exception>
  public double Pull(int arm) {
    if (arm < 0 || arm >= replacements.Length) throw new ArgumentOutOfRangeException("arm");

    // make the selected replacement on a fresh copy of the phrase ...
    var s = new Sequence(incompletePhrase);
    s.ReplaceAt(s.FirstNonTerminalIndex, 1, replacements[arm]);

    // ... and complete randomly to evaluate
    var completeSentence = grammar.CompleteSentenceRandomly(random, s, maxLen);
    return problem.Evaluate(completeSentence.ToString());
  }
}
43 | }