1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.Threading.Tasks;
|
---|
6 | using HeuristicLab.Problems.GrammaticalOptimization;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Algorithms.Bandits {
|
---|
// A bandit whose arms are the grammar alternatives for the first non-terminal
// symbol of a fixed incomplete phrase. Pulling an arm applies the corresponding
// alternative, completes the resulting phrase randomly and returns the quality
// that the problem assigns to the completed sentence.
public class SentenceBandit : IBandit {
  // Number of arms, i.e. the number of alternatives found for the first
  // non-terminal symbol of the incomplete phrase.
  public int NumArms { get; private set; }

  // NOTE: the two properties below are never assigned by this class and therefore
  // keep their default value 0; they must not be read as the index of a known
  // optimal arm.
  public int OptimalExpectedRewardArm { get; private set; }
  public int OptimalMaximalRewardArm { get; private set; }

  private readonly System.Random random;
  private readonly IProblem problem;
  private readonly IGrammar grammar;
  private readonly int maxLen;
  private readonly ReadonlySequence incompletePhrase;
  private readonly Sequence[] replacements;

  // random:           random number generator used for the random sentence completion in Pull
  // problem:          supplies the grammar and evaluates completed sentences
  // incompletePhrase: phrase whose first non-terminal symbol is expanded by the arms
  //                   (presumably it must contain at least one non-terminal - confirm
  //                   against ReadonlySequence.FirstNonTerminal)
  // maxLen:           passed through to IGrammar.CompleteSentenceRandomly
  // Throws ArgumentNullException if random, problem or incompletePhrase is null.
  public SentenceBandit(System.Random random, IProblem problem, string incompletePhrase, int maxLen = 200) {
    // fail fast: without these checks a null random would only surface on the first Pull
    if (random == null) throw new ArgumentNullException("random");
    if (problem == null) throw new ArgumentNullException("problem");
    if (incompletePhrase == null) throw new ArgumentNullException("incompletePhrase");

    this.random = random;
    this.incompletePhrase = new ReadonlySequence(incompletePhrase);
    this.problem = problem;
    this.grammar = problem.Grammar;
    this.maxLen = maxLen;

    // one arm per alternative of the first non-terminal symbol
    this.replacements = grammar.GetNonTerminalAlternatives(this.incompletePhrase.FirstNonTerminal).ToArray();
    NumArms = replacements.Length;
  }

  // Pulls the given arm: the reward is the value problem.Evaluate returns for a
  // sentence obtained by applying the arm's alternative and completing the rest
  // of the phrase randomly, so repeated pulls of the same arm can differ.
  // Throws ArgumentOutOfRangeException if arm is not in [0, NumArms).
  public double Pull(int arm) {
    // validate before any sequence is allocated
    if (arm < 0 || arm >= replacements.Length) {
      throw new ArgumentOutOfRangeException("arm", arm, "arm must be in the range [0, NumArms).");
    }

    // make the selected replacement on a mutable copy of the phrase ...
    var s = new Sequence(incompletePhrase);
    s.ReplaceAt(s.FirstNonTerminalIndex, 1, replacements[arm]);

    // ... and complete randomly to evaluate
    var completeSentence = grammar.CompleteSentenceRandomly(random, s, maxLen);
    return problem.Evaluate(completeSentence.ToString());
  }
}
|
---|
43 | }
|
---|