[12893] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using System.Threading.Tasks;
|
---|
| 6 | using HeuristicLab.Problems.GrammaticalOptimization;
|
---|
| 7 |
|
---|
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit whose arms are the grammar alternatives available for the first
  /// non-terminal symbol of an incomplete phrase. Pulling an arm applies the
  /// corresponding alternative, completes the phrase randomly and returns the
  /// problem's evaluation of the completed sentence.
  /// </summary>
  public class SentenceBandit : IBandit {
    /// <summary>Number of alternatives for the first non-terminal of the phrase.</summary>
    public int NumArms { get; private set; }

    // NOTE(review): the two "optimal arm" properties are never assigned in this
    // class, so they keep their default value of 0 - presumably because the
    // optimal arm is not known up front for this bandit; confirm against IBandit.
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }

    private readonly System.Random random;
    private readonly IProblem problem;
    private readonly IGrammar grammar;
    private readonly int maxLen;
    private readonly ReadonlySequence incompletePhrase;
    private readonly Sequence[] replacements;

    /// <summary>
    /// Creates a bandit over the alternatives of the first non-terminal in
    /// <paramref name="incompletePhrase"/>.
    /// </summary>
    /// <param name="random">Random source handed to the grammar when completing sentences.</param>
    /// <param name="problem">Problem that supplies the grammar and evaluates completed sentences.</param>
    /// <param name="incompletePhrase">Phrase to expand; its first non-terminal determines the arms.</param>
    /// <param name="maxLen">Passed through to the random sentence completion (presumably the maximum sentence length).</param>
    /// <exception cref="ArgumentNullException"><paramref name="problem"/> is null.</exception>
    public SentenceBandit(System.Random random, IProblem problem, string incompletePhrase, int maxLen = 200) {
      // Fail with a descriptive exception instead of a NullReferenceException
      // on the problem.Grammar access below.
      if (problem == null) {
        throw new ArgumentNullException("problem");
      }

      this.random = random;
      this.incompletePhrase = new ReadonlySequence(incompletePhrase);
      this.problem = problem;
      this.grammar = problem.Grammar;
      this.maxLen = maxLen;

      // One arm per alternative of the first non-terminal; materialized once so
      // that arm indices stay stable across pulls.
      this.replacements = grammar.GetNonTerminalAlternatives(this.incompletePhrase.FirstNonTerminal).ToArray();
      NumArms = replacements.Length;
    }

    /// <summary>
    /// Pulls the given arm: substitutes the arm's alternative for the first
    /// non-terminal of the phrase, completes the result randomly and evaluates it.
    /// </summary>
    /// <param name="arm">Index into the alternatives (0 to <see cref="NumArms"/> - 1).</param>
    /// <returns>The value returned by the problem's evaluation of the completed sentence.</returns>
    public double Pull(int arm) {
      // make the selected replacement on a private copy of the phrase ...
      var s = new Sequence(incompletePhrase);
      s.ReplaceAt(s.FirstNonTerminalIndex, 1, replacements[arm]);

      // ... and complete randomly to evaluate
      var completeSentence = grammar.CompleteSentenceRandomly(random, s, maxLen);
      return problem.Evaluate(completeSentence.ToString());
    }
  }
}
|
---|