using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;
using HeuristicLab.Problems.GrammaticalOptimization;

namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
  /// <summary>
  /// Grammar policy that picks uniformly at random among the actions of a state
  /// that have not yet been exhausted. An action is exhausted once it has been
  /// tried (a successor state was recorded through <see cref="UpdateReward"/>)
  /// and that successor is marked as done.
  /// </summary>
  public class RandomNoResamplingPolicy : IGrammarPolicy {

    // States that are fully explored. Only key membership is used; the stored value is always true.
    private readonly Dictionary<ReadonlySequence, bool> done;

    // Successor state observed for each (state, action) pair reported to UpdateReward.
    private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState;

    /// <summary>
    /// Creates a policy with no recorded transitions and no done states.
    /// </summary>
    public RandomNoResamplingPolicy() {
      this.done = new Dictionary<ReadonlySequence, bool>();
      // Must be initialized here: both SelectAction and UpdateReward dereference it.
      this.nextState = new Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence>();
    }

    /// <summary>
    /// Selects a random action for <paramref name="state"/>, preferring actions that
    /// are not exhausted yet.
    /// </summary>
    /// <param name="random">Random number generator handed to <c>SelectRandom</c>.</param>
    /// <param name="state">The state for which an action is selected.</param>
    /// <param name="actions">The actions available in <paramref name="state"/>.</param>
    /// <returns>
    /// A randomly chosen non-exhausted action if one exists. If every action is
    /// exhausted, <paramref name="state"/> is marked as done and a random action
    /// from the full set is returned.
    /// </returns>
    public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
      // Materialize once so a lazily evaluated sequence is not enumerated repeatedly.
      var allActions = actions.ToList();
      var openActions = allActions.Where(a => !IsExhausted(state, a)).ToList();

      if (openActions.Count > 0) {
        return openActions.SelectRandom(random);
      }

      // Every action leads to a done successor, so this state is fully explored as well.
      // Recording that here lets done-ness propagate from terminal states towards the root.
      // NOTE(review): presumably what the unfinished trailing `if` in UpdateReward was meant to do - confirm.
      done[state] = true;

      // Same result as the original all-done branch, whose filter accepted every action.
      // NOTE(review): for an empty action set this relies on how SelectRandom treats an
      // empty sequence (extension defined outside this file).
      return allActions.SelectRandom(random);
    }

    /// <summary>
    /// Records the transition (<paramref name="state"/>, <paramref name="action"/>) to
    /// <paramref name="newState"/> and marks <paramref name="newState"/> as done when it
    /// is terminal.
    /// </summary>
    /// <param name="state">The state in which the action was taken.</param>
    /// <param name="action">The action that was taken.</param>
    /// <param name="reward">The observed reward; not used by this policy.</param>
    /// <param name="newState">The state reached by taking the action.</param>
    public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
      var key = Tuple.Create(state, action);
      nextState[key] = newState;
      if (newState.IsTerminal) {
        done[newState] = true;
      }
    }

    /// <summary>
    /// Returns whether <paramref name="state"/> has been marked as done.
    /// </summary>
    /// <param name="state">The state to check.</param>
    /// <returns><c>true</c> if the state was marked as done, otherwise <c>false</c>.</returns>
    public bool Done(ReadonlySequence state) {
      return done.ContainsKey(state);
    }

    // An action is exhausted when it has been tried from this state and its recorded successor is done.
    private bool IsExhausted(ReadonlySequence state, ReadonlySequence action) {
      ReadonlySequence successor;
      return nextState.TryGetValue(Tuple.Create(state, action), out successor) && Done(successor);
    }
  }
}