Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs @ 11792

Last change on this file since 11792 was 11792, checked in by gkronber, 9 years ago

#2283 work-in-progress commit (does not compile)

File size: 3.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Common;
7using HeuristicLab.Problems.GrammaticalOptimization;
8
namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
  /// <summary>
  /// Grammar policy that uses one of the available bandit policies for state selection.
  /// Bandit statistics are stored per state, keyed by the string produced by
  /// <see cref="CanonicalState"/>. States that have been marked as done are excluded
  /// from selection until <see cref="Reset"/> is called.
  /// </summary>
  public class GenericGrammarPolicy : IGrammarPolicy {
    // stores the necessary information for bandit policies for each state (keyed by CanonicalState)
    protected Dictionary<string, IBanditPolicyActionInfo> stateInfo;
    private readonly bool useCanonicalState;
    private readonly IProblem problem;
    private readonly IBanditPolicy banditPolicy;
    // keys (as produced by CanonicalState) of states that must not be selected anymore
    private readonly HashSet<string> done;

    /// <summary>
    /// Creates a policy with empty state statistics and an empty set of done states.
    /// </summary>
    /// <param name="problem">Problem whose <c>CanonicalRepresentation</c> is used to build state keys when <paramref name="useCanonicalState"/> is true.</param>
    /// <param name="banditPolicy">Bandit policy used to create per-state action infos and to select among follow states.</param>
    /// <param name="useCanonicalState">If true, states are keyed by their canonical representation instead of their plain string form.</param>
    public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
      this.useCanonicalState = useCanonicalState;
      this.problem = problem;
      this.banditPolicy = banditPolicy;
      this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
      this.done = new HashSet<string>();
    }

    /// <summary>
    /// Selects one of the follow states that is not yet done using the bandit policy.
    /// </summary>
    /// <param name="random">Random number generator handed to the bandit policy.</param>
    /// <param name="curState">The state the follow states were derived from; it is marked as done when no follow state is selectable.</param>
    /// <param name="afterStates">Candidate follow states.</param>
    /// <param name="selectedState">The selected follow state, or null when the method returns false.</param>
    /// <returns>True if a follow state was selected; false if all follow states are already done.</returns>
    public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
      out ReadonlySequence selectedState) {
      // only select states that are not yet done; materialize once so that the index
      // returned by the bandit policy refers to exactly this filtered set
      var candidates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
      if (candidates.Length == 0) {
        // fail because all follow states have already been visited => also disable the current state
        // (if we can be sure that it has been fully explored)
        done.Add(CanonicalState(curState));
        selectedState = null;
        return false;
      }

      // the action infos are passed lazily (as before), so an info is only created for a state
      // when the bandit policy actually enumerates it
      var selectedIdx = banditPolicy.SelectAction(random, candidates.Select(s => GetStateInfo(s)));
      selectedState = candidates[selectedIdx];
      return true;
    }

    /// <summary>
    /// Returns the bandit action info stored for the given state, creating and storing
    /// a new one via the bandit policy if none exists yet.
    /// </summary>
    private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
      var key = CanonicalState(state);
      IBanditPolicyActionInfo info;
      if (!stateInfo.TryGetValue(key, out info)) {
        info = banditPolicy.CreateActionInfo();
        stateInfo[key] = info;
      }
      return info;
    }

    /// <summary>
    /// Propagates a reward to every state of the trajectory and marks the last state
    /// as done if it is terminal.
    /// </summary>
    /// <param name="stateTrajectory">Sequence of visited states; must contain at least one state.</param>
    /// <param name="reward">Reward passed to the action info of each state in the trajectory.</param>
    /// <exception cref="InvalidOperationException">Thrown when <paramref name="stateTrajectory"/> is empty.</exception>
    public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
      // materialize once: the trajectory is needed both for its last element and for the update loop,
      // and a lazily evaluated sequence must not be enumerated twice
      var trajectory = stateTrajectory.ToArray();

      // the last state could be terminal
      var lastState = trajectory.Last();
      if (lastState.IsTerminal) {
        done.Add(CanonicalState(lastState));
      }

      foreach (var state in trajectory) {
        GetStateInfo(state).UpdateReward(reward);
      }
    }

    /// <summary>
    /// Discards all collected state statistics and the set of done states.
    /// </summary>
    public virtual void Reset() {
      stateInfo.Clear();
      done.Clear();
    }

    /// <summary>
    /// Returns the <c>Tries</c> value stored for the given state, or 0 if no information is stored for it.
    /// </summary>
    public int GetTries(ReadonlySequence state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state), out info)) {
        return info.Tries;
      }
      return 0;
    }

    /// <summary>
    /// Returns the <c>Value</c> stored for the given state, or 0.0 if no information is stored for it.
    /// </summary>
    public double GetValue(ReadonlySequence state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state), out info)) {
        return info.Value;
      }
      return 0.0; // TODO: check alternatives
    }

    /// <summary>
    /// Builds the string key under which a state is stored in <see cref="stateInfo"/> and the done set.
    /// Without canonical states this is the state's plain string form. With canonical states it is the
    /// problem's canonical representation, with the state's length appended for non-terminal states.
    /// </summary>
    protected string CanonicalState(ReadonlySequence state) {
      if (!useCanonicalState) {
        return state.ToString();
      }

      var canonical = problem.CanonicalRepresentation(state.ToString());
      if (state.IsTerminal) {
        return canonical;
      }

      // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
      // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
      // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
      // solution: we disable the state rS4
      return canonical + state.Length;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.