Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs @ 11793

Last change on this file since 11793 was 11793, checked in by gkronber, 9 years ago

#2283 fixed compile errors and refactoring

File size: 3.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Common;
7using HeuristicLab.Problems.GrammaticalOptimization;
8
namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
  /// <summary>
  /// Grammar policy that uses one of the available bandit policies for state selection.
  /// For every state (optionally mapped to a canonical key, see <see cref="CanonicalState"/>)
  /// one <see cref="IBanditPolicyActionInfo"/> is created lazily and kept in <see cref="stateInfo"/>.
  /// </summary>
  public class GenericGrammarPolicy : IGrammarPolicy {
    /// <summary>Stores the information the bandit policy needs for each state, keyed by the result of <see cref="CanonicalState"/>.</summary>
    protected Dictionary<string, IBanditPolicyActionInfo> stateInfo;
    private readonly bool useCanonicalState;
    private readonly IProblem problem;
    private readonly IBanditPolicy banditPolicy;

    /// <summary>
    /// Creates a policy with an empty state table.
    /// </summary>
    /// <param name="problem">Problem whose grammar is used for terminal checks and whose canonical representation is used for state keys.</param>
    /// <param name="banditPolicy">Bandit policy that creates the per-state action infos and selects among follow states.</param>
    /// <param name="useCanonicalState">If true, states are keyed by their canonical representation instead of the raw state string.</param>
    public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
      this.useCanonicalState = useCanonicalState;
      this.problem = problem;
      this.banditPolicy = banditPolicy;
      this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
    }

    /// <summary>
    /// Lets the bandit policy select one of the follow states of <paramref name="curState"/>.
    /// </summary>
    /// <param name="random">Random number generator handed to the bandit policy.</param>
    /// <param name="curState">The state whose follow states are considered.</param>
    /// <param name="afterStates">The follow states; enumerated exactly once.</param>
    /// <param name="selectedStateIdx">The value returned by the bandit policy's SelectAction, or -1 if selection failed.</param>
    /// <returns>
    /// false if the infos of all follow states are disabled (this includes an empty sequence),
    /// in which case the current state is disabled as well; true otherwise.
    /// </returns>
    public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
      // Resolve the infos once: the incoming sequence may be lazy, and both the check below and
      // the bandit policy need to look at it. This also keeps the indices seen by both consistent.
      var afterStateInfos = afterStates.Select(s => GetStateInfo(s)).ToList();

      // fail if all states are done (corresponding state infos are disabled)
      if (afterStateInfos.All(info => info.Disabled)) {
        // all follow states have already been visited => also disable the current state
        GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
        selectedStateIdx = -1;
        return false;
      }

      selectedStateIdx = banditPolicy.SelectAction(random, afterStateInfos);
      return true;
    }

    /// <summary>
    /// Returns the action info stored for the given state, creating and registering a new one
    /// via the bandit policy when the state has not been seen before.
    /// </summary>
    private IBanditPolicyActionInfo GetStateInfo(string state) {
      var key = CanonicalState(state);
      IBanditPolicyActionInfo info;
      if (!stateInfo.TryGetValue(key, out info)) {
        info = banditPolicy.CreateActionInfo();
        stateInfo[key] = info;
      }
      return info;
    }

    /// <summary>
    /// Propagates a reward along a trajectory of states.
    /// If the last state is a terminal phrase of the grammar its info is disabled with the reward;
    /// all states before the last one receive the reward via UpdateReward, from the
    /// second-to-last state back to the first. An empty trajectory is a no-op.
    /// </summary>
    /// <param name="stateTrajectory">The visited states in order; enumerated at most once.</param>
    /// <param name="reward">The reward obtained at the end of the trajectory.</param>
    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
      // materialize once so that a lazy sequence is not evaluated several times
      var trajectory = stateTrajectory as IList<string> ?? stateTrajectory.ToList();
      if (trajectory.Count == 0) return; // nothing to update

      // the last state could be terminal
      var lastState = trajectory[trajectory.Count - 1];
      if (problem.Grammar.IsTerminal(lastState)) {
        GetStateInfo(lastState).Disable(reward);
      }

      // update remaining states (everything before the last state, back to front)
      for (int i = trajectory.Count - 2; i >= 0; i--) {
        GetStateInfo(trajectory[i]).UpdateReward(reward);
      }
    }

    /// <summary>
    /// Forgets everything learned so far by clearing the state table.
    /// </summary>
    public virtual void Reset() {
      stateInfo.Clear();
    }

    /// <summary>
    /// Returns the Tries value stored for the given state, or 0 if the state is unknown.
    /// Does not create an entry for unknown states.
    /// </summary>
    public int GetTries(string state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state), out info)) return info.Tries;
      return 0;
    }

    /// <summary>
    /// Returns the Value stored for the given state, or 0.0 if the state is unknown.
    /// Does not create an entry for unknown states.
    /// </summary>
    public double GetValue(string state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state), out info)) return info.Value;
      return 0.0; // TODO: check alternatives
    }

    /// <summary>
    /// Maps a state to the key under which its info is stored.
    /// Without canonical states the key is the state itself. With canonical states, terminal phrases
    /// map to their canonical representation and non-terminal phrases map to the canonical
    /// representation with the length of the original phrase appended.
    /// </summary>
    protected string CanonicalState(string state) {
      if (!useCanonicalState) {
        return state;
      }

      if (problem.Grammar.IsTerminal(state)) {
        return problem.CanonicalRepresentation(state);
      }

      // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
      // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
      // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
      // solution: we disable the state rS4
      return problem.CanonicalRepresentation(state) + state.Length;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.