Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs @ 11770

Last change on this file since 11770 was 11770, checked in by gkronber, 9 years ago

#2283: worked on generic sequential search alg with bandit policy as parameter

File size: 3.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Common;
7using HeuristicLab.Problems.GrammaticalOptimization;
8
namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
  /// <summary>
  /// Grammar policy that uses one of the available bandit policies for state selection.
  /// </summary>
  /// <remarks>
  /// One <see cref="IBanditPolicyActionInfo"/> is kept per state key. The key is the string form of the state,
  /// or the result of <c>problem.CanonicalRepresentation</c> for that string when canonical states are enabled.
  /// State keys recorded as done are excluded from selection.
  /// </remarks>
  public class GenericGrammarPolicy : IGrammarPolicy {
    protected Dictionary<string, IBanditPolicyActionInfo> stateInfo; // stores the necessary information for bandit policies for each state
    private readonly bool useCanonicalState;
    private readonly IProblem problem;
    private readonly IBanditPolicy banditPolicy;
    private readonly HashSet<string> done; // keys of states that must not be selected again

    /// <summary>
    /// Creates a policy with empty per-state statistics and an empty set of done states.
    /// </summary>
    /// <param name="problem">Problem used to compute the canonical representation of a state (only called when <paramref name="useCanonicalState"/> is true).</param>
    /// <param name="banditPolicy">Bandit policy that creates the per-state action infos and selects among follow states.</param>
    /// <param name="useCanonicalState">If true, states are keyed by their canonical representation instead of their plain string form.</param>
    public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
      this.useCanonicalState = useCanonicalState;
      this.problem = problem;
      this.banditPolicy = banditPolicy;
      this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
      this.done = new HashSet<string>();
    }

    /// <summary>
    /// Selects one of the follow states that is not yet done by asking the bandit policy.
    /// </summary>
    /// <param name="random">Random number generator handed to the bandit policy.</param>
    /// <param name="curState">The state from which the follow states were generated.</param>
    /// <param name="afterStates">Candidate follow states.</param>
    /// <param name="selectedState">Receives the selected follow state.</param>
    /// <returns>True when a follow state has been selected.</returns>
    /// <exception cref="NotImplementedException">
    /// Thrown when every follow state is already done; handling of this case is not implemented yet.
    /// </exception>
    public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
      out ReadonlySequence selectedState) {
      // only select states that are not yet done; materialize once because the result is used several times
      var candidates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
      if (candidates.Length == 0) {
        // All follow states have already been visited.
        // TODO: intended behavior is to fail (selectedState = null; return false) and to also disable the
        // current state via done.Add(CanonicalState(curState.ToString())), but only if we can be sure that
        // it has been fully explored. An earlier idea was to do so only when curState.ToString() and its
        // canonical form have the same length. The statements that used to follow the throw were
        // unreachable and have been folded into this note.
        throw new NotImplementedException();
      }

      var selectedIdx = banditPolicy.SelectAction(random, candidates.Select(s => GetStateInfo(s)));
      selectedState = candidates.ElementAt(selectedIdx);
      return true;
    }

    /// <summary>
    /// Returns the action info stored for the given state; creates and stores a new one on first access.
    /// </summary>
    private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
      var s = CanonicalState(state.ToString());
      IBanditPolicyActionInfo info;
      if (!stateInfo.TryGetValue(s, out info)) {
        info = banditPolicy.CreateActionInfo();
        stateInfo[s] = info;
      }
      return info;
    }

    /// <summary>
    /// Propagates the reward to the action info of every state on the trajectory.
    /// If the last state is terminal it is additionally recorded as done.
    /// </summary>
    /// <param name="stateTrajectory">Visited states in order; an empty trajectory is ignored.</param>
    /// <param name="reward">Reward passed to <c>UpdateReward</c> of each state's action info.</param>
    public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
      // enumerate the (possibly lazy) trajectory only once
      var trajectory = stateTrajectory.ToArray();
      if (trajectory.Length == 0) return; // nothing to update

      // the last state could be terminal
      var lastState = trajectory[trajectory.Length - 1];
      if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));

      foreach (var state in trajectory) {
        GetStateInfo(state).UpdateReward(reward);
      }
    }

    /// <summary>
    /// Discards all per-state statistics and the set of done states.
    /// </summary>
    public virtual void Reset() {
      stateInfo.Clear();
      done.Clear();
    }

    /// <summary>
    /// Returns the <c>Tries</c> value stored for the state, or 0 when no info has been stored for it.
    /// </summary>
    public int GetTries(ReadonlySequence state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state.ToString()), out info)) return info.Tries;
      return 0;
    }

    /// <summary>
    /// Returns the <c>Value</c> stored for the state, or 0.0 when no info has been stored for it.
    /// </summary>
    public double GetValue(ReadonlySequence state) {
      IBanditPolicyActionInfo info;
      if (stateInfo.TryGetValue(CanonicalState(state.ToString()), out info)) return info.Value;
      return 0.0; // TODO: check alternatives
    }

    /// <summary>
    /// Maps a state string to the key used in the internal collections: the problem's canonical
    /// representation when canonical states are enabled, otherwise the string itself.
    /// </summary>
    protected string CanonicalState(string state) {
      if (useCanonicalState) return problem.CanonicalRepresentation(state);
      else return state;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.