Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs @ 11793

Visit:

Last change on this file since 11793 was 11793, checked in by gkronber, 9 years ago
#2283 fixed compile errors and refactoring
File size: 3.8 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7	using HeuristicLab.Problems.GrammaticalOptimization;
8
9	namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
/// <summary>
/// Grammar policy that uses one of the available bandit policies for state selection.
/// For every state that has been looked up, an <see cref="IBanditPolicyActionInfo"/> created by the
/// bandit policy is kept in <see cref="stateInfo"/>, keyed by the result of <see cref="CanonicalState"/>.
/// </summary>
public class GenericGrammarPolicy : IGrammarPolicy {
  // stores the necessary information for bandit policies for each state
  protected Dictionary<string, IBanditPolicyActionInfo> stateInfo;
  private readonly bool useCanonicalState;
  private readonly IProblem problem;
  private readonly IBanditPolicy banditPolicy;

  /// <summary>
  /// Creates a policy with an empty state table.
  /// </summary>
  /// <param name="problem">Problem whose grammar and canonical representation are used for state handling.</param>
  /// <param name="banditPolicy">Bandit policy used to create per-state infos and to select among after-states.</param>
  /// <param name="useCanonicalState">If true, states are mapped through the problem's canonical representation before lookup.</param>
  public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
    this.useCanonicalState = useCanonicalState;
    this.problem = problem;
    this.banditPolicy = banditPolicy;
    this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
  }

  /// <summary>
  /// Selects one of the after-states using the bandit policy.
  /// </summary>
  /// <param name="random">Random number generator handed to the bandit policy.</param>
  /// <param name="curState">The state from which the selection is made.</param>
  /// <param name="afterStates">Candidate follow states; enumerated exactly once.</param>
  /// <param name="selectedStateIdx">Index chosen by the bandit policy, or -1 if selection failed.</param>
  /// <returns>
  /// False if the infos of all after-states are disabled (this includes an empty sequence);
  /// in that case the current state is disabled as well. True otherwise.
  /// </returns>
  public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
    // Resolve the infos once. Previously afterStates was enumerated (and every state canonicalized and
    // looked up) once for the check below and a second time when handed to the bandit policy.
    var afterStateInfos = afterStates.Select(s => GetStateInfo(s)).ToList();

    // fail if all states are done (corresponding state infos are disabled)
    if (afterStateInfos.All(info => info.Disabled)) {
      // all follow states have already been visited => also disable the current state
      GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
      selectedStateIdx = -1;
      return false;
    }

    selectedStateIdx = banditPolicy.SelectAction(random, afterStateInfos);
    return true;
  }

  // Returns the info stored for the (canonicalized) state; creates and stores a new one on first access.
  private IBanditPolicyActionInfo GetStateInfo(string state) {
    var s = CanonicalState(state);
    IBanditPolicyActionInfo info;
    if (!stateInfo.TryGetValue(s, out info)) {
      info = banditPolicy.CreateActionInfo();
      stateInfo[s] = info;
    }
    return info;
  }

  /// <summary>
  /// Propagates a reward along a trajectory of states.
  /// If the last state is terminal its info is disabled with the reward; all states before the last one
  /// are updated with the reward, from the end of the trajectory towards its start.
  /// An empty trajectory is ignored.
  /// </summary>
  /// <param name="stateTrajectory">Visited states in visiting order; enumerated exactly once.</param>
  /// <param name="reward">Reward to assign.</param>
  public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
    // materialize once so that a lazy sequence is not evaluated twice (last element + reverse walk)
    var states = stateTrajectory.ToList();
    if (states.Count == 0) {
      return; // nothing to update
    }

    // the last state could be terminal
    var lastIdx = states.Count - 1;
    var lastState = states[lastIdx];
    if (problem.Grammar.IsTerminal(lastState)) {
      GetStateInfo(lastState).Disable(reward);
    }
    // NOTE(review): a non-terminal last state receives neither Disable nor UpdateReward - confirm this is intended.

    // update remaining states (back to front, skipping the last one)
    for (int i = lastIdx - 1; i >= 0; i--) {
      GetStateInfo(states[i]).UpdateReward(reward);
    }
  }

  /// <summary>
  /// Discards all stored state information.
  /// </summary>
  public virtual void Reset() {
    stateInfo.Clear();
  }

  /// <summary>
  /// Returns the number of tries recorded for the state, or 0 if no info is stored for it.
  /// Does not create an info for unknown states.
  /// </summary>
  public int GetTries(string state) {
    IBanditPolicyActionInfo info;
    return stateInfo.TryGetValue(CanonicalState(state), out info) ? info.Tries : 0;
  }

  /// <summary>
  /// Returns the value recorded for the state, or 0.0 if no info is stored for it.
  /// Does not create an info for unknown states.
  /// </summary>
  public double GetValue(string state) {
    IBanditPolicyActionInfo info;
    return stateInfo.TryGetValue(CanonicalState(state), out info) ? info.Value : 0.0; // TODO: check alternatives
  }

  /// <summary>
  /// Maps a state to the key used in <see cref="stateInfo"/>.
  /// Without canonical states the state itself is the key. With canonical states, terminal states map to
  /// their canonical representation and non-terminal states to the canonical representation followed by
  /// the length of the original state.
  /// </summary>
  protected string CanonicalState(string state) {
    if (!useCanonicalState) {
      return state;
    }
    if (problem.Grammar.IsTerminal(state)) {
      return problem.CanonicalRepresentation(state);
    }
    // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
    // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
    // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
    // solution: we disable the state rS4
    return problem.CanonicalRepresentation(state) + state.Length;
  }
}
96	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences