Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs @ 11770

Visit:

Last change on this file since 11770 was 11770, checked in by gkronber, 9 years ago
#2283: worked on generic sequential search alg with bandit policy as parameter
File size: 3.4 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6	using HeuristicLab.Common;
7	using HeuristicLab.Problems.GrammaticalOptimization;
8
9	namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
10	// this represents grammar policies that use one of the available bandit policies for state selection
/// <summary>
/// Grammar policy that delegates the choice among follow-up states to a
/// configurable bandit policy. Per-state bandit statistics are kept in a
/// dictionary keyed by the state's string form (or by its canonical
/// representation when <c>useCanonicalState</c> is set).
/// </summary>
public class GenericGrammarPolicy : IGrammarPolicy {
  // bandit statistics for each state, keyed by CanonicalState(state.ToString())
  protected Dictionary<string, IBanditPolicyActionInfo> stateInfo;
  private readonly bool useCanonicalState;
  private readonly IProblem problem;
  private readonly IBanditPolicy banditPolicy;
  // keys of states that must not be selected again
  private readonly HashSet<string> done;

  /// <summary>
  /// Creates a policy with empty statistics.
  /// </summary>
  /// <param name="problem">Used only to compute canonical state representations.</param>
  /// <param name="banditPolicy">Selects among follow-up states and creates the per-state statistics objects.</param>
  /// <param name="useCanonicalState">
  /// When true, states are keyed by <c>problem.CanonicalRepresentation</c>,
  /// so states with the same canonical form share statistics and done-flags.
  /// </param>
  public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
    this.useCanonicalState = useCanonicalState;
    this.problem = problem;
    this.banditPolicy = banditPolicy;
    this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
    this.done = new HashSet<string>();
  }

  /// <summary>
  /// Tries to pick one of <paramref name="afterStates"/> that is not yet marked as done.
  /// </summary>
  /// <param name="random">Random source handed to the bandit policy.</param>
  /// <param name="curState">The state whose follow-up states are being considered.</param>
  /// <param name="afterStates">Candidate follow-up states.</param>
  /// <param name="selectedState">The chosen state, or null when the method returns false.</param>
  /// <returns>
  /// True when a state was selected; false when every candidate is already
  /// done, in which case <paramref name="curState"/> is marked as done as well.
  /// </returns>
  public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
    out ReadonlySequence selectedState) {
    // only select states that are not yet done; materialize once because the
    // result is inspected, handed to the bandit policy, and indexed afterwards
    var candidates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
    if (candidates.Length == 0) {
      // all follow states have already been visited => also disable the current state.
      // NOTE(review): with canonical states enabled, several distinct states share one
      // key, so this also disables every state with the same canonical form - confirm
      // that this is the intended behavior.
      done.Add(CanonicalState(curState.ToString()));
      selectedState = null;
      return false;
    }

    var selectedIdx = banditPolicy.SelectAction(random, candidates.Select(s => GetStateInfo(s)));
    selectedState = candidates[selectedIdx];
    return true;
  }

  // Returns the statistics object for the state, creating and registering one on first access.
  private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
    var s = CanonicalState(state.ToString());
    IBanditPolicyActionInfo info;
    if (!stateInfo.TryGetValue(s, out info)) {
      info = banditPolicy.CreateActionInfo();
      stateInfo[s] = info;
    }
    return info;
  }

  /// <summary>
  /// Propagates <paramref name="reward"/> to the statistics of every state in the
  /// trajectory and marks the last state as done when it is terminal.
  /// An empty trajectory is ignored.
  /// </summary>
  /// <param name="stateTrajectory">Visited states in order; the last one may be terminal.</param>
  /// <param name="reward">Reward applied to each state of the trajectory.</param>
  public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
    // materialize once: the sequence is needed for the last element and for the update loop
    var trajectory = stateTrajectory.ToArray();
    if (trajectory.Length == 0) return;

    // the last state could be terminal
    var lastState = trajectory[trajectory.Length - 1];
    if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));

    foreach (var state in trajectory) {
      GetStateInfo(state).UpdateReward(reward);
    }
  }

  /// <summary>
  /// Discards all collected statistics and done-flags.
  /// </summary>
  public virtual void Reset() {
    stateInfo.Clear();
    done.Clear();
  }

  /// <summary>
  /// Returns the <c>Tries</c> value recorded for the state, or 0 when no statistics exist for it.
  /// </summary>
  public int GetTries(ReadonlySequence state) {
    IBanditPolicyActionInfo info;
    if (stateInfo.TryGetValue(CanonicalState(state.ToString()), out info)) return info.Tries;
    return 0;
  }

  /// <summary>
  /// Returns the <c>Value</c> recorded for the state, or 0.0 when no statistics exist for it.
  /// </summary>
  public double GetValue(ReadonlySequence state) {
    IBanditPolicyActionInfo info;
    if (stateInfo.TryGetValue(CanonicalState(state.ToString()), out info)) return info.Value;
    return 0.0; // TODO: check alternatives
  }

  // Maps a state string to the key used in stateInfo and done.
  protected string CanonicalState(string state) {
    if (useCanonicalState) return problem.CanonicalRepresentation(state);
    else return state;
  }
}
88	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences