Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericPolicy.cs @ 13847

Visit:

Last change on this file since 13847 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 8.5 KB

Rev	Line
[12291]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Text;
	6	using System.Threading.Tasks;
	7	using HeuristicLab.Common;
	8	using HeuristicLab.Problems.GrammaticalOptimization;
	9
	10	namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
	11	// resampling is not prevented
	12	public sealed class GenericPolicy : IGrammarPolicy {
// Learned weight per feature id. Read through GetValue, written by UpdateValue and UpdateLastValue.
private readonly Dictionary<string, double> Q;
// Tries per feature id. Within this class it is only read (GetTries) and cleared (Reset).
private readonly Dictionary<string, int> T;
// For each state passed to TrySelect: a copy of the follow states seen on the first call for that state.
private readonly Dictionary<string, List<string>> followStates;
private readonly IProblem problem;
private readonly HashSet<string> done; // contains all visited chains

// Creates a policy for the given problem; all learned state starts out empty.
public GenericPolicy(IProblem problem) {
  this.problem = problem;
  this.Q = new Dictionary<string, double>();
  this.T = new Dictionary<string, int>();
  this.followStates = new Dictionary<string, List<string>>();
  this.done = new HashSet<string>();
}

// Scratch buffers reused between TrySelect calls (don't allocate each time).
// They are only re-allocated when a call needs more slots than currently available.
private double[] activeAfterStates; // values of the after-states that are not done
private int[] actionIndexMap; // maps a slot in activeAfterStates to the index in the original after-state sequence
	29
// Selects one of the given after-states of curState using an epsilon-greedy rule (eps = 0.01)
// over the values of those after-states that are not yet marked as done.
//
// random           - random number source used for the selection
// curState         - the state (chain) for which a follow state has to be chosen
// afterStates      - the candidate follow states; enumerated exactly once
// selectedStateIdx - index of the chosen state within afterStates, or -1 on failure
//
// Returns false (and marks curState as done) when every after-state is already done,
// which includes the case of an empty afterStates sequence; returns true otherwise.
// On success the value of curState is updated from the values of its after-states.
public bool TrySelect(System.Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
  // Materialize once: the sequence is needed for the done-check, for sizing the buffers,
  // for the selection loop and for the value update. Re-enumerating a lazy sequence
  // each time would repeat the caller's work and could yield inconsistent results.
  var states = afterStates as IList<string> ?? afterStates.ToList();

  // fail if all states are done (corresponding state infos are disabled)
  if (states.All(s => Done(s))) {
    // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
    MarkAsDone(curState);

    selectedStateIdx = -1;
    return false;
  }

  // grow the scratch buffers only when they are too small for this call
  if (activeAfterStates == null || activeAfterStates.Length < states.Count) {
    activeAfterStates = new double[states.Count];
    actionIndexMap = new int[states.Count];
  }

  // remember the follow states the first time a state is seen
  if (!followStates.ContainsKey(curState)) {
    followStates[curState] = new List<string>(states);
  }

  // collect the values of all after-states that are still open and remember their original positions
  var activeCount = 0;
  for (var originalIdx = 0; originalIdx < states.Count; originalIdx++) {
    var afterState = states[originalIdx];
    if (Done(afterState)) continue;

    activeAfterStates[activeCount] = CalculateValue(afterState);
    actionIndexMap[activeCount] = originalIdx;
    activeCount++;
  }

  // alternatives that have been tried for eps:
  //   Math.Max(500.0 / (GetTries(curState) + 1), 0.01)
  //   10.0 / Math.Sqrt(GetTries(curState) + 1)
  const double eps = 0.01;
  // only the first activeCount slots of the buffer belong to this call
  selectedStateIdx = actionIndexMap[SelectEpsGreedy(random, activeAfterStates.Take(activeCount), eps)];

  UpdateValue(curState, states);

  return true;
}
	67
// Value of a chain: the sum over all features of the chain of
// (learned weight of the feature) * (feature value). Features without a learned
// weight contribute 0 because GetValue returns 0.0 for unknown ids.
private double CalculateValue(string chain) {
  return problem.GetFeatures(chain)
    .Sum(feature => GetValue(feature.Id) * feature.Value);
}
	79
	80	private void UpdateValue(string curChain, IEnumerable<string> alternatives) {
	81	const double gamma = 1;
	82	const double alpha = 0.01;
	83	var maxNextQ = alternatives
	84	.Select(CalculateValue).Max();
	85
	86	var delta = gamma * maxNextQ - CalculateValue(curChain);
	87
	88	foreach (var f in problem.GetFeatures(curChain)) {
	89
	90	Q[f.Id] = GetValue(f.Id) + alpha * delta * f.Value;
	91	}
	92	}
	93
	94	private void UpdateLastValue(string terminalChain, double reward) {
	95	const double alpha = 0.01;
	96	var delta = reward - CalculateValue(terminalChain);
	97	foreach (var f in problem.GetFeatures(terminalChain)) {
	98	Q[f.Id] = GetValue(f.Id) + alpha * delta * f.Value;
	99	}
	100	}
	101
	102
// Samples an index into qs with probability proportional to exp(beta * q).
//
// random - random number source handed to SampleProportional
// qs     - the values to choose from; enumerated exactly once
// beta   - scaling of the values before exponentiation (default 10)
//
// Returns the sampled index as produced by SampleProportional.
private int SelectBoltzmann(System.Random random, IEnumerable<double> qs, double beta = 10) {
  var exponents = qs.Select(q => beta * q).ToArray();

  // Math.Exp overflows to +Infinity for large arguments, which makes proportional weights useless.
  // Subtracting the largest exponent scales every weight by the same positive factor
  // (so the proportions are unchanged) and keeps all weights within (0, 1].
  var shift = exponents.Length > 0 ? exponents.Max() : 0.0;
  if (double.IsInfinity(shift) || double.IsNaN(shift)) {
    // shifting by a non-finite value would only produce NaN weights; fall back to the unshifted weights
    shift = 0.0;
  }

  var w = exponents.Select(e => Math.Exp(e - shift));

  var bestAction = Enumerable.Range(0, exponents.Length).SampleProportional(random, w);
  Debug.Assert(bestAction >= 0);
  return bestAction;
}
	123
// Epsilon-greedy selection of an index into qs.
// One random number decides between exploration and exploitation:
// eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration.
// When exploiting, ties between (almost) equal best values are broken randomly.
private int SelectEpsGreedy(System.Random random, IEnumerable<double> qs, double eps = 0.2) {
  var exploit = random.NextDouble() >= eps;
  if (!exploit) {
    // select random
    return SelectRandom(random, qs);
  }

  // select best: collect the positions of all values that are (almost) equal to the maximum
  var candidates = new List<int>();
  var topQ = double.NegativeInfinity;
  var position = 0;
  foreach (var q in qs) {
    if (q > topQ) {
      // strictly better value found => all previous candidates are obsolete
      candidates.Clear();
      candidates.Add(position);
      topQ = q;
    } else if (HeuristicLab.Common.Extensions.IsAlmost(q, topQ)) {
      candidates.Add(position);
    }
    position++;
  }

  Debug.Assert(candidates.Count > 0);
  return candidates.SelectRandom(random);
}
	149
// Picks a random position of qs via the SelectRandom extension; the values themselves are ignored.
private int SelectRandom(System.Random random, IEnumerable<double> qs) {
  var valuesWithPosition = qs.Select((value, position) => Tuple.Create(value, position));
  var picked = valuesWithPosition.SelectRandom(random);
  return picked.Item2;
}
	155
	156
	157	public void UpdateReward(IEnumerable<string> chainTrajectory, double reward) {
[12298]	158	// // only updates the last chain because we already update values after each step
	159	// var reverseChains = chainTrajectory.Reverse();
	160	// var terminalChain = reverseChains.First();
	161	//
	162	// UpdateValue(terminalChain, reward);
	163	//
	164	// foreach (var chain in reverseChains.Skip(1)) {
	165	//
	166	// var maxNextQ = followStates[chain]
	167	// //.Where(s=>!Done(s))
	168	// .Select(GetValue).Max();
	169	//
	170	// UpdateValue(chain, maxNextQ);
	171	// }
	172	var terminalChain = chainTrajectory.Last();
	173	UpdateLastValue(terminalChain, reward);
[12294]	174	if (problem.Grammar.IsTerminal(terminalChain)) MarkAsDone(terminalChain);
	175	}
	176
[12298]	177
// Forgets everything that has been learned: feature weights, tries,
// recorded follow states and the set of chains marked as done.
public void Reset() {
  done.Clear();
  followStates.Clear();
  T.Clear();
  Q.Clear();
}
	184
	185
	186	private bool Done(string chain) {
	187	return done.Contains(chain);
	188	}
	189
	190	private void MarkAsDone(string chain) {
	191	done.Add(chain);
	192	}
	193
	194
// Number of tries stored for the given feature id; 0 if nothing is stored for it.
public int GetTries(string fId) {
  // single lookup instead of ContainsKey followed by the indexer
  int tries;
  return T.TryGetValue(fId, out tries) ? tries : 0;
}
	199
// Learned weight of the given feature id; 0.0 if no weight has been stored for it yet.
public double GetValue(string fId) {
  // single lookup instead of ContainsKey followed by the indexer
  double value;
  return Q.TryGetValue(fId, out value) ? value : 0.0; // TODO: check alternatives
}
	205
[12298]	206	// private string CalcState(string chain) {
	207	// var f = problem.GetFeatures(chain);
	208	// // this policy only works for problems that return exactly one feature (the 'state')
	209	// if (f.Skip(1).Any()) throw new ArgumentException();
	210	// return f.First().Id;
	211	// }
[12294]	212
	213	public void PrintStats() {
	214	Console.WriteLine(Q.Values.Max());
[12298]	215	// var topTries = Q.Keys.OrderByDescending(key => T[key]).Take(50);
	216	// var topQs = Q.Keys/.Where(key => key.Contains("E"))/.OrderByDescending(key => Q[key]).Take(50);
	217	// foreach (var t in topTries.Zip(topQs, Tuple.Create)) {
	218	// var id1 = t.Item1;
	219	// var id2 = t.Item2;
	220	// Console.WriteLine("{0,30} {1,6} {2:N4} {3,30} {4,6} {5:N4}", id1, T[id1], Q[id1], id2, T[id2], Q[id2]);
	221	// }
	222
	223	foreach (var option in new String[]
	224	{
	225	"ab", "cd", "ab+cd", "ef", "ab+cd+ef",
	226	"ab+ab", "cd+cd",
	227	"aa", "ab","ac","ad","ae","af","ag","ah","ai","aj",
	228	"ab","cd","ef","ac","af","ai","aig","cf","cf*j",
	229	"b+c","a+c","b+d","a+d",
	230	"ab+cd+ef", "ab+cd+ef+a", "ab+cd+ef+b", "ab+cd+ef+c", "ab+cd+ef+d","ab+cd+ef+e", "ab+cd+ef+f", "ab+cd+ef+g", "ab+cd+ef+h", "ab+cd+ef+i", "ab+cd+e*f+j",
	231	"ab+cd+ef+agi+cj*f"
	232	}) {
	233	Console.WriteLine("{0,-10} {1:N5}", option, CalculateValue(option));
[12294]	234	}
	235
[12298]	236	// var topQs = Q.Keys/.Where(key => key.Contains("E"))/.OrderByDescending(key => Math.Abs(Q[key])).Take(10);
	237	// foreach (var t in topQs) {
	238	// Console.WriteLine("{0,30} {1:N4}", t, Q[t]);
	239	// }
[12294]	240	}
[12291]	241	}
	242	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences