Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs @ 11727

Visit:

Last change on this file since 11727 was 11727, checked in by gkronber, 9 years ago
#2283: worked on grammatical optimization problem solvers (simple MCTS done)
File size: 2.0 KB

Rev	Line
[11708]	1	using System;
	2	using System.Collections.Generic;
[11727]	3	using System.Diagnostics;
[11708]	4	using System.Linq;
	5	using System.Text;
	6	using System.Threading.Tasks;
	7
	8	namespace HeuristicLab.Algorithms.Bandits {
	9	public class EpsGreedyPolicy : BanditPolicy {
	10	private readonly Random random;
	11	private readonly double eps;
	12	private readonly int[] tries;
	13	private readonly double[] sumReward;
[11727]	14	private readonly RandomPolicy randomPolicy;
	15
[11708]	16	public EpsGreedyPolicy(Random random, int numActions, double eps)
	17	: base(numActions) {
	18	this.random = random;
	19	this.eps = eps;
[11727]	20	this.randomPolicy = new RandomPolicy(random, numActions);
	21	this.tries = new int[numActions];
	22	this.sumReward = new double[numActions];
[11708]	23	}
	24
	25	public override int SelectAction() {
[11727]	26	Debug.Assert(Actions.Any());
[11708]	27	if (random.NextDouble() > eps) {
	28	// select best
	29	var maxReward = double.NegativeInfinity;
	30	int bestAction = -1;
[11727]	31	foreach (var a in Actions) {
	32	if (tries[a] == 0) return a;
	33	var avgReward = sumReward[a] / tries[a];
[11708]	34	if (maxReward < avgReward) {
	35	maxReward = avgReward;
[11727]	36	bestAction = a;
[11708]	37	}
	38	}
[11727]	39	Debug.Assert(bestAction >= 0);
[11708]	40	return bestAction;
	41	} else {
	42	// select random
[11727]	43	return randomPolicy.SelectAction();
[11708]	44	}
	45	}
	46	public override void UpdateReward(int action, double reward) {
[11727]	47	Debug.Assert(Actions.Contains(action));
	48
	49	randomPolicy.UpdateReward(action, reward); // does nothing
[11708]	50	tries[action]++;
	51	sumReward[action] += reward;
	52	}
[11727]	53
	54	public override void DisableAction(int action) {
	55	base.DisableAction(action);
	56	randomPolicy.DisableAction(action);
	57	sumReward[action] = 0;
	58	tries[action] = -1;
	59	}
	60
[11708]	61	public override void Reset() {
[11727]	62	base.Reset();
	63	randomPolicy.Reset();
[11708]	64	Array.Clear(tries, 0, tries.Length);
	65	Array.Clear(sumReward, 0, sumReward.Length);
	66	}
	67	}
	68	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences