Context Navigation

UCB1Policy.cs @ 11711

Visit:

Last change on this file since 11711 was 11711, checked in by gkronber, 9 years ago
#2283: folders for bandits and policies
File size: 1.2 KB

Rev	Line
[11710]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Linq;
	4	using System.Text;
	5	using System.Threading.Tasks;
	6
	7	namespace HeuristicLab.Algorithms.Bandits {
	8	public class UCB1Policy : BanditPolicy {
	9	private readonly int[] tries;
	10	private readonly double[] sumReward;
	11	private int totalTries = 0;
	12	public UCB1Policy(int numActions)
	13	: base(numActions) {
	14	this.tries = new int[NumActions];
	15	this.sumReward = new double[NumActions];
	16	}
	17
	18	public override int SelectAction() {
	19	int bestAction = -1;
	20	double bestQ = double.NegativeInfinity;
	21	for (int i = 0; i < NumActions; i++) {
	22	if (tries[i] == 0) return i;
	23	var q = sumReward[i] / tries[i] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[i]);
	24	if (q > bestQ) {
	25	bestQ = q;
	26	bestAction = i;
	27	}
	28	}
	29	return bestAction;
	30	}
	31	public override void UpdateReward(int action, double reward) {
	32	totalTries++;
	33	tries[action]++;
	34	sumReward[action] += reward;
	35	}
	36	public override void Reset() {
	37	totalTries = 0;
	38	Array.Clear(tries, 0, tries.Length);
	39	Array.Clear(sumReward, 0, sumReward.Length);
	40	}
	41	}
	42	}

Note: See TracBrowser for help on using the repository browser.