Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs @ 12893

Visit:

Last change on this file since 12893 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.8 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy that builds a confidence interval [l, u] around the best reward observed
  /// for each action and chooses among the actions whose upper bound is not below the largest
  /// lower bound ("active" actions).
  /// </summary>
  public class ActiveLearningPolicy : IBanditPolicy {
    /// <summary>
    /// Scale factor applied to the half-width of the confidence interval of every action.
    /// Note that this is the policy-level setting, not the per-action
    /// <c>DefaultPolicyActionInfo.MaxReward</c> statistic read in <see cref="SelectAction"/>.
    /// </summary>
    public double MaxReward { get; private set; }

    /// <summary>
    /// Creates the policy.
    /// </summary>
    /// <param name="maxReward">Scale factor for the confidence interval width (default 1.0).</param>
    public ActiveLearningPolicy(double maxReward = 1.0) {
      this.MaxReward = maxReward;
    }

    /// <summary>
    /// Selects one of the active actions.
    /// </summary>
    /// <param name="random">Random number generator handed to <c>SelectRandom</c>.</param>
    /// <param name="actionInfos">
    /// Statistics of the available actions. Only elements of type
    /// <c>DefaultPolicyActionInfo</c> are considered; elements of other types are skipped.
    /// </param>
    /// <returns>
    /// The zero-based position of the chosen action within the sequence of
    /// <c>DefaultPolicyActionInfo</c> elements of <paramref name="actionInfos"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="actionInfos"/> contains no <c>DefaultPolicyActionInfo</c> element.
    /// </exception>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: the sequence is needed for the totals and for the per-action pass,
      // and the caller may hand in a lazily evaluated enumerable.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      int k = myActionInfos.Count;
      if (k == 0) {
        // Same exception type that the Max() call below would raise on an empty sequence,
        // but with a message that names the actual problem.
        throw new InvalidOperationException("There are no actions of type DefaultPolicyActionInfo to select from.");
      }

      int totalTries = myActionInfos.Sum(a => a.Tries);
      const double delta = 0.1;
      // Identical for every action, so it is computed only once. The value is only used for
      // actions with Tries > 0, which implies totalTries > 0.
      double logTerm = Math.Log(2.0 * k * totalTries / delta);

      var us = new double[k]; // upper bounds, indexed by action position
      var ls = new double[k]; // lower bounds, indexed by action position
      for (int i = 0; i < k; i++) {
        var aInfo = myActionInfos[i];
        if (aInfo.Tries == 0) {
          // An untried action has an unbounded interval and therefore always stays active.
          us[i] = double.PositiveInfinity;
          ls[i] = double.NegativeInfinity;
        } else {
          double q = aInfo.MaxReward;
          var b = Math.Sqrt(logTerm / (2.0 * aInfo.Tries));
          us[i] = q + MaxReward * b;
          ls[i] = q - MaxReward * b;
        }
      }

      // An action is active when its upper bound reaches the best lower bound.
      var maxL = ls.Max();
      var active = new List<int>();
      for (int i = 0; i < k; i++) {
        if (us[i] >= maxL) active.Add(i);
      }
      Debug.Assert(active.Any());
      return active.SelectRandom(random);
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <c>DefaultPolicyActionInfo</c>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the display name of the policy.
    /// </summary>
    public override string ToString() {
      return "ActiveLearningPolicy";
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences