Rev | Line | |
---|
[11708] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
[11727] | 3 | using System.Diagnostics;
|
---|
[11708] | 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using System.Threading.Tasks;
|
---|
| 7 |
|
---|
| 8 | namespace HeuristicLab.Algorithms.Bandits {
|
---|
/// <summary>
/// Epsilon-greedy action selection policy.
/// On each call a uniform random number is drawn; if it is greater than <c>eps</c> the
/// policy picks greedily (by <c>MaxReward</c>), otherwise it delegates to a
/// <see cref="RandomPolicy"/>.
/// </summary>
public class EpsGreedyPolicy : IPolicy {
  // Threshold compared against random.NextDouble(); draws that do not exceed it select randomly.
  private readonly double eps;
  // Policy used for the random (exploration) branch.
  private readonly RandomPolicy randomPolicy;

  /// <summary>
  /// Creates the policy.
  /// </summary>
  /// <param name="eps">
  /// Threshold for the random branch: <see cref="SelectAction"/> selects randomly whenever
  /// <c>random.NextDouble()</c> is not greater than this value.
  /// </param>
  public EpsGreedyPolicy(double eps) {
    this.eps = eps;
    this.randomPolicy = new RandomPolicy();
  }

  /// <summary>
  /// Selects the index of the next action.
  /// </summary>
  /// <param name="random">Random number generator used for the greedy/random decision
  /// and passed on to the random policy.</param>
  /// <param name="actionInfos">Per-action statistics; must not be empty (checked with
  /// <c>Debug.Assert</c> only). The sequence is enumerated more than once.</param>
  /// <returns>
  /// The zero-based position of the chosen action within <paramref name="actionInfos"/>.
  /// In the greedy branch: the first enabled action with <c>Tries == 0</c> if there is one,
  /// otherwise the enabled action with the largest <c>MaxReward</c> (first one wins on ties).
  /// If the greedy branch finds no eligible action, the result is -1 (guarded only by a
  /// <c>Debug.Assert</c>).
  /// </returns>
  public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    Debug.Assert(actionInfos.Any());
    if (random.NextDouble() > eps) {
      // select best
      int bestAction = -1;
      double bestQ = double.NegativeInfinity;
      int aIdx = -1;
      foreach (var info in actionInfos) {
        // Count every element (not only the DefaultPolicyActionInfo ones) so that the
        // returned index is a position in actionInfos itself, consistent with the random
        // branch, which receives the unfiltered sequence.
        aIdx++;
        if (!(info is DefaultPolicyActionInfo)) continue;
        var aInfo = (DefaultPolicyActionInfo)info;
        if (aInfo.Disabled) continue;
        // An action that has never been tried is selected immediately.
        if (aInfo.Tries == 0) return aIdx;

        // Quality is the maximum reward observed for the action (not the average reward).
        var q = aInfo.MaxReward;
        if (q > bestQ) {
          bestQ = q;
          bestAction = aIdx;
        }
      }
      Debug.Assert(bestAction >= 0);
      return bestAction;
    } else {
      // select random
      return randomPolicy.SelectAction(random, actionInfos);
    }
  }

  /// <summary>
  /// Creates the action-info object this policy works with.
  /// </summary>
  /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
  public IPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  /// <summary>
  /// Returns the policy name together with <c>eps</c> formatted with two decimal places.
  /// </summary>
  public override string ToString() {
    return string.Format("EpsGreedyPolicy({0:F2})", eps);
  }
}
|
---|
| 57 | }
|
---|
Note: See TracBrowser for help on using the repository browser.