Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Policies/EpsGreedy.cs @ 15062

Last change on this file since 15062 was 15060, checked in by gkronber, 8 years ago

#2581: merged r13645,r13648,r13650,r13651,r13652,r13654,r13657,r13658,r13659,r13661,r13662,r13669,r13708,r14142 from trunk to stable (to be deleted in the next commit)

File size: 3.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics.Contracts;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Data;
10using HeuristicLab.Parameters;
11using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
12
namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies {
  /// <summary>
  /// Epsilon-greedy action selection policy. With probability <c>Eps</c> an action is drawn
  /// uniformly at random from all actions that are not done; otherwise one of the actions with
  /// the highest average quality is chosen (ties are broken uniformly at random, and actions
  /// that have never been tried are preferred over all tried ones).
  /// </summary>
  [StorableClass]
  [Item("EpsilonGreedy", "Epsilon greedy policy with parameter eps to balance between exploitation and exploration")]
  public class EpsilonGreedy : PolicyBase {
    /// <summary>
    /// Per-action bookkeeping used by this policy: accumulated quality and number of tries.
    /// </summary>
    private class ActionStatistics : IActionStatistics {
      public double SumQuality { get; set; }
      // Not meaningful while Tries == 0 (floating-point division by zero);
      // Select only reads this value for actions with Tries > 0.
      public double AverageQuality { get { return SumQuality / Tries; } }
      public int Tries { get; set; }
      public bool Done { get; set; }
    }

    // Scratch list of candidate action indices, reused between Select calls to avoid
    // re-allocating it on every call. It is not marked [Storable] and is not copied by the
    // cloning constructor; each instance gets its own list from this field initializer.
    private readonly List<int> buf = new List<int>();

    /// <summary>The parameter object holding the exploration rate.</summary>
    public IFixedValueParameter<DoubleValue> EpsParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters["Eps"]; }
    }

    /// <summary>
    /// Rate of random selection: 0 is purely greedy, 1 is purely random.
    /// The value is not range-checked here.
    /// </summary>
    public double Eps {
      get { return EpsParameter.Value.Value; }
      set { EpsParameter.Value.Value = value; }
    }

    [StorableConstructor]
    protected EpsilonGreedy(bool deserializing) : base(deserializing) { }
    protected EpsilonGreedy(EpsilonGreedy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>Creates the policy with a default exploration rate of 0.1.</summary>
    public EpsilonGreedy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>("Eps", "Rate of random selection 0 (greedy) <= eps <= 1 (random)", new DoubleValue(0.1)));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new EpsilonGreedy(this, cloner);
    }

    /// <summary>
    /// Selects the index (position within <paramref name="actions"/>) of the next action to try.
    /// </summary>
    /// <param name="actions">Statistics of all candidate actions, in a stable order.</param>
    /// <param name="random">Random number source used for the eps draw and for tie breaking.</param>
    /// <returns>Zero-based index of the selected action; the action is never one marked as done.</returns>
    /// <exception cref="InvalidOperationException">
    /// No action can be selected (the sequence is empty, or every action is done).
    /// </exception>
    public override int Select(IEnumerable<IActionStatistics> actions, IRandom random) {
      return Select(actions, random, Eps, buf);
    }

    /// <summary>
    /// Records one observed quality value for the given action.
    /// </summary>
    /// <param name="action">A statistics object obtained from <see cref="CreateActionStatistics"/>.</param>
    /// <param name="q">The observed quality to add to the action's running sum.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="action"/> is null or was not created by this policy type.
    /// </exception>
    public override void Update(IActionStatistics action, double q) {
      var a = action as ActionStatistics;
      // Fail with a descriptive error instead of a NullReferenceException when the caller
      // passes null or statistics that belong to a different policy implementation.
      if (a == null)
        throw new ArgumentException("The action statistics must be a non-null object created by EpsilonGreedy.CreateActionStatistics().", "action");
      a.SumQuality += q;
      a.Tries++;
    }

    public override IActionStatistics CreateActionStatistics() {
      return new ActionStatistics();
    }

    /// <summary>
    /// Core selection routine. Fills <paramref name="buf"/> with the indices of all candidate
    /// actions (either the best-rated ones or every action that is not done) and returns one of
    /// them uniformly at random.
    /// </summary>
    private static int Select(IEnumerable<IActionStatistics> actions, IRandom rand, double eps, IList<int> buf) {
      buf.Clear();
      if (rand.NextDouble() >= eps) {
        // exploit: collect every action that shares the best score
        var bestQ = double.NegativeInfinity;
        int aIdx = -1;
        foreach (var a in actions) {
          ++aIdx;
          if (a.Done) continue;
          var actionQ = a.Tries > 0 ? a.AverageQuality : double.PositiveInfinity; // always try unvisited actions first
          if (actionQ > bestQ) {
            // strictly better: discard the previous candidates
            buf.Clear();
            buf.Add(aIdx);
            bestQ = actionQ;
          } else if (actionQ >= bestQ) {
            // tie with the current best (reached only when actionQ equals bestQ)
            buf.Add(aIdx);
          }
        }
      } else {
        // explore: every action that is not done is a candidate
        int aIdx = -1;
        foreach (var a in actions) {
          ++aIdx;
          if (a.Done) continue;
          buf.Add(aIdx);
        }
      }

      // Without this check an empty candidate list would surface as an opaque
      // index/argument error from the list indexer or the random number generator.
      if (buf.Count == 0)
        throw new InvalidOperationException("EpsilonGreedy cannot select an action: there is no action that is not done and has a comparable quality.");

      return buf[rand.Next(buf.Count)];
    }
  }
}
Note: See TracBrowser for help on using the repository browser.