Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Policies/EpsGreedy.cs @ 15425

Last change on this file since 15425 was 15425, checked in by gkronber, 7 years ago

#2796 made several changes for debugging

File size: 3.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics.Contracts;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Data;
10using HeuristicLab.Parameters;
11using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
12
13namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies {
14  [StorableClass]
15  [Item("EpsilonGreedy", "Epsilon greedy policy with parameter eps to balance between exploitation and exploration")]
16  public class EpsilonGreedy : PolicyBase {
17    private class ActionStatistics : IActionStatistics {
18      public double SumQuality { get; set; }
19      public double AverageQuality { get { return SumQuality / Tries; } }
20      public double BestQuality { get; internal set; }
21      public int Tries { get; set; }
22      public bool Done { get; set; }
23
24      public void Add(IActionStatistics other) {
25        var o = other as ActionStatistics;
26        if (o == null) throw new ArgumentException();
27        this.Tries += o.Tries;
28        this.SumQuality += o.SumQuality;
29        this.BestQuality = Math.Max(this.BestQuality, other.BestQuality);
30      }
31    }
32    private List<int> buf = new List<int>();
33
34    public IFixedValueParameter<DoubleValue> EpsParameter {
35      get { return (IFixedValueParameter<DoubleValue>)Parameters["Eps"]; }
36    }
37
38    public double Eps {
39      get { return EpsParameter.Value.Value; }
40      set { EpsParameter.Value.Value = value; }
41    }
42
43    [StorableConstructor]
44    protected EpsilonGreedy(bool deserializing) : base(deserializing) { }
45    protected EpsilonGreedy(EpsilonGreedy original, Cloner cloner)
46      : base(original, cloner) {
47    }
48    public EpsilonGreedy()
49      : base() {
50      Parameters.Add(new FixedValueParameter<DoubleValue>("Eps", "Rate of random selection 0 (greedy) <= eps <= 1 (random)", new DoubleValue(0.1)));
51    }
52
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new EpsilonGreedy(this, cloner);
55    }
56
57    public override int Select(IEnumerable<IActionStatistics> actions, IRandom random) {
58      return Select(actions, random, Eps, buf);
59    }
60
61    public override void Update(IActionStatistics action, double q) {
62      var a = action as ActionStatistics;
63      a.SumQuality += q;
64      a.BestQuality = Math.Max(a.BestQuality, q);
65      a.Tries++;
66    }
67
68    public override IActionStatistics CreateActionStatistics() {
69      return new ActionStatistics();
70    }
71
72    private static int Select(IEnumerable<IActionStatistics> actions, IRandom rand, double c, IList<int> buf) {
73      buf.Clear();
74      if (rand.NextDouble() >= c) {
75        // select best
76        var bestQ = double.NegativeInfinity;
77        int aIdx = -1;
78        foreach (var a in actions) {
79          ++aIdx;
80          if (a.Done) continue;
81          var actionQ = a.Tries > 0 ? a.AverageQuality : double.PositiveInfinity; // always try unvisited actions first
82          if (actionQ > bestQ) {
83            buf.Clear();
84            buf.Add(aIdx);
85            bestQ = actionQ;
86          } else if (actionQ >= bestQ) {
87            buf.Add(aIdx);
88          }
89        }
90        return buf[rand.Next(buf.Count)];
91      } else {
92        // random selection
93        int aIdx = -1;
94        foreach (var a in actions) {
95          ++aIdx;
96          if (a.Done) continue;
97          buf.Add(aIdx);
98        }
99        return buf[rand.Next(buf.Count)];
100      }
101    }
102  }
103}
Note: See TracBrowser for help on using the repository browser.