Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Algorithms.IteratedSentenceConstruction/HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction/3.3/Policies/EpsGreedySymbolicExpressionConstructionPolicy.cs @ 12909

Last change on this file since 12909 was 12909, checked in by gkronber, 9 years ago

#2471: initial import of basic algorithm and framework (state value approximation not yet supported)

File size: 3.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Common;
5using HeuristicLab.Core;
6using HeuristicLab.Data;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Parameters;
9using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
10using HeuristicLab.Random;
11
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Epsilon-greedy construction policy. In <see cref="Select"/> a randomly sampled action is
  /// returned whenever the drawn random number is below <see cref="Eps"/>; otherwise one of the
  /// actions with the highest value reported by <see cref="QualityFunction"/> is returned.
  /// </summary>
  [StorableClass]
  [Item("EpsGreedySymbolicExpressionConstructionPolicy", "")]
  public class EpsGreedySymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    // Keys under which the parameters are registered in the Parameters collection.
    // The string values must stay unchanged because they are the lookup keys.
    private const string EpsParameterName = "Eps";
    private const string QualityFunctionParameterName = "Quality function";

    /// <summary>
    /// The fraction of random pulls (value of the "Eps" parameter).
    /// </summary>
    public double Eps {
      get { return ((IFixedValueParameter<DoubleValue>)Parameters[EpsParameterName]).Value.Value; }
      set { ((IFixedValueParameter<DoubleValue>)Parameters[EpsParameterName]).Value.Value = value; }
    }

    /// <summary>
    /// The quality function used to rate state/action pairs (value of the "Quality function" parameter).
    /// </summary>
    public IQualityFunction QualityFunction {
      get { return ((IValueParameter<IQualityFunction>)Parameters[QualityFunctionParameterName]).Value; }
      set { ((IValueParameter<IQualityFunction>)Parameters[QualityFunctionParameterName]).Value = value; }
    }

    /// <summary>
    /// Creates a policy with Eps = 0.1 and a <see cref="TabularAvgQualityFunction"/> as quality function.
    /// </summary>
    public EpsGreedySymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(EpsParameterName, "The fraction of random pulls", new PercentValue(0.1, true)));
      Parameters.Add(new ValueParameter<IQualityFunction>(QualityFunctionParameterName, "The quality function to use", new TabularAvgQualityFunction()));
    }

    /// <summary>
    /// Selects one of the given actions for the given state.
    /// </summary>
    /// <param name="state">The state object that is passed on to the quality function.</param>
    /// <param name="actions">The candidate actions.</param>
    /// <param name="random">The random number generator used for exploration and tie breaking.</param>
    /// <returns>
    /// A randomly sampled action if the drawn random number is below <see cref="Eps"/>;
    /// otherwise a randomly sampled action among those with the highest quality.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown in the greedy branch when no action qualifies as best, i.e. when
    /// <paramref name="actions"/> is empty or the quality function returned NaN for every action.
    /// </exception>
    protected override int Select(object state, IEnumerable<int> actions, IRandom random) {
      // exploration: ignore the quality function and sample any action
      if (random.NextDouble() < Eps) {
        return actions.SampleRandom(random, 1).First();
      }

      // exploitation: collect all actions that share the highest quality
      var bestActions = new List<int>();
      var bestQuality = double.NegativeInfinity;
      foreach (var a in actions) {
        double quality = QualityFunction.Q(state, a);

        // NaN fails every comparison and is therefore never collected
        if (quality >= bestQuality) {
          if (quality > bestQuality) {
            // strictly better than everything seen so far: restart the tie list
            bestActions.Clear();
            bestQuality = quality;
          }
          bestActions.Add(a);
        }
      }

      // Without this guard sampling from the empty list fails with an error that
      // does not reveal the actual cause (no actions or only NaN qualities).
      if (bestActions.Count == 0) {
        throw new InvalidOperationException("No action could be selected: the set of actions is empty or the quality function returned NaN for every action.");
      }

      // break ties randomly
      return bestActions.SampleRandom(random, 1).First();
    }

    /// <summary>
    /// Reports the observed quality to the quality function for every state/action pair of the sequence.
    /// </summary>
    /// <param name="stateActionSequence">The sequence of (state, action) tuples to update.</param>
    /// <param name="quality">The quality value passed to the quality function for each pair.</param>
    public override void Update(IEnumerable<Tuple<object, int>> stateActionSequence, double quality) {
      foreach (var t in stateActionSequence) {
        var state = t.Item1;
        var action = t.Item2;
        QualityFunction.Update(state, action, quality);
      }
    }

    /// <summary>
    /// Creates the state object by delegating to the state function of the current quality function.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<int> actions, ISymbolicExpressionTreeNode parent, int childIdx) {
      return QualityFunction.StateFunction.CreateState(root, actions, parent, childIdx);
    }

    #region IItem
    /// <summary>
    /// Cloning constructor; all cloning work is delegated to the base class.
    /// </summary>
    protected EpsGreedySymbolicExpressionConstructionPolicy(EpsGreedySymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Constructor marked as storable constructor; forwards to the base class.
    /// </summary>
    [StorableConstructor]
    protected EpsGreedySymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a clone of this policy using the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new EpsGreedySymbolicExpressionConstructionPolicy(this, cloner);
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.