Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Algorithms.IteratedSentenceConstruction/HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction/3.3/Policies/UcbSymbolicExpressionConstructionPolicy.cs @ 12923

Last change on this file since 12923 was 12923, checked in by gkronber, 9 years ago

#2471

  • refactoring to use state value function V(s) instead of state/action value function Q(s,a)
  • added test case for artificial ant problem
File size: 3.4 KB
Line 
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Random;

namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Construction policy that chooses among follow states with an upper-confidence-bound rule:
  /// each visited state is scored as <c>V(s) + R * sqrt(2 * ln(N) / n(s))</c>, where <c>V(s)</c> and
  /// <c>n(s)</c> come from the configured tabular state value function and <c>N</c> is the sum of
  /// <c>n(s)</c> over all offered follow states. States that have never been tried score +infinity.
  /// </summary>
  [StorableClass]
  [Item("UcbSymbolicExpressionConstructionPolicy", "")]
  public class UcbSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    // Keys under which the parameters are registered in the Parameters collection.
    // Kept in one place so the constructor and the accessors cannot drift apart.
    private const string RParameterName = "R";
    private const string StateValueFunctionParameterName = "Quality function";

    private IFixedValueParameter<DoubleValue> RParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[RParameterName]; }
    }

    private IValueParameter<ITabularStateValueFunction> StateValueFunctionParameter {
      get { return (IValueParameter<ITabularStateValueFunction>)Parameters[StateValueFunctionParameterName]; }
    }

    /// <summary>
    /// Weighting factor applied to the confidence-bound term in <see cref="Select"/>.
    /// </summary>
    public double R {
      get { return RParameter.Value.Value; }
      set { RParameter.Value.Value = value; }
    }

    /// <summary>
    /// Tabular state value function that supplies the value estimate and the number of tries per state.
    /// </summary>
    public ITabularStateValueFunction StateValueFunction {
      get { return StateValueFunctionParameter.Value; }
      set { StateValueFunctionParameter.Value = value; }
    }

    /// <summary>Cloning constructor; all copying is delegated to the base class.</summary>
    protected UcbSymbolicExpressionConstructionPolicy(UcbSymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>Constructor used by the persistence framework during deserialization.</summary>
    [StorableConstructor]
    protected UcbSymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a policy with R = 1.0 and a <see cref="TabularAvgStateValueFunction"/> as state value function.
    /// </summary>
    public UcbSymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The weighting factor for the confidence bound (should be scaled based on the range or the fitness values)", new DoubleValue(1.0)));
      Parameters.Add(new ValueParameter<ITabularStateValueFunction>(StateValueFunctionParameterName, "The quality function to use", new TabularAvgStateValueFunction()));
    }

    /// <summary>
    /// Selects the index of the follow state with the highest upper confidence bound.
    /// Ties (including several untried states) are broken uniformly at random.
    /// </summary>
    /// <param name="followStates">Candidate follow states; the returned value indexes into this list.</param>
    /// <param name="random">Random number generator used for tie breaking.</param>
    /// <returns>Index of the selected follow state.</returns>
    protected sealed override int Select(IReadOnlyList<object> followStates, IRandom random) {
      // Resolve the parameters once instead of on every loop iteration.
      var valueFunction = StateValueFunction;
      double weight = R;

      // Read the visit count of every follow state exactly once; it is needed for the total,
      // for the "untried" check and for the confidence-bound term.
      var tries = new int[followStates.Count];
      for (int idx = 0; idx < tries.Length; idx++) {
        tries[idx] = valueFunction.Tries(followStates[idx]);
      }
      // Enumerable.Sum is overflow-checked, same as the summation it replaces.
      int totalTries = tries.Sum();

      // Loop-invariant numerator of the confidence bound. If totalTries is 0 this is -infinity,
      // but then every state is untried and the value is never used.
      double explorationNumerator = 2 * Math.Log(totalTries);

      var bestFollowStates = new List<int>();
      var bestQuality = double.NegativeInfinity;
      for (int idx = 0; idx < tries.Length; idx++) {
        double quality;
        if (tries[idx] == 0) {
          // untried states are always preferred over visited ones
          quality = double.PositiveInfinity;
        } else {
          quality = valueFunction.Value(followStates[idx]) + weight * Math.Sqrt(explorationNumerator / tries[idx]);
        }
        if (quality >= bestQuality) {
          if (quality > bestQuality) {
            // strictly better: discard the previous candidates
            bestFollowStates.Clear();
            bestQuality = quality;
          }
          bestFollowStates.Add(idx);
        }
      }
      return bestFollowStates.SampleRandom(random);
    }

    /// <summary>
    /// Updates the state value function with <paramref name="quality"/> for every state in <paramref name="stateSequence"/>.
    /// </summary>
    /// <param name="stateSequence">States to update.</param>
    /// <param name="quality">Quality value passed to the state value function for each state.</param>
    public sealed override void Update(IEnumerable<object> stateSequence, double quality) {
      var valueFunction = StateValueFunction;
      foreach (var state in stateSequence) {
        valueFunction.Update(state, quality);
      }
    }

    /// <summary>
    /// Creates the state object by delegating to the state function of the configured state value function.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<ISymbol> actions, ISymbolicExpressionTreeNode parent, int childIdx) {
      return StateValueFunction.StateFunction.CreateState(root, actions, parent, childIdx);
    }

    /// <summary>Creates a deep clone of this policy using the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new UcbSymbolicExpressionConstructionPolicy(this, cloner);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.