1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using HeuristicLab.Common;
|
---|
5 | using HeuristicLab.Core;
|
---|
6 | using HeuristicLab.Data;
|
---|
7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
8 | using HeuristicLab.Parameters;
|
---|
9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
10 | using HeuristicLab.PluginInfrastructure;
|
---|
11 | using HeuristicLab.Random;
|
---|
12 |
|
---|
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Construction policy that chooses the next state by an upper-confidence-bound rule:
  /// each candidate follow state is scored with
  /// <c>Value(s) + R * sqrt(2 * ln(totalTries) / Tries(s))</c>, where the value and the
  /// try counts come from <see cref="StateValueFunction"/>. Candidates that have never
  /// been tried are scored with positive infinity so they are preferred.
  /// </summary>
  [StorableClass]
  [Item("UcbSymbolicExpressionConstructionPolicy", "")]
  public class UcbSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    // Keys under which the parameters are registered in the parameter collection.
    // The string values must stay unchanged: they are used for every lookup below.
    private const string RParameterName = "R";
    private const string QualityFunctionParameterName = "Quality function";

    /// <summary>
    /// Weighting factor applied to the confidence-bound term of the score.
    /// </summary>
    public double R {
      get { return ((IFixedValueParameter<DoubleValue>)Parameters[RParameterName]).Value.Value; }
      set { ((IFixedValueParameter<DoubleValue>)Parameters[RParameterName]).Value.Value = value; }
    }

    /// <summary>
    /// Tabular value function that supplies the value estimate and the number of tries
    /// of a state, and whose state function is used to create states.
    /// </summary>
    public ITabularStateValueFunction StateValueFunction {
      get { return ((IValueParameter<ITabularStateValueFunction>)Parameters[QualityFunctionParameterName]).Value; }
      set { ((IValueParameter<ITabularStateValueFunction>)Parameters[QualityFunctionParameterName]).Value = value; }
    }

    /// <summary>
    /// Copy constructor used by <see cref="Clone"/>; all copying is delegated to the base class.
    /// </summary>
    protected UcbSymbolicExpressionConstructionPolicy(UcbSymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    [StorableConstructor]
    protected UcbSymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a policy with R = 1.0 and a <see cref="TabularAvgStateValueFunction"/> as value function.
    /// </summary>
    public UcbSymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The weighting factor for the confidence bound (should be scaled based on the range or the fitness values)", new DoubleValue(1.0)));
      Parameters.Add(new ValueParameter<ITabularStateValueFunction>(QualityFunctionParameterName, "The quality function to use", new TabularAvgStateValueFunction()));
    }

    /// <summary>
    /// Scores every follow state and returns the index of a best-scoring one.
    /// </summary>
    /// <param name="followStates">Candidate states to choose from.</param>
    /// <param name="random">Random number generator handed to SampleRandom to pick among tied candidates.</param>
    /// <returns>Index into <paramref name="followStates"/> of the selected state.</returns>
    protected sealed override int Select(IReadOnlyList<object> followStates, IRandom random) {
      // Resolve the parameters once: each property access goes through a parameter
      // lookup and a cast, none of which change while the candidates are scored.
      var valueFunction = StateValueFunction;
      double weight = R;

      int totalTries = followStates.Sum(s => valueFunction.Tries(s));
      // Loop-invariant numerator of the confidence term. It is only consumed for states
      // with at least one try, in which case totalTries >= 1 and the logarithm is finite.
      double confidenceNumerator = 2 * Math.Log(totalTries);

      var bestFollowStates = new List<int>();
      var bestQuality = double.NegativeInfinity;
      for (int idx = 0; idx < followStates.Count; idx++) {
        var state = followStates[idx];
        var tries = valueFunction.Tries(state);

        double quality;
        if (tries == 0) {
          // Untried states always win against tried ones.
          quality = double.PositiveInfinity;
        } else {
          quality = valueFunction.Value(state) + weight * Math.Sqrt(confidenceNumerator / tries);
        }

        if (quality >= bestQuality) {
          if (quality > bestQuality) {
            // Strictly better: discard the candidates collected so far.
            bestFollowStates.Clear();
            bestQuality = quality;
          }
          // Equal to the current best (or the new best): keep as a tie candidate.
          bestFollowStates.Add(idx);
        }
      }

      // NOTE(review): if followStates is empty, or every score is NaN, the candidate list
      // is empty here and is handed to SampleRandom as-is - confirm how SampleRandom
      // behaves for an empty sequence.
      return bestFollowStates.SampleRandom(random);
    }

    /// <summary>
    /// Reports the given quality to the value function for every state of the sequence.
    /// </summary>
    /// <param name="stateSequence">States to update.</param>
    /// <param name="quality">Quality passed to the value function for each state.</param>
    public sealed override void Update(IEnumerable<object> stateSequence, double quality) {
      var valueFunction = StateValueFunction;
      foreach (var state in stateSequence) {
        valueFunction.Update(state, quality);
      }
    }

    /// <summary>
    /// Creates a state object by delegating to the state function of <see cref="StateValueFunction"/>.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<ISymbol> actionSequence, ISymbolicExpressionTreeNode parent, int childIdx) {
      return StateValueFunction.StateFunction.CreateState(root, actionSequence, parent, childIdx);
    }

    /// <summary>
    /// Creates a clone of this policy through the copy constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new UcbSymbolicExpressionConstructionPolicy(this, cloner);
    }
  }
}