[12909] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using HeuristicLab.Common;
|
---|
| 5 | using HeuristicLab.Core;
|
---|
| 6 | using HeuristicLab.Data;
|
---|
| 7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 8 | using HeuristicLab.Parameters;
|
---|
| 9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 10 | using HeuristicLab.PluginInfrastructure;
|
---|
| 11 | using HeuristicLab.Random;
|
---|
| 12 |
|
---|
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Construction policy that chooses among follow states by an upper-confidence-bound score:
  /// the state's value plus <c>R * sqrt(2 * ln(totalTries) / tries)</c>, where the value and
  /// the try counts are supplied by <see cref="StateValueFunction"/>.
  /// </summary>
  [StorableClass]
  [Item("UcbSymbolicExpressionConstructionPolicy", "")]
  public class UcbSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    // Keys under which the two parameters are registered in the Parameters collection.
    private const string RParameterName = "R";
    private const string QualityFunctionParameterName = "Quality function";

    /// <summary>
    /// Weighting factor applied to the confidence-bound term of the score.
    /// </summary>
    public double R {
      get { return ((IFixedValueParameter<DoubleValue>)Parameters[RParameterName]).Value.Value; }
      set { ((IFixedValueParameter<DoubleValue>)Parameters[RParameterName]).Value.Value = value; }
    }

    /// <summary>
    /// Tabular state value function that supplies the value and the number of tries per state
    /// and receives the quality updates.
    /// </summary>
    public ITabularStateValueFunction StateValueFunction {
      get { return ((IValueParameter<ITabularStateValueFunction>)Parameters[QualityFunctionParameterName]).Value; }
      set { ((IValueParameter<ITabularStateValueFunction>)Parameters[QualityFunctionParameterName]).Value = value; }
    }

    /// <summary>
    /// Cloning constructor; all work is delegated to the base class.
    /// </summary>
    protected UcbSymbolicExpressionConstructionPolicy(UcbSymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Constructor used by the persistence infrastructure.
    /// </summary>
    [StorableConstructor]
    protected UcbSymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a policy with R = 1.0 and a <c>TabularAvgStateValueFunction</c> as quality function.
    /// </summary>
    public UcbSymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The weighting factor for the confidence bound (should be scaled based on the range or the fitness values)", new DoubleValue(1.0)));
      Parameters.Add(new ValueParameter<ITabularStateValueFunction>(QualityFunctionParameterName, "The quality function to use", new TabularAvgStateValueFunction()));
    }

    /// <summary>
    /// Scores every follow state and returns the index of the best one.
    /// </summary>
    /// <param name="followStates">Candidate states; the result is an index into this list.</param>
    /// <param name="random">Random source used to pick among equally scored candidates.</param>
    /// <returns>Index of the selected follow state.</returns>
    protected sealed override int Select(IReadOnlyList<object> followStates, IRandom random) {
      // Resolve the parameter-backed values once: every property access is a lookup in
      // Parameters followed by a cast, and none of them changes while the loop runs.
      var valueFunction = StateValueFunction;
      double r = R;

      var bestFollowStates = new List<int>();
      var bestQuality = double.NegativeInfinity;
      int totalTries = followStates.Sum(state => valueFunction.Tries(state));
      // The numerator of the confidence term is identical for all candidates.
      double confidenceNumerator = 2 * Math.Log(totalTries);

      for (int idx = 0; idx < followStates.Count; idx++) {
        var followState = followStates[idx];
        var tries = valueFunction.Tries(followState);

        double quality;
        if (tries == 0) {
          // A state that has never been tried outranks every state with a finite score.
          quality = double.PositiveInfinity;
        } else {
          quality = valueFunction.Value(followState) + r * Math.Sqrt(confidenceNumerator / tries);
        }

        if (quality >= bestQuality) {
          if (quality > bestQuality) {
            // Strictly better: the candidates collected so far are no longer the best.
            bestFollowStates.Clear();
            bestQuality = quality;
          }
          // Equal or newly best: remember the index so ties can be resolved randomly below.
          bestFollowStates.Add(idx);
        }
      }
      return bestFollowStates.SampleRandom(random);
    }

    /// <summary>
    /// Reports the given quality to the state value function for every state of the sequence.
    /// </summary>
    /// <param name="stateSequence">States to update.</param>
    /// <param name="quality">Quality value passed along with each state.</param>
    public sealed override void Update(IEnumerable<object> stateSequence, double quality) {
      // One parameter lookup for the whole sequence instead of one per state.
      var valueFunction = StateValueFunction;
      foreach (var state in stateSequence) {
        valueFunction.Update(state, quality);
      }
    }

    /// <summary>
    /// Creates the state object by forwarding all arguments to the state function of
    /// <see cref="StateValueFunction"/>.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<ISymbol> actionSequence, ISymbolicExpressionTreeNode parent, int childIdx) {
      return StateValueFunction.StateFunction.CreateState(root, actionSequence, parent, childIdx);
    }

    /// <summary>
    /// Creates a copy of this policy through the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(HeuristicLab.Common.Cloner cloner) {
      return new UcbSymbolicExpressionConstructionPolicy(this, cloner);
    }
  }
}
|
---|