[12909] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using HeuristicLab.Common;
|
---|
| 5 | using HeuristicLab.Core;
|
---|
| 6 | using HeuristicLab.Data;
|
---|
| 7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 8 | using HeuristicLab.Parameters;
|
---|
| 9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 10 | using HeuristicLab.Random;
|
---|
| 11 |
|
---|
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Epsilon-greedy construction policy. With a chance governed by <see cref="Eps"/> a follow
  /// state is sampled from all candidates; otherwise one of the follow states with the highest
  /// value according to <see cref="StateValueFunction"/> is sampled.
  /// </summary>
  [StorableClass]
  [Item("EpsGreedySymbolicExpressionConstructionPolicy", "")]
  public class EpsGreedySymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    // Keys under which the two parameters are registered in the Parameters collection.
    private const string EpsParameterName = "Eps";
    private const string StateValueFunctionParameterName = "Quality function";

    #region parameters
    // Typed view on the "Eps" entry of the parameter collection.
    private IFixedValueParameter<DoubleValue> EpsParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[EpsParameterName]; }
    }

    // Typed view on the "Quality function" entry of the parameter collection.
    private IValueParameter<IStateValueFunction> StateValueFunctionParameter {
      get { return (IValueParameter<IStateValueFunction>)Parameters[StateValueFunctionParameterName]; }
    }

    /// <summary>
    /// The fraction of random pulls (value of the "Eps" parameter).
    /// </summary>
    public double Eps {
      get { return EpsParameter.Value.Value; }
      set { EpsParameter.Value.Value = value; }
    }

    /// <summary>
    /// The quality function used to rate follow states and to record observed qualities
    /// (value of the "Quality function" parameter).
    /// </summary>
    public IStateValueFunction StateValueFunction {
      get { return StateValueFunctionParameter.Value; }
      set { StateValueFunctionParameter.Value = value; }
    }
    #endregion

    #region construction and cloning
    [StorableConstructor]
    protected EpsGreedySymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor; all copying is delegated to the base class.
    /// </summary>
    protected EpsGreedySymbolicExpressionConstructionPolicy(EpsGreedySymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Creates a policy with Eps = 0.1 (stored as a PercentValue) and a
    /// TabularAvgStateValueFunction as quality function.
    /// </summary>
    public EpsGreedySymbolicExpressionConstructionPolicy()
      : base() {
      var epsParameter = new FixedValueParameter<DoubleValue>(EpsParameterName, "The fraction of random pulls", new PercentValue(0.1, true));
      Parameters.Add(epsParameter);

      var qualityFunctionParameter = new ValueParameter<IStateValueFunction>(StateValueFunctionParameterName, "The quality function to use", new TabularAvgStateValueFunction());
      Parameters.Add(qualityFunctionParameter);
    }

    /// <summary>
    /// Returns a new instance produced by the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new EpsGreedySymbolicExpressionConstructionPolicy(this, cloner);
    }
    #endregion

    /// <summary>
    /// Chooses the index of the follow state to continue with.
    /// </summary>
    /// <param name="followStates">Candidate follow states; the result is an index into this list.</param>
    /// <param name="random">Random source used for the explore/exploit draw and for sampling.</param>
    /// <returns>An index into <paramref name="followStates"/>.</returns>
    protected override int Select(IReadOnlyList<object> followStates, IRandom random) {
      var allIndices = Enumerable.Range(0, followStates.Count);

      // Explore: the draw fell below Eps, so sample from every candidate index.
      bool explore = random.NextDouble() < Eps;
      if (explore) {
        return allIndices.SampleRandom(random);
      }

      // Exploit: sample among the indices that share the highest value.
      List<int> winners = CollectBestIndices(followStates);
      return winners.SampleRandom(random);
    }

    // Collects the indices of all follow states whose value equals the maximum value found.
    private List<int> CollectBestIndices(IReadOnlyList<object> followStates) {
      var winners = new List<int>();
      double topValue = double.NegativeInfinity;

      for (int i = 0; i < followStates.Count; i++) {
        double value = StateValueFunction.Value(followStates[i]);

        if (value > topValue) {
          // Strictly better than everything seen so far: restart the list of ties.
          winners.Clear();
          topValue = value;
          winners.Add(i);
        } else if (value == topValue) {
          // Tie with the current best. A NaN value satisfies neither comparison and is skipped.
          winners.Add(i);
        }
      }

      return winners;
    }

    /// <summary>
    /// Reports the observed quality to the quality function for every state of the sequence.
    /// </summary>
    /// <param name="stateSequence">The states to update.</param>
    /// <param name="quality">The quality passed along with each state.</param>
    public sealed override void Update(IEnumerable<object> stateSequence, double quality) {
      foreach (object visitedState in stateSequence) {
        StateValueFunction.Update(visitedState, quality);
      }
    }

    /// <summary>
    /// Creates the state object by forwarding all arguments to the state function of the
    /// current quality function.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<ISymbol> actionSequence, ISymbolicExpressionTreeNode parent, int childIdx) {
      var stateFunction = StateValueFunction.StateFunction;
      return stateFunction.CreateState(root, actionSequence, parent, childIdx);
    }
  }
}
|
---|