using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Random;

namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
    // NOTE(review): several type names in this file (List, IReadOnlyList, IEnumerable,
    // IFixedValueParameter, IValueParameter, FixedValueParameter, ValueParameter) appear
    // without generic type arguments. They are reproduced exactly as found; confirm
    // against the upstream file whether type arguments were lost on the way here.

    /// <summary>
    /// Construction policy that chooses the next follow state by an upper-confidence-bound
    /// score: the state's value plus <see cref="R"/> times an exploration bonus built from
    /// the state's value variance and the try counts reported by
    /// <see cref="StateValueFunction"/>.
    /// </summary>
    [StorableClass]
    [Item("UcbTunedSymbolicExpressionConstructionPolicy", "Also uses an estimate of the variance")]
    public class UcbTunedSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
        // Keys under which the two parameters are registered in the Parameters collection.
        private const string RParameterName = "R";
        private const string QualityFunctionParameterName = "Quality function";

        /// <summary>
        /// Cloning constructor; all copying is delegated to the base class.
        /// </summary>
        protected UcbTunedSymbolicExpressionConstructionPolicy(UcbTunedSymbolicExpressionConstructionPolicy original, Cloner cloner)
            : base(original, cloner) {
        }

        /// <summary>
        /// Constructor marked for the persistence layer; forwards the flag to the base class.
        /// </summary>
        [StorableConstructor]
        protected UcbTunedSymbolicExpressionConstructionPolicy(bool deserializing)
            : base(deserializing) {
        }

        /// <summary>
        /// Creates a policy and registers its two parameters: "R" (initial value 1.0) and
        /// "Quality function" (initially a <c>TabularAvgStateValueFunction</c>).
        /// </summary>
        public UcbTunedSymbolicExpressionConstructionPolicy()
            : base() {
            Parameters.Add(new FixedValueParameter(
                RParameterName,
                "The weighting factor for the confidence bound (should be scaled based on the range or the fitness values)",
                new DoubleValue(1.0)));
            Parameters.Add(new ValueParameter(
                QualityFunctionParameterName,
                "The quality function to use",
                new TabularAvgStateValueFunction()));
        }

        /// <summary>
        /// Weighting factor applied to the exploration bonus in <see cref="Select"/>.
        /// Reads and writes the value held by the "R" parameter.
        /// </summary>
        public double R {
            get {
                var weightParameter = (IFixedValueParameter)Parameters[RParameterName];
                return weightParameter.Value.Value;
            }
            set {
                var weightParameter = (IFixedValueParameter)Parameters[RParameterName];
                weightParameter.Value.Value = value;
            }
        }

        /// <summary>
        /// Value function queried for try counts, values and value variances, and updated
        /// with observed qualities. Reads and writes the "Quality function" parameter.
        /// </summary>
        public ITabularStateValueFunction StateValueFunction {
            get {
                var functionParameter = (IValueParameter)Parameters[QualityFunctionParameterName];
                return functionParameter.Value;
            }
            set {
                var functionParameter = (IValueParameter)Parameters[QualityFunctionParameterName];
                functionParameter.Value = value;
            }
        }

        /// <summary>
        /// Scores every follow state and returns the index of a top-scoring one.
        /// </summary>
        /// <remarks>
        /// A state with zero tries scores positive infinity, so untried states tie with each
        /// other and beat every finite score. Otherwise the score is
        /// <c>Value + R * sqrt(ln(totalTries) / tries * (ValueVariance + sqrt(2 * ln(totalTries) / tries)))</c>.
        /// NOTE(review): the textbook UCB1-Tuned bonus caps the variance term at 1/4; this
        /// code applies no cap - presumably because qualities are not limited to [0, 1] and
        /// R rescales the bonus. Confirm this is intended.
        /// </remarks>
        /// <param name="followStates">Candidate states to choose from.</param>
        /// <param name="random">Random source handed to <c>SampleRandom</c> to pick among tied indices.</param>
        /// <returns>Index into <paramref name="followStates"/> of the selected state.</returns>
        protected sealed override int Select(IReadOnlyList followStates, IRandom random) {
            int totalTries = followStates.Sum(candidate => StateValueFunction.Tries(candidate));
            // The logarithm depends only on the total, so it is evaluated once up front.
            double logTotalTries = Math.Log(totalTries);

            var topIndices = new List();
            double topScore = double.NegativeInfinity;

            for (int i = 0; i < followStates.Count; i++) {
                var state = followStates[i];

                double score;
                if (StateValueFunction.Tries(state) != 0) {
                    double varianceBound = StateValueFunction.ValueVariance(state)
                        + Math.Sqrt(2 * logTotalTries / StateValueFunction.Tries(state));
                    score = StateValueFunction.Value(state)
                        + R * Math.Sqrt(logTotalTries / StateValueFunction.Tries(state) * varianceBound);
                } else {
                    // Never-tried states are always preferred over states with a finite score.
                    score = double.PositiveInfinity;
                }

                // Written as a negated >= on purpose: a NaN score fails the comparison and
                // is skipped, exactly as it would be by a plain "if (score >= topScore)".
                if (!(score >= topScore)) {
                    continue;
                }

                if (score > topScore) {
                    // Strictly better than everything so far: restart the list of ties.
                    topIndices.Clear();
                    topScore = score;
                }
                topIndices.Add(i);
            }

            return topIndices.SampleRandom(random);
        }

        /// <summary>
        /// Feeds the observed quality into the value function for each state of the sequence.
        /// </summary>
        /// <param name="stateSequence">States to update, in enumeration order.</param>
        /// <param name="quality">Quality value passed to <c>StateValueFunction.Update</c> for every state.</param>
        public sealed override void Update(IEnumerable stateSequence, double quality) {
            foreach (var visitedState in stateSequence) {
                StateValueFunction.Update(visitedState, quality);
            }
        }

        /// <summary>
        /// Builds the state object by delegating to the state function of
        /// <see cref="StateValueFunction"/>; all arguments are passed through unchanged.
        /// </summary>
        protected override object CreateState(ISymbolicExpressionTreeNode root, List actionSequence, ISymbolicExpressionTreeNode parent, int childIdx) {
            var stateFunction = StateValueFunction.StateFunction;
            return stateFunction.CreateState(root, actionSequence, parent, childIdx);
        }

        /// <summary>
        /// Creates a copy of this policy through the cloning constructor.
        /// </summary>
        public override IDeepCloneable Clone(HeuristicLab.Common.Cloner cloner) {
            return new UcbTunedSymbolicExpressionConstructionPolicy(this, cloner);
        }
    }
}