1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using HeuristicLab.Common;
|
---|
5 | using HeuristicLab.Core;
|
---|
6 | using HeuristicLab.Data;
|
---|
7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
8 | using HeuristicLab.Parameters;
|
---|
9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
10 | using HeuristicLab.PluginInfrastructure;
|
---|
11 | using HeuristicLab.Random;
|
---|
12 |
|
---|
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Construction policy that scores every candidate action with an upper-confidence-bound term,
  /// <c>Q(state, action) + R * sqrt(2 * ln(totalTries) / tries(state, action))</c>,
  /// and returns one of the best-scoring actions. Quality estimates and try counts are
  /// supplied by the configured <see cref="ITabularQualityFunction"/>.
  /// </summary>
  [StorableClass]
  [Item("UcbSymbolicExpressionConstructionPolicy", "")]
  public class UcbSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    /// <summary>
    /// Weighting factor applied to the confidence-bound term (backed by the "R" parameter).
    /// </summary>
    public double R {
      get { return ((IFixedValueParameter<DoubleValue>)Parameters["R"]).Value.Value; }
      set { ((IFixedValueParameter<DoubleValue>)Parameters["R"]).Value.Value = value; }
    }

    /// <summary>
    /// Quality function that provides Q values, try counts and the state function
    /// (backed by the "Quality function" parameter).
    /// </summary>
    public ITabularQualityFunction QualityFunction {
      get { return ((IValueParameter<ITabularQualityFunction>)Parameters["Quality function"]).Value; }
      set { ((IValueParameter<ITabularQualityFunction>)Parameters["Quality function"]).Value = value; }
    }

    /// <summary>
    /// Cloning constructor; all copying is delegated to the base class.
    /// </summary>
    protected UcbSymbolicExpressionConstructionPolicy(UcbSymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Constructor used by the persistence framework.
    /// </summary>
    [StorableConstructor]
    protected UcbSymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a policy with R = 1.0 and a <see cref="TabularAvgQualityFunction"/> as quality function.
    /// </summary>
    public UcbSymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>("R", "The weighting factor for the confidence bound (should be scaled based on the range or the fitness values)", new DoubleValue(1.0)));
      Parameters.Add(new ValueParameter<ITabularQualityFunction>("Quality function", "The quality function to use", new TabularAvgQualityFunction()));
    }

    /// <summary>
    /// Selects one of the given actions for <paramref name="state"/>.
    /// Actions with zero recorded tries score positive infinity and are therefore preferred;
    /// all other actions score Q + R * sqrt(2 * ln(totalTries) / tries).
    /// One of the actions sharing the highest score is drawn with <paramref name="random"/>.
    /// </summary>
    /// <param name="state">State object passed through to the quality function.</param>
    /// <param name="actions">Candidate actions; enumerated exactly once.</param>
    /// <param name="random">Random number generator used to draw among the best actions.</param>
    /// <returns>The selected action.</returns>
    protected sealed override int Select(object state, IEnumerable<int> actions, IRandom random) {
      // Each property access is a keyed lookup in Parameters plus a cast; resolve them once.
      var qualityFunction = QualityFunction;
      var r = R;

      // Materialize the candidates so a lazily evaluated sequence is not enumerated twice,
      // and query the try count of every action only once.
      var actionList = actions as IList<int> ?? actions.ToList();
      var tries = actionList.Select(a => qualityFunction.Tries(state, a)).ToArray();
      int totalTries = tries.Sum(); // Enumerable.Sum keeps the overflow check of the original code

      // Loop-invariant numerator of the confidence term. When totalTries is 0 this is
      // -Infinity, but then every action is untried and the value is never used.
      double confidenceNumerator = 2 * Math.Log(totalTries);

      // find best action(s); ties are collected and resolved randomly below
      var bestActions = new List<int>();
      var bestQuality = double.NegativeInfinity;
      for (int i = 0; i < actionList.Count; i++) {
        int a = actionList[i];
        double quality;
        if (tries[i] == 0) {
          quality = double.PositiveInfinity;
        } else {
          quality = qualityFunction.Q(state, a) + r * Math.Sqrt(confidenceNumerator / tries[i]);
        }
        if (quality >= bestQuality) {
          if (quality > bestQuality) {
            bestActions.Clear();
            bestQuality = quality;
          }
          bestActions.Add(a);
        }
      }
      // NOTE(review): bestActions is empty when no actions are supplied (or every score is NaN);
      // the behavior of the call below in that case depends on SampleRandom - confirm callers never do this.
      return bestActions.SampleRandom(random, 1).First();
    }

    /// <summary>
    /// Forwards <paramref name="quality"/> to the quality function for every
    /// (state, action) pair in <paramref name="stateActionSequence"/>.
    /// </summary>
    /// <param name="stateActionSequence">Pairs of state (Item1) and action (Item2).</param>
    /// <param name="quality">Quality value passed to each update.</param>
    public sealed override void Update(IEnumerable<Tuple<object, int>> stateActionSequence, double quality) {
      var qualityFunction = QualityFunction;
      foreach (var t in stateActionSequence) {
        var state = t.Item1;
        var action = t.Item2;
        qualityFunction.Update(state, action, quality);
      }
    }

    /// <summary>
    /// Creates the state object by delegating to the state function of the configured quality function.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<int> actions, ISymbolicExpressionTreeNode parent, int childIdx) {
      return QualityFunction.StateFunction.CreateState(root, actions, parent, childIdx);
    }

    /// <summary>
    /// Creates a deep clone of this policy via the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new UcbSymbolicExpressionConstructionPolicy(this, cloner);
    }
  }
}