1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using HeuristicLab.Common;
|
---|
5 | using HeuristicLab.Core;
|
---|
6 | using HeuristicLab.Data;
|
---|
7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
8 | using HeuristicLab.Parameters;
|
---|
9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
10 | using HeuristicLab.PluginInfrastructure;
|
---|
11 | using HeuristicLab.Random;
|
---|
12 |
|
---|
namespace HeuristicLab.Algorithms.IteratedSymbolicExpressionConstruction {
  /// <summary>
  /// Construction policy that picks the next action by exponentially weighted
  /// (Boltzmann-style) proportional sampling over the qualities reported by a
  /// tabular quality function.
  /// </summary>
  [StorableClass]
  [Item("BoltzmannExplorationSymbolicExpressionConstructionPolicy", "")]
  public class BoltzmannExplorationSymbolicExpressionConstructionPolicy : SymbolicExpressionConstructionPolicyBase {
    /// <summary>
    /// The weighting factor beta. It scales the range-normalized quality inside the
    /// exponential when sampling weights are computed in <see cref="Select"/>.
    /// </summary>
    public double Beta {
      get { return ((IFixedValueParameter<DoubleValue>)Parameters["Beta"]).Value.Value; }
      set { ((IFixedValueParameter<DoubleValue>)Parameters["Beta"]).Value.Value = value; }
    }

    /// <summary>
    /// The quality function that supplies the try counts and quality estimates for
    /// state/action pairs and the state function used to create states.
    /// </summary>
    public ITabularQualityFunction QualityFunction {
      get { return ((IValueParameter<ITabularQualityFunction>)Parameters["Quality function"]).Value; }
      set { ((IValueParameter<ITabularQualityFunction>)Parameters["Quality function"]).Value = value; }
    }

    /// <summary>
    /// Cloning constructor; invoked by <see cref="Clone"/> and forwards to the base class.
    /// </summary>
    protected BoltzmannExplorationSymbolicExpressionConstructionPolicy(BoltzmannExplorationSymbolicExpressionConstructionPolicy original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Constructor marked with <c>[StorableConstructor]</c>; only forwards the flag to the base class.
    /// </summary>
    [StorableConstructor]
    protected BoltzmannExplorationSymbolicExpressionConstructionPolicy(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates a policy with <c>Beta</c> = 1.0 and a <see cref="TabularAvgQualityFunction"/>
    /// as the default quality function.
    /// </summary>
    public BoltzmannExplorationSymbolicExpressionConstructionPolicy()
      : base() {
      Parameters.Add(new FixedValueParameter<DoubleValue>("Beta", "The weighting factor beta", new DoubleValue(1.0)));
      Parameters.Add(new ValueParameter<ITabularQualityFunction>("Quality function", "The quality function to use", new TabularAvgQualityFunction()));
    }

    /// <summary>
    /// Selects one of the given actions for the given state.
    /// </summary>
    /// <remarks>
    /// Selection proceeds in three steps:
    /// 1. If any action has a try count of zero, one of those actions is sampled uniformly.
    /// 2. If all qualities are (almost) equal, an action is sampled uniformly.
    /// 3. Otherwise each action gets the weight exp(Beta * (q - min) / (max - min)) and
    ///    one action is drawn via <c>SampleProportional</c> using these weights.
    /// </remarks>
    /// <param name="state">The state for which an action has to be chosen.</param>
    /// <param name="actions">The candidate actions; the sequence is enumerated exactly once.</param>
    /// <param name="random">The random number generator used for sampling.</param>
    /// <returns>The selected action.</returns>
    protected sealed override int Select(object state, IEnumerable<int> actions, IRandom random) {
      // Materialize once: the candidates are traversed several times below, and a lazily
      // evaluated source must not be re-run (or drift out of sync with the weights).
      var candidates = actions.ToArray();

      // Hoist the parameter lookups out of the per-action lambdas.
      var qualityFunction = QualityFunction;
      var beta = Beta;

      // Untried actions take precedence over everything else.
      var untried = candidates.Where(a => qualityFunction.Tries(state, a) == 0).ToArray();
      if (untried.Length > 0) {
        return untried.SampleRandom(random, 1).First();
      }

      // Evaluate each quality exactly once; the array is aligned with 'candidates'.
      // NOTE: for an empty candidate set Min() throws, as the original Max()/Min() calls did.
      var qualities = candidates.Select(a => qualityFunction.Q(state, a)).ToArray();

      // windowing: normalize qualities to [0, 1] before applying the exponential
      var min = qualities.Min();
      double range = qualities.Max() - min;
      if (range.IsAlmost(0.0)) return candidates.SampleRandom(random, 1).First();

      var weights = qualities.Select(q => Math.Exp(beta * (q - min) / range)).ToArray();

      return candidates.SampleProportional(random, 1, weights).First();
    }

    /// <summary>
    /// Reports the observed quality to the quality function for every state/action
    /// pair of the given sequence.
    /// </summary>
    /// <param name="stateActionSequence">The visited (state, action) pairs.</param>
    /// <param name="quality">The quality that is passed on for each pair.</param>
    public sealed override void Update(IEnumerable<Tuple<object, int>> stateActionSequence, double quality) {
      var qualityFunction = QualityFunction;
      foreach (var stateAction in stateActionSequence) {
        qualityFunction.Update(stateAction.Item1, stateAction.Item2, quality);
      }
    }

    /// <summary>
    /// Creates the state object by delegating to the state function of the quality function.
    /// </summary>
    protected override object CreateState(ISymbolicExpressionTreeNode root, List<int> actions, ISymbolicExpressionTreeNode parent, int childIdx) {
      return QualityFunction.StateFunction.CreateState(root, actions, parent, childIdx);
    }

    /// <summary>
    /// Creates a copy of this policy via the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new BoltzmannExplorationSymbolicExpressionConstructionPolicy(this, cloner);
    }
  }
}
|
---|