Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ExprHash.cs @ 15414

Last change on this file since 15414 was 15414, checked in by gkronber, 7 years ago

#2796 worked on MCTS

File size: 6.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21using System;
22using System.Collections.Generic;
23using System.Diagnostics.Contracts;
24using HeuristicLab.Random;
25using System.Linq;
26
27namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
28
29  // calculates a hash-code for expressions.
30  // The hash codes of equivalent expressions x * (x + y) = (y + x)*x = (yx * xx) must be the same.
31  // We use a random number for each symbol in the expressions and use symmetric operations to produce
32  // results of sub-expressions.
33  // Surrogate functions for the functions log, exp, 1/x are necessary.
34  // Ideally, the surrogate functions also match identities of the original expressions.
35  // Simply using floating point operations and random floats for symbols would be easiest.
36  // However, when we use floating point operations it is necessary to handle the inaccuracies
37  // in the last bits (e.g. for division, log and exp)
38  //
39  // Collisions are OK as long as they occur seldom.
40  // It is probably be very unlikely to miss the optimal solution because of a collision.
41  //
42  // We only need to identify equivalent structures. The numeric constants are irrelevant.
43  // Therefore, equivalent structures with different numeric constants map to the same hash code.
44
45  public static class ExprHash {
46    const int MaxStackSize = 100;
47    const int MaxVariables = 1000;
48    private static double[] varSymbValues;
49    static ExprHash() {
50      var rand = new MersenneTwister();
51      varSymbValues = new double[MaxVariables];
52      for (int i = 0; i < MaxVariables; i++) {
53        varSymbValues[i] = rand.NextDouble();
54      }
55    }
56
57    public static ulong GetHash(byte[] code, int nParams) {
58      var bits = (ulong)BitConverter.DoubleToInt64Bits(Eval(code, nParams));
59      // clear last five bits (insignificant?)
60      bits = bits & 0xFFFFFFFFFFFFFFE0;
61      return bits;
62    }
63
64    private static double Eval(byte[] code, int nParams) {
65      // The hash code calculation already preserves commutativity, associativity and distributivity of operations.
66      // However, we also need to hash c1*x1 + c2*x1 to the same value as c3*x1.
67      // Similarly for x1*x2 + x1*x2 or log(x1) + log(x1)!
68
69      // Calculate sums lazily. Keep all terms and only when the actual sum is necessary remove duplicate terms and calculate sum
70      // think about speed later (TODO)
71
72      var stack = new ISet<double>[MaxStackSize];
73      var terms = new HashSet<double>(new ApproximateDoubleEqualityComparer()); // the set of arguments for the current operator (+, *)
74      int topOfStack = -1;
75      int pc = 0;
76      int nextParamIdx = -1;
77      OpCodes op;
78      short arg;
79      while (true) {
80        ReadNext(code, ref pc, out op, out arg);
81        switch (op) {
82          case OpCodes.Nop: break;
83          case OpCodes.LoadConst0: {
84              ++topOfStack;
85              stack[topOfStack] = new HashSet<double>( new[] { 0.0 });
86
87              // terms.Add(0.0); // ignore numeric constants in expr-hash
88
89              break;
90            }
91          case OpCodes.LoadConst1: {
92              ++topOfStack;
93              stack[topOfStack] = new HashSet<double>(new[] { 1.0 });
94
95              // args.Add(1.0); ignore numeric constants in expr-hash
96
97              break;
98            }
99          case OpCodes.LoadParamN: {
100              ++topOfStack;
101              stack[topOfStack] =  new HashSet<double>(new[] { 1.0 });
102              break;
103            }
104          case OpCodes.LoadVar: {
105              ++topOfStack;
106              stack[topOfStack] = new HashSet<double>(new[] { varSymbValues[arg] });
107
108              // args.Add(varSymbValues[arg]);
109
110              break;
111            }
112          case OpCodes.Add: {
113              // take arguments from stack and put both terms into the set of terms
114              // for every other operation we need to evaluate the sum of terms first and put it onto the stack (lazy eval of sums)
115
116              stack[topOfStack - 1].UnionWith(stack[topOfStack]);
117              topOfStack--;
118
119              // stack[topOfStack] = t1 + t2; (later)
120              break;
121            }
122          case OpCodes.Mul: {
123              var t1 = stack[topOfStack - 1];
124              var t2 = stack[topOfStack];
125              topOfStack--;
126              stack[topOfStack] = new HashSet<double>(new double[] { t1.Sum() * t2.Sum() });
127              break;
128            }
129          case OpCodes.Log: {
130              var v1 = stack[topOfStack];
131              stack[topOfStack] = new HashSet<double>(new double[] { Math.Log( v1.Sum()) });
132              break;
133            }
134          case OpCodes.Exp: {
135              var v1 = stack[topOfStack];
136              stack[topOfStack] = new HashSet<double>(new double[] { Math.Exp(v1.Sum()) });
137              break;
138            }
139          case OpCodes.Inv: {
140              var v1 = stack[topOfStack];
141              stack[topOfStack] = new HashSet<double>(new double[] { 1.0 / v1.Sum() });
142              break;
143            }
144          case OpCodes.Exit:
145            Contract.Assert(topOfStack == 0);
146            return stack[topOfStack].Sum();
147        }
148      }
149    }
150
151    private static void EvalTerms(HashSet<double> terms, double[] stack, ref int topOfStack) {
152      ++topOfStack;
153      stack[topOfStack] = terms.Sum();
154      terms.Clear();
155    }
156
157    private static void ReadNext(byte[] code, ref int pc, out OpCodes op, out short s) {
158      op = (OpCodes)Enum.ToObject(typeof(OpCodes), code[pc++]);
159      s = 0;
160      if (op == OpCodes.LoadVar) {
161        s = (short)((code[pc] << 8) | code[pc + 1]);
162        pc += 2;
163      }
164    }
165  }
166}
Note: See TracBrowser for help on using the repository browser.