Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3127-MRGP-VarPro-Exploration/HeuristicLab.Problems.VarProMRGP/3.3/Problem.cs @ 18003

Last change on this file since 18003 was 18003, checked in by gkronber, 3 years ago

#3127: changed VarProMRGP to use HEAL.VarPro instead of VarPro implementation in NativeInterpreter

File size: 27.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Optimization;
29using HeuristicLab.Parameters;
30using HEAL.Attic;
31using HeuristicLab.Problems.Instances;
32using HeuristicLab.Problems.DataAnalysis;
33using HeuristicLab.Encodings.BinaryVectorEncoding;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
36using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
37using System.Collections.Generic;
38using System.Runtime.InteropServices;
39using HeuristicLab.Random;
40
41namespace HeuristicLab.Problems.VarProMRGP {
42  [Item("VarPro Multi-Regression Genetic Programming", "Similar to MRGP but MRGP is a inappropriate name, we should think about a new name.")]
43  [Creatable(CreatableAttribute.Categories.CombinatorialProblems, Priority = 999)]
44  [StorableType("8B84830E-0DEB-44FD-B7E8-6DA2F64C0FF2")]
45  public sealed class Problem : SingleObjectiveBasicProblem<BinaryVectorEncoding>, IProblemInstanceConsumer<IRegressionProblemData> {
46
    // Typed accessors for the parameters registered in the constructor.
    public IValueParameter<IRegressionProblemData> RegressionProblemDataParameter => (IValueParameter<IRegressionProblemData>)Parameters["ProblemData"];
    public IValueParameter<VarProGrammar> GrammarParameter => (IValueParameter<VarProGrammar>)Parameters["Grammar"];
    public IFixedValueParameter<IntValue> MaxDepthParameter => (IFixedValueParameter<IntValue>)Parameters["MaxDepth"];
    public IFixedValueParameter<IntValue> MaxSizeParameter => (IFixedValueParameter<IntValue>)Parameters["MaxSize"];
    public OptionalValueParameter<ReadOnlyItemArray<StringValue>> FeaturesParameter => (OptionalValueParameter<ReadOnlyItemArray<StringValue>>)Parameters["Features"];
    public OptionalValueParameter<BinaryVector> BestKnownSolutionParameter => (OptionalValueParameter<BinaryVector>)Parameters["BestKnownSolution"];
    // Convenience wrappers around the parameter values.
    public IRegressionProblemData RegressionProblemData {
      get => RegressionProblemDataParameter.Value;
      set => RegressionProblemDataParameter.Value = value;
    }
    public VarProGrammar Grammar {
      get => GrammarParameter.Value;
      set => GrammarParameter.Value = value;
    }

    public int MaxSize {
      get => MaxSizeParameter.Value.Value;
      set => MaxSizeParameter.Value.Value = value;
    }

    public int MaxDepth {
      get => MaxDepthParameter.Value.Value;
      set => MaxDepthParameter.Value.Value = value;
    }

    public ReadOnlyItemArray<StringValue> Features {
      get => FeaturesParameter.Value;
      private set {
        // temporarily lift the read-only flag so the autogenerated features can be replaced
        FeaturesParameter.ReadOnly = false;
        FeaturesParameter.Value = value;
        FeaturesParameter.ReadOnly = true;
      }
    }

    public BinaryVector BestKnownSolution {
      get => BestKnownSolutionParameter.Value;
      set => BestKnownSolutionParameter.Value = value;
    }


    // quality is a residual (see Evaluate) => smaller is better
    public override bool Maximization => false;
    // public override bool[] Maximization => new[] { false, false };


    #region not cloned or stored
    // Derived data, regenerated on demand by UpdateFeaturesAndCode().
    ISymbolicExpressionTree[] features;
    private List<TreeToAutoDiffTermConverter.ParametricFunctionGradient> featCode; // AutoDiff delegates for the features
    private List<double[]> featParam; // parameters for the features
    private List<double[][]> featVariables;
    #endregion
97
98
    [StorableConstructor]
    private Problem(StorableConstructorFlag _) : base(_) { }
    // cloning constructor: event handlers are not copied by the cloner and must be re-registered
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }

    // Default constructor: registers all parameters and initializes the binary encoding.
    public Problem() {
      var g = new VarProGrammar();

      // TODO optionally: scale dataset

      Parameters.Add(new ValueParameter<IRegressionProblemData>("ProblemData", "", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<VarProGrammar>("Grammar", "", g));
      Parameters.Add(new FixedValueParameter<IntValue>("MaxSize", "", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>("MaxDepth", "", new IntValue(6)));
      Parameters.Add(new OptionalValueParameter<ReadOnlyItemArray<StringValue>>("Features", "autogenerated"));
      Parameters.Add(new OptionalValueParameter<BinaryVector>("BestKnownSolution", ""));
      FeaturesParameter.ReadOnly = true; // features are autogenerated, not user-editable

      Encoding = new BinaryVectorEncoding("b");
      Encoding.Length = 10000; // default for number of features

      g.ConfigureVariableSymbols(RegressionProblemData);

      InitializeOperators();
      RegisterEventHandlers();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new Problem(this, cloner);
    }


    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // re-attach event handlers which are not persisted
      RegisterEventHandlers();
    }
137
138    #region event handling
139    // Dependencies of parameters and fields
140    // ProblemData
141    //   |
142    // Grammar             MaxSize           MaxDepth          MaxInteractions
143    //   |                    |                 |                 |
144    //   +--------------------+-----------------+-----------------+
145    //   |
146    //  Features
147    //   Code
148    //   |
149    //  Encoding (Length)
150    //   |
151    //   +--------------------+
152    //   |                    |
153    // BestKnownSolution      Operators (Parameter)
154    // BestKnownQuality
155
    // Subscribes to all parameter/value change events (see dependency diagram above).
    // NOTE(review): handlers are only ever added, never removed — confirm the lifetime
    // of this problem instance relative to its parameter objects.
    private void RegisterEventHandlers() {
      RegressionProblemDataParameter.ValueChanged += RegressionProblemDataParameter_ValueChanged;
      RegressionProblemData.Changed += RegressionProblemData_Changed;
      GrammarParameter.ValueChanged += GrammarParameter_ValueChanged;
      Grammar.Changed += Grammar_Changed;
      MaxSizeParameter.Value.ValueChanged += Value_ValueChanged;
      MaxDepthParameter.Value.ValueChanged += Value_ValueChanged;
      FeaturesParameter.ValueChanged += FeaturesParameter_ValueChanged;
    }
165
166    private void FeaturesParameter_ValueChanged(object sender, EventArgs e) {
167      if (Encoding.Length != Features.Length) {
168        Encoding.Length = Features.Length;
169        OnEncodingChanged();
170      }
171    }
172
    // MaxSize/MaxDepth changed => feature set must be regenerated
    private void Value_ValueChanged(object sender, EventArgs e) {
      UpdateFeaturesAndCode();
    }

    // grammar content changed => feature set must be regenerated
    private void Grammar_Changed(object sender, EventArgs e) {
      UpdateFeaturesAndCode();
    }

    // a new grammar object was assigned => subscribe to it and regenerate features
    // NOTE(review): the handler on the previously assigned grammar is never removed —
    // confirm old grammar objects cannot raise Changed afterwards (potential leak).
    private void GrammarParameter_ValueChanged(object sender, EventArgs e) {
      Grammar.Changed += Grammar_Changed;
      UpdateFeaturesAndCode();
    }

    // problem data content changed => reconfigure the grammar's variable symbols
    private void RegressionProblemData_Changed(object sender, EventArgs e) {
      Grammar.ConfigureVariableSymbols(RegressionProblemData);
    }

    // a new problem data object was assigned => subscribe and reconfigure the grammar
    // NOTE(review): same never-unsubscribed pattern as for the grammar above.
    private void RegressionProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      RegressionProblemData.Changed += RegressionProblemData_Changed;
      Grammar.ConfigureVariableSymbols(RegressionProblemData);
    }
194
    protected override void OnEncodingChanged() {
      base.OnEncodingChanged();
      // a different encoding length invalidates all existing solutions
      OnReset();
      ParameterizeOperators();
    }

    protected override void OnReset() {
      base.OnReset();
      // discard best-known quality/solution; they refer to the previous configuration
      BestKnownQualityParameter.ActualValue = null;
      BestKnownSolutionParameter.ActualValue = null;
    }
206
    // Regenerates the feature trees, compiles them to AutoDiff delegates, and publishes
    // the formatted feature expressions via the read-only Features parameter.
    // NOTE(review): the feature count (10000), RNG seed (31415) and maxVariables (3) are
    // hard-coded; 10000 is also duplicated as Encoding.Length default in the constructor —
    // consider a shared named constant.
    private void UpdateFeaturesAndCode() {
      features = GenerateFeaturesSystematic(10000, new MersenneTwister(31415), Grammar, MaxSize, MaxDepth, maxVariables: 3);
      GenerateCode(features, RegressionProblemData);
      var formatter = new InfixExpressionFormatter();
      Features = new ItemArray<StringValue>(features.Select(fi => new StringValue(formatter.Format(fi, System.Globalization.NumberFormatInfo.InvariantInfo, formatString: "0.0")).AsReadOnly())).AsReadOnly();
    }
213
214
215    #endregion
216
217    public override double Evaluate(Individual individual, IRandom random) {
218      if (featCode == null) {
219        UpdateFeaturesAndCode();
220      }
221      var b = individual.BinaryVector(Encoding.Name);
222
223      var rows = RegressionProblemData.TrainingIndices.ToArray();
224
225      var allRows = rows.ToArray();
226      var nRows = allRows.Length;
227      var termIndexList = new List<int>();
228      for (int i = 0; i < b.Length; i++) {
229        if (b[i] == true) {
230          termIndexList.Add(i);
231        }
232      }
233
234      var oldParameterValues = ExtractParameters(termIndexList);
235      var alpha = (double[])oldParameterValues.Clone();
236
237      var target = RegressionProblemData.TargetVariableTrainingValues.ToArray();
238
239      // local function for feature evaluation
240      void Phi(double[] a, ref double[,] phi) {
241        if (phi == null) {
242          phi = new double[nRows, termIndexList.Count + 1]; // + offset term
243          // last term is constant = 1
244          for (int i = 0; i < nRows; i++)
245            phi[i, termIndexList.Count] = 1.0;
246        }
247        var offset = 0;
248        // for each term
249        for (int i = 0; i < termIndexList.Count; i++) {
250          var termIdx = termIndexList[i];
251          var numFeatParam = this.featParam[termIdx].Length;
252          var variableValues = new double[featVariables[termIdx].Length];
253          var featParam = new double[numFeatParam];
254          Array.Copy(a, offset, featParam, 0, featParam.Length);
255          // for each row
256          for (int j = 0; j < nRows; j++) {
257            // copy row values
258            for (int k = 0; k < variableValues.Length; k++) {
259              variableValues[k] = featVariables[termIdx][k][j]; // featVariables is column-order
260            }
261            var tup = featCode[termIdx].Invoke(featParam, variableValues); // TODO for phi we do not actually need g
262            phi[j, i] = tup.Item2;
263          }
264          offset += numFeatParam;
265        }
266      }
267
268      // local function for Jacobian evaluation
269      void Jac(double[] a, ref double[,] J, ref int[,] ind) {
270        if (J == null) {
271          J = new double[nRows, featParam.Sum(fp => fp.Length)]; // all parameters
272          ind = new int[2, featParam.Sum(fp => fp.Length)];
273        }
274        var offset = 0;
275        // for each term
276        for (int i = 0; i < termIndexList.Count; i++) {
277          var termIdx = termIndexList[i];
278          var numFeatParam = this.featParam[termIdx].Length;
279          var variableValues = new double[featVariables[termIdx].Length];
280          var featParam = new double[numFeatParam];
281          Array.Copy(a, offset, featParam, 0, featParam.Length);
282
283          // for each parameter
284          for (int k = 0; k < featParam.Length; k++) {
285            ind[0, offset + k] = i; // column idx in phi
286            ind[1, offset + k] = offset + k; // parameter idx (no parameter is used twice)
287          }
288
289          // for each row
290          for (int j = 0; j < nRows; j++) {
291            // copy row values
292            for (int k = 0; k < variableValues.Length; k++) {
293              variableValues[k] = featVariables[termIdx][k][j]; // featVariables is column-order
294            }
295            var tup = featCode[termIdx].Invoke(featParam, variableValues);
296            // for each parameter
297            for (int k = 0; k < featParam.Length; k++) {
298              J[j, offset + k] = tup.Item1[k];
299            }
300          }
301          offset += numFeatParam;
302        }
303      }
304
305      try {
306        HEAL.VarPro.VariableProjection.Fit(Phi, Jac, target, alpha, out var coeff, out var report);
307
308
309        if (report.residNorm < 0) throw new InvalidProgramException();
310        UpdateParameter(termIndexList, alpha);
311
312
313        individual["Parameter"] = new DoubleArray(alpha); // store the parameter which worked for this individual for solution creation
314        individual["Coeff"] = new DoubleArray(coeff);
315
316        return report.residNormSqr / nRows;
317      } catch (Exception _) {
318        individual["Parameter"] = new DoubleArray(alpha); // store the parameter which worked for this individual for solution creation
319        individual["Coeff"] = new DoubleArray(termIndexList.Count + 1);
320        return double.MaxValue;
321      }
322    }
323
324
325    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
326      base.Analyze(individuals, qualities, results, random);
327
328      var orderedIndividuals = individuals.Zip(qualities, (i, q) => new { Individual = i, Quality = q }).OrderBy(z => z.Quality);
329      var bestIndividual = orderedIndividuals.First().Individual;
330      var bestQ = orderedIndividuals.First().Quality;
331      if (double.IsNaN(BestKnownQuality) || bestQ < BestKnownQuality) {
332        BestKnownQuality = bestQ;
333        BestKnownSolution = bestIndividual.BinaryVector(Encoding.Name);
334      }
335
336      var curBestQuality = results.ContainsKey("BestQuality") ? ((DoubleValue)results["BestQuality"].Value).Value : double.NaN;
337      if (double.IsNaN(curBestQuality) || bestQ < curBestQuality) {
338        var bestVector = bestIndividual.BinaryVector(Encoding.Name);
339        var bestParams = ((DoubleArray)bestIndividual["Parameter"]).ToArray();
340        var bestCoeff = ((DoubleArray)bestIndividual["Coeff"]).ToArray();
341        // var rows = RegressionProblemData.TrainingIndices.ToArray();
342        // var target = RegressionProblemData.TargetVariableTrainingValues.ToArray();
343        //
344        // var rowsArray = rows.ToArray();
345        // var nRows = rowsArray.Length;
346        // var result = new double[nRows];
347        var termIndexList = new List<int>();
348        // var predictorNames = new List<string>();
349        for (int i = 0; i < bestVector.Length; i++) {
350          if (bestVector[i] == true) {
351            termIndexList.Add(i);
352          }
353        }
354
355        results.AddOrUpdateResult("Solution", CreateRegressionSolution(termIndexList.ToArray(), bestParams, bestCoeff, RegressionProblemData));
356        results.AddOrUpdateResult("BestQuality", new DoubleValue(bestQ));
357      }
358    }
359
360    #region retrieval / update of non-linear parameters
361    private double[] ExtractParameters(List<int> termIndexList) {
362      var p = new List<double>();
363      for (int i = 0; i < termIndexList.Count; i++) {
364        p.AddRange(featParam[termIndexList[i]]);
365      }
366      return p.ToArray();
367    }
368
369
370    // parameters are given as a flat array
371    private void UpdateParameter(List<int> termIndexList, double[] p) {
372      var offset = 0;
373      for (int i = 0; i < termIndexList.Count; i++) {
374        var numFeatParam = featParam[termIndexList[i]].Length;
375        Array.Copy(p, offset, featParam[termIndexList[i]], 0, numFeatParam);
376        offset += numFeatParam;
377      }
378    }
379    #endregion
380
381    #region feature generation
382    /*
383    private static ISymbolicExpressionTree[] GenerateFeatures(int n, IRandom random, ISymbolicDataAnalysisGrammar grammar, int maxSize, int maxDepth) {
384      var features = new ISymbolicExpressionTree[n];
385      var hashes = new HashSet<ulong>();
386      int i = 0;
387      while (i < features.Length) {
388        var t = ProbabilisticTreeCreator.Create(random, grammar, maxSize, maxDepth);
389        t = TreeSimplifier.Simplify(t);
390        var th = SymbolicExpressionTreeHash.ComputeHash(t);
391        if (!hashes.Contains(th)) {
392          features[i++] = t;
393          hashes.Add(th);
394        }
395      }
396      return features;
397    }
398    */
399    private static ISymbolicExpressionTree[] GenerateFeaturesSystematic(int n, IRandom random, ISymbolicDataAnalysisGrammar grammar, int maxSize, int maxDepth, int maxVariables) {
400      var hashes = new HashSet<ulong>();
401
402      var root = grammar.ProgramRootSymbol.CreateTreeNode();
403      var trees = new List<ISymbolicExpressionTreeNode>();
404      var incompleteTrees = new Queue<ISymbolicExpressionTreeNode>();
405      incompleteTrees.Enqueue(root);
406      while (incompleteTrees.Any() && trees.Count < n) {
407        var t = incompleteTrees.Dequeue();
408        // find first extension point
409        ISymbolicExpressionTreeNode parent = null;
410        var numVariables = 0;
411        int size = 0;
412        int depth = t.GetDepth();
413        foreach (var node in t.IterateNodesPrefix()) {
414          size++;
415          if (node is VariableTreeNodeBase) numVariables++;
416          if (node.SubtreeCount < grammar.GetMinimumSubtreeCount(node.Symbol)) {
417            parent = node;
418            break;
419          }
420        }
421        if (numVariables > maxVariables || size > maxSize || depth > maxDepth) continue;
422        if (parent == null) {
423          // no extension point found => sentence is complete
424          var hash = SymbolicExpressionTreeHash.ComputeHash(t);
425          if (hashes.Add(SymbolicExpressionTreeHash.ComputeHash(t))) {
426            trees.Add((ISymbolicExpressionTreeNode)t.Clone());
427          }
428
429          // check if the (complete) sentence can be extended
430          foreach (var node in t.IterateNodesPrefix()) {
431            if (node.SubtreeCount < grammar.GetMaximumSubtreeCount(node.Symbol)) {
432              parent = node;
433              break;
434            }
435          }
436          if (parent == null) {
437            // no extension possible => continue with next tree in queue
438            continue;
439          }
440        }
441
442        if (parent == null) throw new InvalidProgramException(); // assertion
443
444        // the sentence must / can be extended
445        var allowedChildSy = grammar.GetAllowedChildSymbols(parent.Symbol, parent.SubtreeCount).OrderBy(sy => sy.MinimumArity == 0 ? 0 : 1); // terminals first
446        if (!allowedChildSy.Any()) throw new InvalidProgramException(); // grammar fail
447
448        // make new variants and add them to the queue of incomplete trees
449        foreach (var sy in allowedChildSy) {
450          if (sy is DataAnalysis.Symbolic.Variable variableSy) {
451            // generate all variables
452            foreach (var varName in variableSy.VariableNames) {
453              var varNode = (VariableTreeNode)variableSy.CreateTreeNode();
454              varNode.ResetLocalParameters(random);
455              varNode.VariableName = varName;
456              varNode.Weight = 1.0;
457              parent.AddSubtree(varNode);
458              incompleteTrees.Enqueue((ISymbolicExpressionTreeNode)t.Clone());
459              parent.RemoveSubtree(parent.SubtreeCount - 1); // prepare for next iteration
460            }
461          } else {
462            var node = sy.CreateTreeNode();
463            node.ResetLocalParameters(random);
464            parent.AddSubtree(node);
465            incompleteTrees.Enqueue((ISymbolicExpressionTreeNode)t.Clone());
466            parent.RemoveSubtree(parent.SubtreeCount - 1); // prepare for next iteration
467          }
468        }
469
470      }
471      return trees.Select(r => new SymbolicExpressionTree(r)).ToArray();
472    }
473
474
475    private void GenerateCode(ISymbolicExpressionTree[] features, IRegressionProblemData problemData) {
476      this.featCode = new List<TreeToAutoDiffTermConverter.ParametricFunctionGradient>();
477      this.featParam = new List<double[]>();
478      this.featVariables = new List<double[][]>();
479      foreach (var f in features) {
480        var featureCode = Compile(f, problemData, out var initialParamValues, out var variableValues);
481
482        featCode.Add(featureCode);
483        featParam.Add(initialParamValues);
484        featVariables.Add(variableValues);
485      }
486    }
487
488
    // OpCodes which can be handled by the AutoDiff conversion (Power/Root deliberately
    // commented out). NOTE(review): this set is not referenced anywhere in this file —
    // confirm intended use (e.g. grammar validation) before removing.
    private static readonly HashSet<byte> supportedOpCodes = new HashSet<byte>() {
      (byte)OpCode.Constant,
      (byte)OpCode.Variable,
      (byte)OpCode.Add,
      (byte)OpCode.Sub,
      (byte)OpCode.Mul,
      (byte)OpCode.Div,
      (byte)OpCode.Exp,
      (byte)OpCode.Log,
      (byte)OpCode.Sin,
      (byte)OpCode.Cos,
      (byte)OpCode.Tan,
      (byte)OpCode.Tanh,
      // (byte)OpCode.Power,
      // (byte)OpCode.Root,
      (byte)OpCode.SquareRoot,
      (byte)OpCode.Square,
      (byte)OpCode.CubeRoot,
      (byte)OpCode.Cube,
      (byte)OpCode.Absolute,
      (byte)OpCode.AnalyticQuotient
    };
    // Converts a feature tree into an AutoDiff delegate evaluating the feature value and
    // its gradient w.r.t. the numeric parameters; also extracts the initial parameter
    // values and the training-data columns referenced by the tree.
    // NOTE(review): the bool result of TryConvertToAutoDiff is ignored — if conversion
    // can fail for trees produced by this grammar, the out values may be invalid; confirm.
    private TreeToAutoDiffTermConverter.ParametricFunctionGradient Compile(ISymbolicExpressionTree tree, IRegressionProblemData problemData,
      out double[] initialParameterValues, out double[][] variableValues) {
      TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: false, addLinearScalingTerms: false,
        out var parameters, out initialParameterValues, out var func, out var func_grad);
      variableValues = new double[parameters.Count][];
      for (int i = 0; i < parameters.Count; i++) {
        variableValues[i] = problemData.Dataset.GetDoubleValues(parameters[i].variableName, problemData.TrainingIndices).ToArray(); // TODO: we could reuse the arrays
      }
      return func_grad;
    }
521    #endregion
522
523    #region solution creation
    // Builds a symbolic regression solution as a linear combination of the selected
    // feature trees: sum_i coefficients[i] * feature_i + offset, where the last
    // coefficient is the constant offset and `parameters` is the flat array of
    // optimized non-linear feature parameters (same layout as in ExtractParameters).
    private IRegressionSolution CreateRegressionSolution(int[] featIdx, double[] parameters, double[] coefficients, IRegressionProblemData problemData) {
      var root = (new ProgramRootSymbol()).CreateTreeNode();
      var start = (new StartSymbol()).CreateTreeNode();
      var add = (new Addition()).CreateTreeNode();
      root.AddSubtree(start);
      start.AddSubtree(add);
      var offset = 0;
      for (int i = 0; i < featIdx.Length; i++) {
        // clone the feature's expression (skip ProgramRoot/Start wrapper nodes)
        var term = (ISymbolicExpressionTreeNode)features[featIdx[i]].Root.GetSubtree(0).GetSubtree(0).Clone();

        // substitute the optimized parameter values into the cloned term
        var termParameters = new double[featParam[featIdx[i]].Length];
        Array.Copy(parameters, offset, termParameters, 0, termParameters.Length);
        ReplaceParameters(term, termParameters);
        offset += termParameters.Length;

        // coefficient * term
        var mul = (new Multiplication()).CreateTreeNode();
        mul.AddSubtree(term);
        mul.AddSubtree(CreateConstant(coefficients[i]));
        add.AddSubtree(mul);
      }
      // last coeff is offset
      add.AddSubtree(CreateConstant(coefficients[coefficients.Length - 1]));

      var tree = new SymbolicExpressionTree(root);
      // NOTE(review): despite the "scaled" names, the dataset is copied unchanged here —
      // no scaling is applied (cf. the "TODO optionally: scale dataset" in the ctor).
      var ds = problemData.Dataset;
      var scaledDataset = new Dataset(ds.DoubleVariables, ds.ToArray(ds.DoubleVariables, Enumerable.Range(0, ds.Rows)));
      var scaledProblemData = new RegressionProblemData(scaledDataset, problemData.AllowedInputVariables, problemData.TargetVariable);
      scaledProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
      scaledProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
      scaledProblemData.TestPartition.Start = problemData.TestPartition.Start;
      scaledProblemData.TestPartition.End = problemData.TestPartition.End;
      return new SymbolicRegressionSolution(
        new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeNativeInterpreter()), scaledProblemData);
    }
558
559    private void ReplaceParameters(ISymbolicExpressionTreeNode term, double[] termParameters) {
560      // Autodiff converter extracts parameters using a pre-order tree traversal.
561      // Therefore, we must use a pre-order tree traversal here as well.
562      // Only ConstantTreeNode values are optimized.
563      var paramIdx = 0;
564      foreach (var node in term.IterateNodesPrefix().OfType<ConstantTreeNode>()) {
565        node.Value = termParameters[paramIdx++];
566      }
567      if (paramIdx != termParameters.Length) throw new InvalidProgramException();
568    }
569
570    private ISymbolicExpressionTreeNode CreateConstant(double coeff) {
571      var constNode = (ConstantTreeNode)(new Constant()).CreateTreeNode();
572      constNode.Value = coeff;
573      return constNode;
574    }
575
    // Maps interpreter op-codes to symbol instances.
    // NOTE(review): this dictionary is not referenced anywhere in this file — it may be
    // a leftover from the NativeInterpreter-based implementation; confirm before removing.
    Dictionary<byte, Symbol> symbols = new Dictionary<byte, Symbol>() {
      {(byte)OpCode.Add, new Addition()  },
      {(byte)OpCode.Sub, new Subtraction()  },
      {(byte)OpCode.Mul, new Multiplication()  },
      {(byte)OpCode.Div, new Division()  },
      {(byte)OpCode.Exp, new Exponential()  },
      {(byte)OpCode.Log, new Logarithm()  },
      {(byte)OpCode.Sin, new Sine()  },
      {(byte)OpCode.Cos, new Cosine()  },
      {(byte)OpCode.Tan, new Tangent()  },
      {(byte)OpCode.Tanh, new HyperbolicTangent()  },
      {(byte)OpCode.Square, new Square()  },
      {(byte)OpCode.SquareRoot, new SquareRoot()  },
      {(byte)OpCode.Cube, new Cube()  },
      {(byte)OpCode.CubeRoot, new CubeRoot()  },
      {(byte)OpCode.Absolute, new Absolute()  },
      {(byte)OpCode.AnalyticQuotient, new AnalyticQuotient()  },
    };

    // used for solutions only
    // NOTE(review): constSy and varSy also appear unreferenced in this file — confirm.
    Symbol constSy = new Constant();
    Symbol varSy = new DataAnalysis.Symbolic.Variable();


    #endregion
598
599
600    #endregion
601
602    public void Load(IRegressionProblemData data) {
603      RegressionProblemData = data;
604    }
605
    // Registers the problem-specific analyzers and parameterizes all operators.
    private void InitializeOperators() {
      // tracks how often each feature (allele) occurs in the population
      Operators.Add(new AlleleFrequencyAnalyzer());

      // var cvMSEAnalyzer = new BestAverageWorstQualityAnalyzer();
      // cvMSEAnalyzer.Name = "CVMSE Analzer";
      // ParameterizeAnalyzer(cvMSEAnalyzer, "CV MSE (avg)");
      // Operators.Add(cvMSEAnalyzer);
      //
      // var trainingMSEAnalyzer = new BestAverageWorstQualityAnalyzer();
      // trainingMSEAnalyzer.Name = "Training MSE Analzer";
      // ParameterizeAnalyzer(trainingMSEAnalyzer, "Train MSE (avg)");
      // Operators.Add(trainingMSEAnalyzer);

      ParameterizeOperators();
    }
621
    // Renames an analyzer's quality-related parameters so that multiple quality analyzers
    // (e.g. CV MSE and training MSE) can coexist without actual-name collisions.
    // NOTE(review): currently only referenced from commented-out code in
    // InitializeOperators — confirm whether it is still needed.
    private void ParameterizeAnalyzer(BestAverageWorstQualityAnalyzer analyzer, string qualityName) {
      analyzer.QualityParameter.ActualName = qualityName;
      analyzer.QualitiesParameter.ActualName = qualityName + " " + analyzer.QualitiesParameter.ActualName;
      analyzer.BestQualityParameter.ActualName += " " + qualityName;
      analyzer.CurrentAverageQualityParameter.ActualName += " " + qualityName;
      analyzer.CurrentBestQualityParameter.ActualName += " " + qualityName;
      analyzer.CurrentWorstQualityParameter.ActualName += " " + qualityName;
      analyzer.BestKnownQualityParameter.ActualName += " " + qualityName;
      analyzer.AbsoluteDifferenceBestKnownToBestParameter.ActualName += " " + qualityName;
      analyzer.RelativeDifferenceBestKnownToBestParameter.ActualName += " " + qualityName;
    }
633
634    private void ParameterizeOperators() {
635      foreach (var op in Operators) {
636        if (op is AlleleFrequencyAnalyzer alleleAnalyzer) {
637          alleleAnalyzer.SolutionParameter.ActualName = Encoding.Name;
638        }
639        if (op is MultiAnalyzer multiAnalyzer) {
640          var freqAnalyzer = Operators.OfType<AlleleFrequencyAnalyzer>().First();
641          multiAnalyzer.Operators.SetItemCheckedState(freqAnalyzer, true);
642        }
643      }
644      foreach (var op in Encoding.Operators) {
645        if (op is SomePositionsBitflipManipulator multiFlipManipulator) {
646          multiFlipManipulator.MutationProbabilityParameter.Value.Value = 1.0 / Encoding.Length; // one feature on average
647        } else if (op is RandomBinaryVectorCreator creator) {
648          creator.TrueProbability.Value = 20.0 / Encoding.Length; // 20 features on average
649        }
650      }
651    }
652  }
653}
Note: See TracBrowser for help on using the repository browser.