Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2796_SymbReg/HeuristicLab.Algorithms.DataAnalysis/3.4/Heuristics.cs @ 17021

Last change on this file since 17021 was 15438, checked in by gkronber, 7 years ago

#2796 refactoring to simplify the code

File size: 3.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Problems.DataAnalysis;
7
8namespace HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg {
9  // experimenting with heuristics
10  //
11  // question: how can relevant interacting terms be reliably detected?
12  //           - is this even feasible?
13  //           - even if variables are colinear?
14  //           - even for non-linear transformations
15  //
16  // Also see Multi-variate adaptive regression splines (MARS)
17  // Maybe we could use MARS-style basis functions to identify the relevant interaction terms. (tune split points and find optimal interaction term with max spearmans rank)
18  //
19  // assuming we interactions of have scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo
20  // this leads to: x y + x yo + y xo + yo xo.
21  // We only need to identify the x y as we assume that all other terms are accounted for
22  public static class Heuristics {
23    public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) {
24      var am = a.Average();
25      var bm = b.Average();
26      var cm = c.Average();
27      var p1 = Enumerable.Range(0, a.Length).Where(i => a[i] < am);
28      var p2 = Enumerable.Range(0, a.Length).Where(i => a[i] > am);
29      var p3 = Enumerable.Range(0, a.Length).Where(i => b[i] < bm);
30      var p4 = Enumerable.Range(0, a.Length).Where(i => b[i] > bm);
31      var p5 = Enumerable.Range(0, a.Length).Where(i => c[i] < cm);
32      var p6 = Enumerable.Range(0, a.Length).Where(i => c[i] > cm);
33
34      return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count() + p5.Count() + p6.Count()) *
35        (
36        p1.Count() * CorrelationForInteraction(b, c, target, p1) +
37        p2.Count() * CorrelationForInteraction(b, c, target, p2) +
38        p3.Count() * CorrelationForInteraction(a, c, target, p3) +
39        p4.Count() * CorrelationForInteraction(a, c, target, p3) +
40        p5.Count() * CorrelationForInteraction(a, b, target, p5) +
41        p6.Count() * CorrelationForInteraction(a, b, target, p6)
42      );
43    }
44    public static double CorrelationForInteraction(double[] a, double[] b, double[] z) {
45      return CorrelationForInteraction(a, b, z, Enumerable.Range(0, a.Length));
46    }
47    public static double CorrelationForInteraction(double[] a, double[] b, double[] z, IEnumerable<int> idx) {
48      //
49      var am = a.Average();
50      var bm = b.Average();
51      var p1 = idx.Where(i => a[i] < am);
52      var p2 = idx.Where(i => a[i] > am);
53      var p3 = idx.Where(i => b[i] < bm);
54      var p4 = idx.Where(i => b[i] > bm);
55
56      return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count()) *
57             (p1.Count() * CorrelForPartition(b, z, p1) +
58             p2.Count() * CorrelForPartition(b, z, p2) +
59             p3.Count() * CorrelForPartition(a, z, p3) +
60             p4.Count() * CorrelForPartition(a, z, p4));
61    }
62
63    public static double CorrelForPartition(double[] a, double[] z, IEnumerable<int> idx) {
64      var zp = new List<double>();
65      var ap = new List<double>();
66      foreach (var i in idx) {
67        zp.Add(z[i]);
68        ap.Add(a[i]);
69      }
70      OnlineCalculatorError error;
71      var r = SpearmansRankCorrelationCoefficientCalculator.CalculateSpearmansRank(zp, ap, out error);
72      if (error != OnlineCalculatorError.None) r = 0;
73      return r * r;
74    }
75  }
76}
Note: See TracBrowser for help on using the repository browser.