1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.Threading.Tasks;
|
---|
6 | using HeuristicLab.Problems.DataAnalysis;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg {
|
---|
9 | // experimenting with heuristics
|
---|
10 | //
|
---|
11 | // question: how can relevant interacting terms be reliably detected?
|
---|
12 | // - is this even feasible?
|
---|
13 | // - even if variables are colinear?
|
---|
14 | // - even for non-linear transformations
|
---|
15 | //
|
---|
16 | // Also see Multi-variate adaptive regression splines (MARS)
|
---|
17 | // Maybe we could use MARS-style basis functions to identify the relevant interaction terms. (tune split points and find optimal interaction term with max spearmans rank)
|
---|
18 | //
|
---|
19 | // assuming we interactions of have scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo
|
---|
20 | // this leads to: x y + x yo + y xo + yo xo.
|
---|
21 | // We only need to identify the x y as we assume that all other terms are accounted for
|
---|
22 | public static class Heuristics {
|
---|
23 | public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) {
|
---|
24 | var am = a.Average();
|
---|
25 | var bm = b.Average();
|
---|
26 | var cm = c.Average();
|
---|
27 | var p1 = Enumerable.Range(0, a.Length).Where(i => a[i] < am);
|
---|
28 | var p2 = Enumerable.Range(0, a.Length).Where(i => a[i] > am);
|
---|
29 | var p3 = Enumerable.Range(0, a.Length).Where(i => b[i] < bm);
|
---|
30 | var p4 = Enumerable.Range(0, a.Length).Where(i => b[i] > bm);
|
---|
31 | var p5 = Enumerable.Range(0, a.Length).Where(i => c[i] < cm);
|
---|
32 | var p6 = Enumerable.Range(0, a.Length).Where(i => c[i] > cm);
|
---|
33 |
|
---|
34 | return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count() + p5.Count() + p6.Count()) *
|
---|
35 | (
|
---|
36 | p1.Count() * CorrelationForInteraction(b, c, target, p1) +
|
---|
37 | p2.Count() * CorrelationForInteraction(b, c, target, p2) +
|
---|
38 | p3.Count() * CorrelationForInteraction(a, c, target, p3) +
|
---|
39 | p4.Count() * CorrelationForInteraction(a, c, target, p3) +
|
---|
40 | p5.Count() * CorrelationForInteraction(a, b, target, p5) +
|
---|
41 | p6.Count() * CorrelationForInteraction(a, b, target, p6)
|
---|
42 | );
|
---|
43 | }
|
---|
44 | public static double CorrelationForInteraction(double[] a, double[] b, double[] z) {
|
---|
45 | return CorrelationForInteraction(a, b, z, Enumerable.Range(0, a.Length));
|
---|
46 | }
|
---|
47 | public static double CorrelationForInteraction(double[] a, double[] b, double[] z, IEnumerable<int> idx) {
|
---|
48 | //
|
---|
49 | var am = a.Average();
|
---|
50 | var bm = b.Average();
|
---|
51 | var p1 = idx.Where(i => a[i] < am);
|
---|
52 | var p2 = idx.Where(i => a[i] > am);
|
---|
53 | var p3 = idx.Where(i => b[i] < bm);
|
---|
54 | var p4 = idx.Where(i => b[i] > bm);
|
---|
55 |
|
---|
56 | return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count()) *
|
---|
57 | (p1.Count() * CorrelForPartition(b, z, p1) +
|
---|
58 | p2.Count() * CorrelForPartition(b, z, p2) +
|
---|
59 | p3.Count() * CorrelForPartition(a, z, p3) +
|
---|
60 | p4.Count() * CorrelForPartition(a, z, p4));
|
---|
61 | }
|
---|
62 |
|
---|
63 | public static double CorrelForPartition(double[] a, double[] z, IEnumerable<int> idx) {
|
---|
64 | var zp = new List<double>();
|
---|
65 | var ap = new List<double>();
|
---|
66 | foreach (var i in idx) {
|
---|
67 | zp.Add(z[i]);
|
---|
68 | ap.Add(a[i]);
|
---|
69 | }
|
---|
70 | OnlineCalculatorError error;
|
---|
71 | var r = SpearmansRankCorrelationCoefficientCalculator.CalculateSpearmansRank(zp, ap, out error);
|
---|
72 | if (error != OnlineCalculatorError.None) r = 0;
|
---|
73 | return r * r;
|
---|
74 | }
|
---|
75 | }
|
---|
76 | }
|
---|