[15425] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using System.Threading.Tasks;
|
---|
| 6 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 7 |
|
---|
| 8 | namespace HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg {
|
---|
| 9 | // experimenting with heuristics
|
---|
| 10 | //
|
---|
| 11 | // question: how can relevant interacting terms be reliably detected?
|
---|
| 12 | // - is this even feasible?
|
---|
| 13 | // - even if variables are colinear?
|
---|
| 14 | // - even for non-linear transformations
|
---|
[15437] | 15 | //
|
---|
| 16 | // Also see Multi-variate adaptive regression splines (MARS)
|
---|
| 17 | // Maybe we could use MARS-style basis functions to identify the relevant interaction terms. (tune split points and find optimal interaction term with max spearmans rank)
|
---|
| 18 | //
|
---|
[15425] | 19 | // assuming we interactions of have scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo
|
---|
| 20 | // this leads to: x y + x yo + y xo + yo xo.
|
---|
| 21 | // We only need to identify the x y as we assume that all other terms are accounted for
|
---|
| 22 | public static class Heuristics {
|
---|
[15426] | 23 | public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) {
|
---|
[15425] | 24 | var am = a.Average();
|
---|
| 25 | var bm = b.Average();
|
---|
[15438] | 26 | var cm = c.Average();
|
---|
[15425] | 27 | var p1 = Enumerable.Range(0, a.Length).Where(i => a[i] < am);
|
---|
| 28 | var p2 = Enumerable.Range(0, a.Length).Where(i => a[i] > am);
|
---|
| 29 | var p3 = Enumerable.Range(0, a.Length).Where(i => b[i] < bm);
|
---|
| 30 | var p4 = Enumerable.Range(0, a.Length).Where(i => b[i] > bm);
|
---|
[15438] | 31 | var p5 = Enumerable.Range(0, a.Length).Where(i => c[i] < cm);
|
---|
| 32 | var p6 = Enumerable.Range(0, a.Length).Where(i => c[i] > cm);
|
---|
[15425] | 33 |
|
---|
[15438] | 34 | return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count() + p5.Count() + p6.Count()) *
|
---|
| 35 | (
|
---|
| 36 | p1.Count() * CorrelationForInteraction(b, c, target, p1) +
|
---|
| 37 | p2.Count() * CorrelationForInteraction(b, c, target, p2) +
|
---|
| 38 | p3.Count() * CorrelationForInteraction(a, c, target, p3) +
|
---|
| 39 | p4.Count() * CorrelationForInteraction(a, c, target, p3) +
|
---|
| 40 | p5.Count() * CorrelationForInteraction(a, b, target, p5) +
|
---|
| 41 | p6.Count() * CorrelationForInteraction(a, b, target, p6)
|
---|
| 42 | );
|
---|
| 43 | }
|
---|
| 44 | public static double CorrelationForInteraction(double[] a, double[] b, double[] z) {
|
---|
| 45 | return CorrelationForInteraction(a, b, z, Enumerable.Range(0, a.Length));
|
---|
| 46 | }
|
---|
| 47 | public static double CorrelationForInteraction(double[] a, double[] b, double[] z, IEnumerable<int> idx) {
|
---|
| 48 | //
|
---|
| 49 | var am = a.Average();
|
---|
| 50 | var bm = b.Average();
|
---|
| 51 | var p1 = idx.Where(i => a[i] < am);
|
---|
| 52 | var p2 = idx.Where(i => a[i] > am);
|
---|
| 53 | var p3 = idx.Where(i => b[i] < bm);
|
---|
| 54 | var p4 = idx.Where(i => b[i] > bm);
|
---|
| 55 |
|
---|
[15425] | 56 | return 1.0 / (p1.Count() + p2.Count() + p3.Count() + p4.Count()) *
|
---|
| 57 | (p1.Count() * CorrelForPartition(b, z, p1) +
|
---|
| 58 | p2.Count() * CorrelForPartition(b, z, p2) +
|
---|
| 59 | p3.Count() * CorrelForPartition(a, z, p3) +
|
---|
| 60 | p4.Count() * CorrelForPartition(a, z, p4));
|
---|
| 61 | }
|
---|
| 62 |
|
---|
| 63 | public static double CorrelForPartition(double[] a, double[] z, IEnumerable<int> idx) {
|
---|
| 64 | var zp = new List<double>();
|
---|
| 65 | var ap = new List<double>();
|
---|
| 66 | foreach (var i in idx) {
|
---|
| 67 | zp.Add(z[i]);
|
---|
| 68 | ap.Add(a[i]);
|
---|
| 69 | }
|
---|
| 70 | OnlineCalculatorError error;
|
---|
| 71 | var r = SpearmansRankCorrelationCoefficientCalculator.CalculateSpearmansRank(zp, ap, out error);
|
---|
| 72 | if (error != OnlineCalculatorError.None) r = 0;
|
---|
| 73 | return r * r;
|
---|
| 74 | }
|
---|
| 75 | }
|
---|
| 76 | }
|
---|