using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg {
  // experimenting with heuristics
  //
  // question: how can relevant interacting terms be reliably detected?
  //  - is this even feasible?
  //  - even if variables are collinear?
  //  - even for non-linear transformations?
  //
  // Also see multivariate adaptive regression splines (MARS).
  // Maybe we could use MARS-style basis functions to identify the relevant interaction terms
  // (tune split points and find the optimal interaction term with max Spearman's rank correlation).
  //
  // Assuming we have interactions of scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo,
  // this leads to: x y + x yo + y xo + yo xo.
  // We only need to identify the x y term as we assume that all other terms are accounted for.
  public static class Heuristics {

    /// <summary>
    /// Interaction score for three candidate variables with respect to <paramref name="target"/>.
    /// The rows are split at the mean of each variable into a "below" and an "above" partition
    /// (rows exactly equal to the mean belong to neither). For each partition the pairwise
    /// interaction score of the two remaining variables is evaluated on the rows of that partition,
    /// and the six scores are averaged, weighted by partition size.
    /// </summary>
    /// <param name="a">Values of the first variable; its length defines the number of rows.</param>
    /// <param name="b">Values of the second variable (indexed like <paramref name="a"/>).</param>
    /// <param name="c">Values of the third variable (indexed like <paramref name="a"/>).</param>
    /// <param name="target">Target values (indexed like <paramref name="a"/>).</param>
    /// <returns>The size-weighted mean score, or 0 if all six partitions are empty.</returns>
    public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) {
      var am = a.Average();
      var bm = b.Average();
      var cm = c.Average();

      // Materialize each partition once: it is counted here and enumerated again by the nested calls.
      var rows = Enumerable.Range(0, a.Length);
      int[] p1 = rows.Where(i => a[i] < am).ToArray();
      int[] p2 = rows.Where(i => a[i] > am).ToArray();
      int[] p3 = rows.Where(i => b[i] < bm).ToArray();
      int[] p4 = rows.Where(i => b[i] > bm).ToArray();
      int[] p5 = rows.Where(i => c[i] < cm).ToArray();
      int[] p6 = rows.Where(i => c[i] > cm).ToArray();

      int totalCount = p1.Length + p2.Length + p3.Length + p4.Length + p5.Length + p6.Length;
      // All variables constant: no partition carries information. Avoid 1.0 / 0 * 0 = NaN.
      if (totalCount == 0) return 0.0;

      return 1.0 / totalCount * (
        p1.Length * CorrelationForInteraction(b, c, target, p1) +
        p2.Length * CorrelationForInteraction(b, c, target, p2) +
        p3.Length * CorrelationForInteraction(a, c, target, p3) +
        p4.Length * CorrelationForInteraction(a, c, target, p4) + // was evaluated on p3 (copy-paste error)
        p5.Length * CorrelationForInteraction(a, b, target, p5) +
        p6.Length * CorrelationForInteraction(a, b, target, p6)
      );
    }

    /// <summary>
    /// Interaction score for two candidate variables with respect to <paramref name="z"/>,
    /// evaluated over all rows 0 .. a.Length - 1.
    /// </summary>
    /// <param name="a">Values of the first variable; its length defines the number of rows.</param>
    /// <param name="b">Values of the second variable (indexed like <paramref name="a"/>).</param>
    /// <param name="z">Target values (indexed like <paramref name="a"/>).</param>
    /// <returns>The score of the index-based overload applied to all rows.</returns>
    public static double CorrelationForInteraction(double[] a, double[] b, double[] z) {
      return CorrelationForInteraction(a, b, z, Enumerable.Range(0, a.Length));
    }

    /// <summary>
    /// Interaction score for two candidate variables with respect to <paramref name="z"/>,
    /// restricted to the rows in <paramref name="idx"/>. The rows are split at the mean of each
    /// variable; on each partition the squared rank correlation between the other variable and
    /// <paramref name="z"/> is computed, and the four values are averaged, weighted by partition size.
    /// </summary>
    /// <param name="a">Values of the first variable.</param>
    /// <param name="b">Values of the second variable.</param>
    /// <param name="z">Target values.</param>
    /// <param name="idx">Row indices (into a, b and z) to consider.</param>
    /// <returns>The size-weighted mean score, or 0 if all four partitions are empty.</returns>
    public static double CorrelationForInteraction(double[] a, double[] b, double[] z, IEnumerable<int> idx) {
      // Note: the split points are the means over the complete arrays, not only over the rows in idx.
      var am = a.Average();
      var bm = b.Average();

      // Materialize once; this also avoids enumerating idx again for every Count().
      int[] p1 = idx.Where(i => a[i] < am).ToArray();
      int[] p2 = idx.Where(i => a[i] > am).ToArray();
      int[] p3 = idx.Where(i => b[i] < bm).ToArray();
      int[] p4 = idx.Where(i => b[i] > bm).ToArray();

      int totalCount = p1.Length + p2.Length + p3.Length + p4.Length;
      // Empty idx or no row strictly below/above a mean: avoid 1.0 / 0 * 0 = NaN,
      // which would otherwise poison the weighted sum of the caller.
      if (totalCount == 0) return 0.0;

      return 1.0 / totalCount * (
        p1.Length * CorrelForPartition(b, z, p1) +
        p2.Length * CorrelForPartition(b, z, p2) +
        p3.Length * CorrelForPartition(a, z, p3) +
        p4.Length * CorrelForPartition(a, z, p4)
      );
    }

    /// <summary>
    /// Squared Spearman rank correlation (as computed by
    /// <c>SpearmansRankCorrelationCoefficientCalculator.CalculateSpearmansRank</c>) between
    /// <paramref name="a"/> and <paramref name="z"/>, using only the rows in <paramref name="idx"/>.
    /// </summary>
    /// <param name="a">Values of the variable.</param>
    /// <param name="z">Target values.</param>
    /// <param name="idx">Row indices (into a and z) that form the partition.</param>
    /// <returns>r * r, where r is set to 0 if the calculator reports an error.</returns>
    public static double CorrelForPartition(double[] a, double[] z, IEnumerable<int> idx) {
      var zp = new List<double>();
      var ap = new List<double>();
      foreach (var i in idx) {
        zp.Add(z[i]);
        ap.Add(a[i]);
      }
      OnlineCalculatorError error;
      var r = SpearmansRankCorrelationCoefficientCalculator.CalculateSpearmansRank(zp, ap, out error);
      // Treat any calculator error as "no correlation".
      if (error != OnlineCalculatorError.None) r = 0;
      return r * r;
    }
  }
}