Changeset 15426


Ignore:
Timestamp:
10/20/17 17:18:33 (5 years ago)
Author:
gkronber
Message:

#2796 testing interaction heuristics

Location:
branches/MCTS-SymbReg-2796
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/Heuristics.cs

    r15425 r15426  
    1818  // We only need to identify the interaction term x*y, as we assume that all other terms are already accounted for
    1919  public static class Heuristics {
     20    public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) {
     21      return 0.0;
     22    }
    2023    public static double CorrelationForInteraction(double[] a, double[] b, double[] z) {
    2124      //
  • branches/MCTS-SymbReg-2796/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs

    r15425 r15426  
    1919    [TestCategory("Algorithms.DataAnalysis")]
    2020    [TestProperty("Time", "short")]
    21     public void TestHeuristics() {
     21    public void TestSimple2dInteractions() {
    2222      {
    2323        // a, b ~ U(0, 1) should be trivial
     
    9797      }
    9898      {
     99        var rand = new MersenneTwister(1234);
    99100        // a ~ N(100, 1), b ~ N(-100, 1)
    100         var nRand = new NormalDistributedRandom(new MersenneTwister(1234), 0, 1);
    101         var aRand = new NormalDistributedRandom(new MersenneTwister(1234), 100, 1);
    102         var bRand = new NormalDistributedRandom(new MersenneTwister(1234), -100, 1);
     101        var nRand = new NormalDistributedRandom(rand, 0, 1);
     102        var aRand = new NormalDistributedRandom(rand, 100, 1);
     103        var bRand = new NormalDistributedRandom(rand, -100, 1);
    103104
    104105        int n = 10000; // large sample so that we can use the thresholds below
     
    115116        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05); // b < 0
    116117        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
    117         Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
     118        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); // random variables are not correlated
     119      }
     120    }
     121
     122    [TestMethod]
     123    [TestCategory("Algorithms.DataAnalysis")]
     124    [TestProperty("Time", "short")]
     125    public void TestGeneral2dInteractions() {
     126      {
     127        // we should be able to reliably detect when a product of two variables is correlated with the target variable       
     128
     129        // the test samples x from a two dimensional normal distribution
     130        // the covariance matrix for the normal distribution is randomly sampled
     131        // this means x_1 and x_2 might be highly correlated
     132        // the mean of the normal distribution is randomly sampled (most critical are probably zero-mean distributions)
     133        // y is calculated as x_1*x_2
     134
     135        var conditionNumber = 10000;
     136        for (int iter = 0; iter < 100; iter++) {
     137          double m0 = 0.0; double m1 = 0.0;
     138          alglib.hqrndstate randState;
     139          alglib.hqrndseed(1234, 31415, out randState);
     140
     141          // uncomment if non-zero mean distributions should be tested
     142          //alglib.hqrndnormal2(randState, out m0, out m1);
     143
     144          double[,] cov_ab = new double[2, 2];
     145          double[,] cov_xy = new double[2, 2];
     146          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_ab);
     147          alglib.spdmatrixcholesky(ref cov_ab, 2, true);
     148
     149          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_xy);
     150          alglib.spdmatrixcholesky(ref cov_xy, 2, true);
     151
     152          // generate a, b by sampling from a 2dim multivariate normal distribution
     153          // generate x, y by sampling from another 2dim multivariate normal distribution
     154          // a,b and x,y might be correlated but x,y are not correlated to a,b
     155          int N = 1000; // large sample size to make sure the test thresholds hold
     156          double[] a = new double[N];
     157          double[] b = new double[N];
     158          double[] x = new double[N];
     159          double[] y = new double[N];
     160          double[] z = new double[N];
     161
     162          for (int i = 0; i < N; i++) {
     163            double r1, r2, r3, r4;
     164            alglib.hqrndnormal2(randState, out r1, out r2);
     165            alglib.hqrndnormal2(randState, out r3, out r4);
     166
     167            var r_ab = new double[] { r1, r2 };
     168            var r_xy = new double[] { r3, r4 };
     169            double[] s_ab = new double[2];
     170            double[] s_xy = new double[2];
     171            alglib.ablas.rmatrixmv(2, 2, cov_ab, 0, 0, 0, r_ab, 0, ref s_ab, 0);
     172            alglib.ablas.rmatrixmv(2, 2, cov_xy, 0, 0, 0, r_xy, 0, ref s_xy, 0);
     173
     174            a[i] = s_ab[0] + m0;
     175            b[i] = s_ab[1] + m1;
     176            x[i] = s_xy[0] + m0; // use same mean (just for the sake of it)
     177            y[i] = s_xy[1] + m1;
     178
     179            z[i] = a[i] * b[i];
     180          }
     181
     182          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05);
     183          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
     184
     185          /* we might see correlations when only using one of the two relevant factors.
     186           * this depends on the distribution / location of a and b
     187          // for zero-mean distributions the following should all be quasi-zero
     188          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
     189          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
     190          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
     191          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
     192          */
     193          Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}",
     194            Heuristics.CorrelationForInteraction(a, b, z),
     195            Heuristics.CorrelationForInteraction(x, y, z),
     196            Heuristics.CorrelationForInteraction(a, x, z),
     197            Heuristics.CorrelationForInteraction(b, x, z),
     198            Heuristics.CorrelationForInteraction(a, y, z),
     199            Heuristics.CorrelationForInteraction(b, y, z),
     200            alglib.cov2(a, b)
     201            );
     202        }
     203      }
     204    }
     205    [TestMethod]
     206    [TestCategory("Algorithms.DataAnalysis")]
     207    [TestProperty("Time", "short")]
     208    public void TestGeneral3dInteractions() {
     209      {
     210        // same as TestGeneral2dInteractions but for terms with three variables
     211
     212        var conditionNumber = 100;
     213        for (int iter = 0; iter < 100; iter++) {
     214          double m0 = 0.0; double m1 = 0.0; double m2 = 0.0;
     215          alglib.hqrndstate randState;
     216          alglib.hqrndseed(1234, 31415, out randState);
     217
     218          // uncomment if non-zero mean distributions should be tested
     219          //alglib.hqrndnormal2(randState, out m0, out m1);
     220          //alglib.hqrndnormal2(randState, out m1, out m2);
     221
     222          double[,] cov_abc = new double[3, 3];
     223          double[,] cov_xyz = new double[3, 3];
     224          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_abc);
     225          alglib.spdmatrixcholesky(ref cov_abc, 3, true);
     226
     227          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_xyz);
     228          alglib.spdmatrixcholesky(ref cov_xyz, 3, true);
     229
     230          int N = 1000; // large sample size to make sure the test thresholds hold
     231          double[] a = new double[N];
     232          double[] b = new double[N];
     233          double[] c = new double[N];
     234          double[] x = new double[N];
     235          double[] y = new double[N];
     236          double[] z = new double[N];
     237          double[] t = new double[N];
     238
     239          for (int i = 0; i < N; i++) {
     240            double r1, r2, r3, r4, r5, r6;
     241            alglib.hqrndnormal2(randState, out r1, out r2);
     242            alglib.hqrndnormal2(randState, out r3, out r4);
     243            alglib.hqrndnormal2(randState, out r5, out r6);
     244
     245            var r_abc = new double[] { r1, r2, r3 };
     246            var r_xyz = new double[] { r4, r5, r6 };
     247            double[] s_abc = new double[3];
     248            double[] s_xyz = new double[3];
     249            alglib.ablas.rmatrixmv(3, 3, cov_abc, 0, 0, 0, r_abc, 0, ref s_abc, 0);
     250            alglib.ablas.rmatrixmv(3, 3, cov_xyz, 0, 0, 0, r_xyz, 0, ref s_xyz, 0);
     251
     252            a[i] = s_abc[0] + m0;
     253            b[i] = s_abc[1] + m1;
     254            c[i] = s_abc[2] + m2;
     255            x[i] = s_xyz[0] + m0; // use same mean (just for the sake of it)
     256            y[i] = s_xyz[1] + m1;
     257            z[i] = s_xyz[2] + m2;
     258
     259            t[i] = a[i] * b[i] * c[i];
     260          }
     261
     262          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, c, z) > 0.05);
     263          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z, z) < 0.05);
     264
     265          /* we might see correlations when only using one of the two relevant factors.
     266           * this depends on the distribution / location of a and b
     267          // for zero-mean distributions the following should all be quasi-zero
     268          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
     269          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
     270          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
     271          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
     272          */
     273          Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}",
     274            Heuristics.CorrelationForInteraction(a, b, z),
     275            Heuristics.CorrelationForInteraction(x, y, z),
     276            Heuristics.CorrelationForInteraction(a, x, z),
     277            Heuristics.CorrelationForInteraction(b, x, z),
     278            Heuristics.CorrelationForInteraction(a, y, z),
     279            Heuristics.CorrelationForInteraction(b, y, z),
     280            alglib.cov2(a, b)
     281            );
     282        }
    118283      }
    119284    }
  • branches/MCTS-SymbReg-2796/Tests/Test.csproj

    r15416 r15426  
    3636  </PropertyGroup>
    3737  <ItemGroup>
     38    <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     39      <SpecificVersion>False</SpecificVersion>
     40      <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
     41    </Reference>
    3842    <Reference Include="HeuristicLab.Algorithms.DataAnalysis-3.4">
    3943      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.DataAnalysis-3.4.dll</HintPath>
Note: See TracChangeset for help on using the changeset viewer.