Changeset 15426
- Timestamp: 10/20/17 17:18:33 (7 years ago)
- Location: branches/MCTS-SymbReg-2796
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/Heuristics.cs
r15425 r15426 18 18 // We only need to identify the x y as we assume that all other terms are accounted for 19 19 public static class Heuristics { 20 public static double CorrelationForInteraction(double[] a, double[] b, double[] c, double[] target) { 21 return 0.0; 22 } 20 23 public static double CorrelationForInteraction(double[] a, double[] b, double[] z) { 21 24 // -
branches/MCTS-SymbReg-2796/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs
r15425 r15426 19 19 [TestCategory("Algorithms.DataAnalysis")] 20 20 [TestProperty("Time", "short")] 21 public void Test Heuristics() {21 public void TestSimple2dInteractions() { 22 22 { 23 23 // a, b ~ U(0, 1) should be trivial … … 97 97 } 98 98 { 99 var rand = new MersenneTwister(1234); 99 100 // a ~ N(100, 1), b ~ N(-100, 1) 100 var nRand = new NormalDistributedRandom( new MersenneTwister(1234), 0, 1);101 var aRand = new NormalDistributedRandom( new MersenneTwister(1234), 100, 1);102 var bRand = new NormalDistributedRandom( new MersenneTwister(1234), -100, 1);101 var nRand = new NormalDistributedRandom(rand, 0, 1); 102 var aRand = new NormalDistributedRandom(rand, 100, 1); 103 var bRand = new NormalDistributedRandom(rand, -100, 1); 103 104 104 105 int n = 10000; // large sample so that we can use the thresholds below … … 115 116 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05); // b < 0 116 117 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05); 117 Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); 118 Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); // random variables are not correlated 119 } 120 } 121 122 [TestMethod] 123 [TestCategory("Algorithms.DataAnalysis")] 124 [TestProperty("Time", "short")] 125 public void TestGeneral2dInteractions() { 126 { 127 // we should be able to reliably detect when a product of two variables is correlated with the target variable 128 129 // the test samples x from a two dimensional normal distribution 130 // the covariance matrix for the normal distribution is randomly sampled 131 // this means x_1 and x_2 might be highly correlated 132 // the mean of the normal distribution is randomly sampled (most critical are probably zero-mean distributions) 133 // y is calculated as x_1*x_2 134 135 var conditionNumber = 10000; 136 for (int iter = 0; iter < 100; iter++) { 137 double m0 = 0.0; double m1 = 0.0; 138 alglib.hqrndstate randState; 139 
alglib.hqrndseed(1234, 31415, out randState); 140 141 // uncomment if non-zero mean distributions should be tested 142 //alglib.hqrndnormal2(randState, out m0, out m1); 143 144 double[,] cov_ab = new double[2, 2]; 145 double[,] cov_xy = new double[2, 2]; 146 alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_ab); 147 alglib.spdmatrixcholesky(ref cov_ab, 2, true); 148 149 alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_xy); 150 alglib.spdmatrixcholesky(ref cov_xy, 2, true); 151 152 // generate a, b by sampling from a 2dim multivariate normal distribution 153 // generate x, y by sampling from another 2dim multivariate normal distribution 154 // a,b and x,y might be correlated but x,y are not correlated to a,b 155 int N = 1000; // large sample size to make sure the test thresholds hold 156 double[] a = new double[N]; 157 double[] b = new double[N]; 158 double[] x = new double[N]; 159 double[] y = new double[N]; 160 double[] z = new double[N]; 161 162 for (int i = 0; i < N; i++) { 163 double r1, r2, r3, r4; 164 alglib.hqrndnormal2(randState, out r1, out r2); 165 alglib.hqrndnormal2(randState, out r3, out r4); 166 167 var r_ab = new double[] { r1, r2 }; 168 var r_xy = new double[] { r3, r4 }; 169 double[] s_ab = new double[2]; 170 double[] s_xy = new double[2]; 171 alglib.ablas.rmatrixmv(2, 2, cov_ab, 0, 0, 0, r_ab, 0, ref s_ab, 0); 172 alglib.ablas.rmatrixmv(2, 2, cov_xy, 0, 0, 0, r_xy, 0, ref s_xy, 0); 173 174 a[i] = s_ab[0] + m0; 175 b[i] = s_ab[1] + m1; 176 x[i] = s_xy[0] + m0; // use same mean (just for the sake of it) 177 y[i] = s_xy[1] + m1; 178 179 z[i] = a[i] * b[i]; 180 } 181 182 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); 183 Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); 184 185 /* we might see correlations when only using one of the two relevant factors. 
186 * this depends on the distribution / location of a and b 187 // for zero-mean distributions the following should all be quasi-zero 188 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05); 189 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05); 190 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05); 191 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05); 192 */ 193 Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}", 194 Heuristics.CorrelationForInteraction(a, b, z), 195 Heuristics.CorrelationForInteraction(x, y, z), 196 Heuristics.CorrelationForInteraction(a, x, z), 197 Heuristics.CorrelationForInteraction(b, x, z), 198 Heuristics.CorrelationForInteraction(a, y, z), 199 Heuristics.CorrelationForInteraction(b, y, z), 200 alglib.cov2(a, b) 201 ); 202 } 203 } 204 } 205 [TestMethod] 206 [TestCategory("Algorithms.DataAnalysis")] 207 [TestProperty("Time", "short")] 208 public void TestGeneral3dInteractions() { 209 { 210 // same as TestGeneral2dInteractions but for terms with three variables 211 212 var conditionNumber = 100; 213 for (int iter = 0; iter < 100; iter++) { 214 double m0 = 0.0; double m1 = 0.0; double m2 = 0.0; 215 alglib.hqrndstate randState; 216 alglib.hqrndseed(1234, 31415, out randState); 217 218 // uncomment if non-zero mean distributions should be tested 219 //alglib.hqrndnormal2(randState, out m0, out m1); 220 //alglib.hqrndnormal2(randState, out m1, out m2); 221 222 double[,] cov_abc = new double[3, 3]; 223 double[,] cov_xyz = new double[3, 3]; 224 alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_abc); 225 alglib.spdmatrixcholesky(ref cov_abc, 3, true); 226 227 alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_xyz); 228 alglib.spdmatrixcholesky(ref cov_xyz, 3, true); 229 230 int N = 1000; // large sample size to make sure the test thresholds hold 231 double[] a = new double[N]; 232 double[] b = new 
double[N]; 233 double[] c = new double[N]; 234 double[] x = new double[N]; 235 double[] y = new double[N]; 236 double[] z = new double[N]; 237 double[] t = new double[N]; 238 239 for (int i = 0; i < N; i++) { 240 double r1, r2, r3, r4, r5, r6; 241 alglib.hqrndnormal2(randState, out r1, out r2); 242 alglib.hqrndnormal2(randState, out r3, out r4); 243 alglib.hqrndnormal2(randState, out r5, out r6); 244 245 var r_abc = new double[] { r1, r2, r3 }; 246 var r_xyz = new double[] { r4, r5, r6 }; 247 double[] s_abc = new double[3]; 248 double[] s_xyz = new double[3]; 249 alglib.ablas.rmatrixmv(3, 3, cov_abc, 0, 0, 0, r_abc, 0, ref s_abc, 0); 250 alglib.ablas.rmatrixmv(3, 3, cov_xyz, 0, 0, 0, r_xyz, 0, ref s_xyz, 0); 251 252 a[i] = s_abc[0] + m0; 253 b[i] = s_abc[1] + m1; 254 c[i] = s_abc[2] + m2; 255 x[i] = s_xyz[0] + m0; // use same mean (just for the sake of it) 256 y[i] = s_xyz[1] + m1; 257 z[i] = s_xyz[2] + m2; 258 259 t[i] = a[i] * b[i] * c[i]; 260 } 261 262 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, c, z) > 0.05); 263 Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z, z) < 0.05); 264 265 /* we might see correlations when only using one of the two relevant factors. 
266 * this depends on the distribution / location of a and b 267 // for zero-mean distributions the following should all be quasi-zero 268 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05); 269 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05); 270 Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05); 271 Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05); 272 */ 273 Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}", 274 Heuristics.CorrelationForInteraction(a, b, z), 275 Heuristics.CorrelationForInteraction(x, y, z), 276 Heuristics.CorrelationForInteraction(a, x, z), 277 Heuristics.CorrelationForInteraction(b, x, z), 278 Heuristics.CorrelationForInteraction(a, y, z), 279 Heuristics.CorrelationForInteraction(b, y, z), 280 alglib.cov2(a, b) 281 ); 282 } 118 283 } 119 284 } -
branches/MCTS-SymbReg-2796/Tests/Test.csproj
r15416 r15426 36 36 </PropertyGroup> 37 37 <ItemGroup> 38 <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 39 <SpecificVersion>False</SpecificVersion> 40 <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath> 41 </Reference> 38 42 <Reference Include="HeuristicLab.Algorithms.DataAnalysis-3.4"> 39 43 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.DataAnalysis-3.4.dll</HintPath>
Note: See TracChangeset
for help on using the changeset viewer.