Changeset 16601 for branches/2925_AutoDiffForDynamicalModels
- Timestamp: 02/13/19 10:14:03 (6 years ago)
- Location: branches/2925_AutoDiffForDynamicalModels
- Files:
  - 1 added
  - 2 deleted
  - 3 edited
branches/2925_AutoDiffForDynamicalModels/AutoDiffForDynamicalModelsTest/AutoDiffForDynamicalModelsTest.csproj
--- r16251
+++ r16601

     <Compile Include="TestCvodes.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="TestGA.cs" />
+    <Compile Include="TestOdeIdentification.cs" />
   </ItemGroup>
   <ItemGroup>
-    <None Include="Genetic Algorithm %28GA%29.hl">
-      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-    </None>
     <None Include="packages.config" />
   </ItemGroup>
   <ItemGroup>
+    <ProjectReference Include="..\HeuristicLab.Algorithms.DataAnalysis\3.4\HeuristicLab.Algorithms.DataAnalysis-3.4.csproj">
+      <Project>{2E782078-FA81-4B70-B56F-74CE38DAC6C8}</Project>
+      <Name>HeuristicLab.Algorithms.DataAnalysis-3.4</Name>
+    </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Algorithms.GeneticAlgorithm\3.3\HeuristicLab.Algorithms.GeneticAlgorithm-3.3.csproj">
       <Project>{A51DA44F-CB35-4F6F-99F5-2A2E904AB93B}</Project>
       <Name>HeuristicLab.Algorithms.GeneticAlgorithm-3.3</Name>
     </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj">
+      <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>
+      <Name>HeuristicLab.Collections-3.3</Name>
+    </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Common\3.3\HeuristicLab.Common-3.3.csproj">
…
       <Name>HeuristicLab.Core-3.3</Name>
     </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Data\3.3\HeuristicLab.Data-3.3.csproj">
+      <Project>{BBAB9DF5-5EF3-4BA8-ADE9-B36E82114937}</Project>
+      <Name>HeuristicLab.Data-3.3</Name>
+    </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Optimization\3.3\HeuristicLab.Optimization-3.3.csproj">
       <Project>{14AB8D24-25BC-400C-A846-4627AA945192}</Project>
…
       <Project>{edcc8202-4463-4122-b01f-21b664343dfb}</Project>
       <Name>HeuristicLab.Problems.DynamicalSystemsModelling-3.3</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Problems.Instances.DataAnalysis\3.3\HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj">
+      <Project>{94c7714e-29d4-4d6d-b213-2c18d627ab75}</Project>
+      <Name>HeuristicLab.Problems.Instances.DataAnalysis-3.3</Name>
     </ProjectReference>
   </ItemGroup>
branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs
--- r16600
+++ r16601

 namespace HeuristicLab.Problems.DynamicalSystemsModelling {
-  // Another possibility is spline-smoothing of the data (over time) to obtain, for each target variable,
-  // a de-noised value and the derivative dy/dt for all observations;
-  // the regression can then be applied directly to dy/dt (with the de-noised y values as inputs)
   [Item("Dynamical Systems Modelling Problem", "TODO")]
   [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
…
       Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

-      var solversStr = new string[] { "HeuristicLab", "CVODES" };
+      var solversStr = new string[] { "HeuristicLab" /* , "CVODES" */ };
       var solvers = new ItemSet<StringValue>(
         solversStr.Select(s => new StringValue(s).AsReadOnly())
…
       InitAllParameters();

-      // TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables)
       // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
       // TODO: use training range as default training episode
       // TODO: write back optimized parameters to solution?
       // TODO: optimization of starting values for latent variables in CVODES solver
+      // TODO: allow to specify the name for the time variable in the dataset and allow variable step-sizes
+      // TODO: check grammars (input variables) after cloning

     }
…
       // retreive optimized parameters from nodes?

-      var problemData = Standardize(ProblemData);
+      var problemData = ProblemData;
       var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
       var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
       if (OptimizeParametersForEpisodes) {
+        throw new NotImplementedException();
         int eIdx = 0;
         double totalNMSE = 0.0;
…
         return nmse;
       }
-    }
-
-    private IRegressionProblemData Standardize(IRegressionProblemData problemData) {
-      // var standardizedDataset = new Dataset(
-      //   problemData.Dataset.DoubleVariables,
-      //   problemData.Dataset.DoubleVariables.Select(v => Standardize(problemData.Dataset.GetReadOnlyDoubleValues(v)).ToList()));
-      // return new RegressionProblemData(standardizedDataset, problemData.AllowedInputVariables, problemData.TargetVariable);
-      return new RegressionProblemData(problemData.Dataset, problemData.AllowedInputVariables, problemData.TargetVariable);
     }
…
       out double[] optTheta,
       out double nmse) {
-      var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
-      var targetValues = new double[rows.Length, targetVars.Length];
-
-      // collect values of all target variables
-      var colIdx = 0;
-      foreach (var targetVar in targetVars) {
-        int rowIdx = 0;
-        foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
-          targetValues[rowIdx, colIdx] = value;
-          rowIdx++;
-        }
-        colIdx++;
-      }

+      // optimize parameters by fitting f(x,y) to calculated differences dy/dt(t)
+      nmse = PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes, maxParameterOptIterations, out optTheta);
+
+      // optimize parameters using integration of f(x,y) to calculate y(t)
+      nmse = OptimizeParameters(trees, problemData, targetVars, latentVariables, episodes, maxParameterOptIterations, optTheta, numericIntegrationSteps, odeSolver, out optTheta);
+
+      if (double.IsNaN(nmse) || double.IsInfinity(nmse)) nmse = 100 * trees.Length * episodes.Sum(ep => ep.Size);
+    }
+
+    private static double PreTuneParameters(
+      ISymbolicExpressionTree[] trees,
+      IRegressionProblemData problemData,
+      string[] targetVars,
+      string[] latentVariables,
+      IRandom random,
+      IEnumerable<IntRange> episodes,
+      int maxParameterOptIterations,
+      out double[] optTheta) {
+      var thetas = new List<double>();
+      double nmse = 0.0;
       // NOTE: the order of values in parameter matches prefix order of constant nodes in trees
-      var paramNodes = new List<ISymbolicExpressionTreeNode>();
-      foreach (var t in trees) {
-        foreach (var n in t.IterateNodesPrefix()) {
-          if (IsConstantNode(n))
-            paramNodes.Add(n);
-        }
-      }
-      // init params randomly
-      // theta contains parameter values for trees and then the initial values for latent variables (a separate vector for each episode)
-      // inital values for latent variables are also optimized
-      var theta = new double[paramNodes.Count + latentVariables.Length * episodes.Count()];
-      for (int i = 0; i < theta.Length; i++)
-        theta[i] = random.NextDouble() * 2.0e-1 - 1.0e-1;
-
-      optTheta = new double[0];
-      if (theta.Length > 0) {
-        alglib.minlmstate state;
-        alglib.minlmreport report;
-        alglib.minlmcreatevj(targetValues.Length, theta, out state);
-        alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
-        alglib.minlmsetgradientcheck(state, 1.0e-3);
-        var myState = new OptimizationData(trees, targetVars, problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
-        alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);
-
-        alglib.minlmresults(state, out optTheta, out report);
-
-        /*************************************************************************
-        Levenberg-Marquardt algorithm results
-
-        INPUT PARAMETERS:
-            State   -   algorithm state
-
-        OUTPUT PARAMETERS:
-            X       -   array[0..N-1], solution
-            Rep     -   optimization report; includes termination codes and
-                        additional information. Termination codes are listed below,
-                        see comments for this structure for more info.
-                        Termination code is stored in rep.terminationtype field:
-                        * -8    optimizer detected NAN/INF values either in the
-                                function itself, or in its Jacobian
-                        * -7    derivative correctness check failed;
-                                see rep.funcidx, rep.varidx for more information.
-                        * -3    constraints are inconsistent
-                        *  2    relative step is no more than EpsX.
-                        *  5    MaxIts steps was taken
-                        *  7    stopping conditions are too stringent,
-                                further improvement is impossible
-                        *  8    terminated by user who called minlmrequesttermination().
-                                X contains point which was "current accepted" when
-                                termination request was submitted.
-
-          -- ALGLIB --
-             Copyright 10.03.2009 by Bochkanov Sergey
-        *************************************************************************/
-        if (report.terminationtype < 0) { nmse = 10.0; return; }
-
-        nmse = state.f; //TODO check
-
-        // var myState = new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver };
-        // EvaluateObjectiveVector(optTheta, ref nmse, grad, myState);
-        if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10.0; return; } // return a large value (TODO: be consistent by using NMSE)
-      } else {
-        // no parameters
-        nmse = targetValues.Length;
-      }
-    }
-
-    // private static IEnumerable<double> Standardize(IEnumerable<double> xs) {
-    //   var m = xs.Average();
-    //   var s = xs.StandardDeviationPop();
-    //   return xs.Select(xi => (xi - m) / s);
-    // }
-
-    alglib.ndimensional_fvec fvec;
-    alglib.ndimensional_jac jac;
-
+      for (int treeIdx = 0; treeIdx < trees.Length; treeIdx++) {
+        var t = trees[treeIdx];
+
+        // TODO: calculation of differences for multiple episodes
+        var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End + 1 - e.Start)).ToArray();
+        var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
+        var targetValuesDiff = targetValues.Skip(1).Zip(targetValues, (t1, t0) => t1 - t0).ToArray(); // TODO: smoothing or multi-pole
+
+        var myState = new OptimizationData(new[] { t }, targetVars, problemData, new[] { targetValuesDiff }, episodes.ToArray(), -99, latentVariables, string.Empty); // TODO
+        var paramCount = myState.nodeValueLookup.ParameterCount;
+
+        // init params randomly
+        // theta contains parameter values for trees and then the initial values for latent variables (a separate vector for each episode)
+        // inital values for latent variables are also optimized
+        var theta = new double[paramCount + latentVariables.Length * episodes.Count()];
+        for (int i = 0; i < theta.Length; i++)
+          theta[i] = random.NextDouble() * 2.0e-1 - 1.0e-1;
+
+        optTheta = new double[0];
+        if (theta.Length > 0) {
+          alglib.minlmstate state;
+          alglib.minlmreport report;
+          alglib.minlmcreatevj(targetValuesDiff.Length, theta, out state);
+          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
+          // alglib.minlmsetgradientcheck(state, 1.0e-3);
+          alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);
+
+          alglib.minlmresults(state, out optTheta, out report);
+
+          if (report.terminationtype < 0) { throw new InvalidOperationException("there was a problem in the optimizer"); }
+
+          thetas.AddRange(optTheta);
+        }
+        nmse += EvaluateMSE(optTheta, myState);
+      } // foreach tree
+      optTheta = thetas.ToArray();
+
+      var maxNmse = 100 * trees.Length * episodes.Sum(ep => ep.Size);
+      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
+      return nmse;
+    }
+
+    // similar to above but this time we integrate and optimize all parameters for all targets concurrently
+    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
+      IEnumerable<IntRange> episodes, int maxParameterOptIterations, double[] initialTheta, int numericIntegrationSteps, string odeSolver, out double[] optTheta) {
+      var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
+      var targetValues = new double[trees.Length][];
+      for (int treeIdx = 0; treeIdx < trees.Length; treeIdx++) {
+        var t = trees[treeIdx];
+
+        // TODO: calculation of differences for multiple episodes
+        targetValues[treeIdx] = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
+      }
+
+      var myState = new OptimizationData(trees, targetVars, problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
+      optTheta = initialTheta;
+
+      if (initialTheta.Length > 0) {
+
+        try {
+          alglib.minlmstate state;
+          alglib.minlmreport report;
+          alglib.minlmcreatevj(rowsForDataExtraction.Length * trees.Length, initialTheta, out state);
+          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
+          // alglib.minlmsetgradientcheck(state, 1.0e-3);
+          alglib.minlmoptimize(state, IntegrateAndEvaluateObjectiveVector, IntegrateAndEvaluateObjectiveVectorAndJacobian, null, myState);
+
+          alglib.minlmresults(state, out optTheta, out report);
+
+          if (report.terminationtype < 0) {
+            // there was a problem: reset theta and evaluate for inital values
+            optTheta = initialTheta;
+          }
+        } catch (alglib.alglibexception) {
+          optTheta = initialTheta;
+        }
+      }
+      var nmse = EvaluateIntegratedMSE(optTheta, myState);
+      var maxNmse = 100 * targetValues.Length * rowsForDataExtraction.Length;
+      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
+      return nmse;
+    }
+
+    // helper
+    public static double EvaluateMSE(double[] x, OptimizationData optimizationData) {
+      var fi = new double[optimizationData.rows.Count()];
+      EvaluateObjectiveVector(x, fi, optimizationData);
+      return fi.Sum(fii => fii * fii) / fi.Length;
+    }
     public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
     public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
-      EvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
+      var rows = optimizationData.rows.ToArray();
+      var problemData = optimizationData.problemData;
+      var nodeValueLookup = optimizationData.nodeValueLookup;
+      var ds = problemData.Dataset;
+      int outputIdx = 0;
+
+      nodeValueLookup.UpdateParamValues(x);
+
+      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
+        // update variable values
+        foreach (var variable in problemData.AllowedInputVariables.Concat(optimizationData.targetVariables)) {
+          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx]));
+        }
+        // interpret all trees
+        for (int treeIdx = 0; treeIdx < optimizationData.trees.Length; treeIdx++) {
+          var tree = optimizationData.trees[treeIdx];
+          var pred = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup);
+          var y = optimizationData.targetValues[treeIdx][trainIdx];
+          fi[outputIdx++] = y - pred;
+        }
+      }
     }

     public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { EvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
     public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
-      Tuple<double, Vector>[][] predicted = null; // one array of predictions for each episode
-      predicted = Integrate(optimizationData, x).ToArray();
-
-      // clear all result data structures
-      for (int j = 0; j < fi.Length; j++) {
-        fi[j] = 10.0;
-        if (jac != null) Array.Clear(jac, 0, jac.Length);
-      }
-
-      if (predicted.Length != optimizationData.targetValues.GetLength(0)) {
-        return;
-      }
-
-      // for normalized MSE = 1/variance(t) * MSE(t, pred)
-      // TODO: Perf. (by standardization of target variables before evaluation of all trees)
-      // var invVar = Enumerable.Range(0, targetVariables.Length)
-      //   .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors
-      //   .Select(vec => vec.StandardDeviation()) // TODO: variance of stddev
-      //   .Select(v => 1.0 / v)
-      //   .ToArray();
-
-      // double[] invVar = Enumerable.Repeat(1.0, targetVariables.Length).ToArray();
-
-      // objective function is NMSE
-      int n = predicted.Length;
-      double invN = 1.0 / n;
-      int i = 0;
-      int r = 0;
-      foreach (var y_pred in predicted) {
-        // y_pred contains the predicted values for target variables first and then predicted values for latent variables
-        for (int c = 0; c < optimizationData.targetVariables.Length; c++) {
-          var y_pred_f = y_pred[c].Item1;
-          var y = optimizationData.targetValues[r, c];
-
-          fi[i] = (y - y_pred_f);
-          var g = y_pred[c].Item2;
-          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[i, j] = -g[j];
-          i++; // we put the errors for each target variable after each other
-        }
-        r++;
+      // extract variable values from dataset
+      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
+      var problemData = optimizationData.problemData;
+      var ds = problemData.Dataset;
+      var rows = optimizationData.episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
+
+      var nodeValueLookup = optimizationData.nodeValueLookup;
+      nodeValueLookup.UpdateParamValues(x);
+
+      // TODO add integration of latent variables
+      int termIdx = 0;
+
+      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
+        // update variable values
+        foreach (var variable in problemData.AllowedInputVariables.Concat(optimizationData.targetVariables)) {
+          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx]));
+        }
+
+        var calculatedVariables = optimizationData.targetVariables;
+
+        var trees = optimizationData.trees;
+        for (int i = 0; i < trees.Length; i++) {
+          var tree = trees[i];
+          var targetVarName = calculatedVariables[i];
+
+          double f; Vector g;
+          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup, out f, out g);
+
+          var y = optimizationData.targetValues[i][trainIdx];
+          fi[termIdx] = y - f;
+          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j];
+
+          termIdx++;
+        }
+      }
+    }
+
+    // helper
+    public static double EvaluateIntegratedMSE(double[] x, OptimizationData optimizationData) {
+      var fi = new double[optimizationData.rows.Count() * optimizationData.targetVariables.Length];
+      IntegrateAndEvaluateObjectiveVector(x, fi, optimizationData);
+      return fi.Sum(fii => fii * fii) / fi.Length;
+    }
+    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { IntegrateAndEvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
+    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
+      IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
+    }
+
+    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
+    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
+      var rows = optimizationData.rows.ToArray();
+      var problemData = optimizationData.problemData;
+      var nodeValueLookup = optimizationData.nodeValueLookup;
+      var ds = problemData.Dataset;
+      int outputIdx = 0;
+
+      var prediction = Integrate(optimizationData, x).ToArray();
+      var trees = optimizationData.trees;
+
+      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
+        var pred_i = prediction[Math.Min(trainIdx, prediction.Length - 1)]; // if we stopped earlier in the integration then just use the last generated value
+
+        for (int i = 0; i < trees.Length; i++) {
+          var tree = trees[i];
+          var y = optimizationData.targetValues[i][trainIdx];
+          var f = pred_i[i].Item1;
+          var g = pred_i[i].Item2;
+          fi[outputIdx] = y - f;
+          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[outputIdx, j] = -g[j];
+
+          outputIdx++;
+        }
       }
     }
…
       results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

-      var problemData = Standardize(ProblemData);
+      var problemData = ProblemData;
       var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
       var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
…
           var res = (y - y_pred_f);
           var ressq = res * res;
-          f_i += ressq * invN /* * Math.Exp(-0.2 * r) */;
-          g_i = g_i - 2.0 * res * y_pred[colIdx].Item2 * invN /* * Math.Exp(-0.2 * r)*/;
+          f_i += ressq * invN;
+          g_i = g_i - 2.0 * res * y_pred[colIdx].Item2 * invN;
         }
         seRow.Values.Add(f_i);
…
             ref nextParIdx));

-        // var shownTree = (SymbolicExpressionTree)tree.Clone();
-        // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
-        // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
-        //
-        // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
-        //   double constantsVal = optTheta[nodeIdx[n.original]];
-        //
-        //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
-        //
-        //   var parentNode = n.shown.Parent;
-        //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
-        //   parentNode.RemoveSubtree(replacementIndex);
-        //   parentNode.InsertSubtree(replacementIndex, replacementNode);
-        // }

         var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
…
       var odeSolver = optimizationData.odeSolver;
       var numericIntegrationSteps = optimizationData.numericIntegrationSteps;
+      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must conincide with the order of trees in the encoding
+
+      var nodeValues = optimizationData.nodeValueLookup;
+      nodeValues.UpdateParamValues(parameterValues);

…
       var episodeIdx = 0;
       foreach (var episode in optimizationData.episodes) {
-        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start);
-
-        // integrate forward starting with known values for the target in t0
-
-        var variableValues = new Dictionary<string, Tuple<double, Vector>>();
+        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start).ToArray();
+
         var t0 = rows.First();
-        foreach (var varName in inputVariables) {
-          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
-        }
-        foreach (var varName in targetVariables) {
-          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
-        }
-        // add value entries for latent variables which are also integrated
-        // initial values are at the end of the parameter vector
-        // separete initial values for each episode
-        var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
-        foreach (var latentVar in latentVariables) {
-          var arr = new double[parameterValues.Length]; // backing array
-          arr[initialValueIdx] = 1.0;
-          var g = new Vector(arr);
-          variableValues.Add(latentVar,
-            Tuple.Create(parameterValues[initialValueIdx], g)); // we don't have observations for latent variables therefore we optimize the initial value for each episode
-          initialValueIdx++;
-        }
-
-        var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must conincide with the order of trees in the encoding
+
+        // initialize values for inputs and targets from dataset
+        foreach (var varName in inputVariables.Concat(targetVariables)) {
+          nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t0), Vector.Zero);
+        }
+
+        { // CODE BELOW MUST BE TESTED
+          if (latentVariables.Length > 0) throw new NotImplementedException();
+
+          // add value entries for latent variables which are also integrated
+          // initial values are at the end of the parameter vector
+          // separate initial values for each episode
+          var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
+          foreach (var latentVar in latentVariables) {
+            var arr = new double[parameterValues.Length]; // backing array
+            arr[initialValueIdx] = 1.0;
+            var g = new Vector(arr);
+            nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
+            initialValueIdx++;
+          }
+        }

         // return first value as stored in the dataset
         yield return calculatedVariables
-          .Select(calcVarName => variableValues[calcVarName])
+          .Select(calcVarName => nodeValues.GetVariableValue(calcVarName))
           .ToArray();

-        var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
+        var prevT = t0; // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
         foreach (var t in rows.Skip(1)) {
           if (odeSolver == "HeuristicLab")
-            IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps);
+            IntegrateHL(trees, calculatedVariables, nodeValues, numericIntegrationSteps); // integrator updates nodeValues
           else if (odeSolver == "CVODES")
             throw new NotImplementedException();
…
           prevT = t;

-          // This check doesn't work with the HeuristicLab integrator if there are input variables
-          //if (variableValues.Count == targetVariables.Length) {
-          // only return the target variables for calculation of errors
           var res = calculatedVariables
-            .Select(targetVar => variableValues[targetVar])
+            .Select(targetVar => nodeValues.GetVariableValue(targetVar))
             .ToArray();
+
+          // stop early if there are undefined values
           if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break;
           yield return res;
-          //} else {
-          //  yield break; // stop early on problems
-          //}
-
-          // update for next time step
+
+          // update for next time step (only the inputs)
           foreach (var varName in inputVariables) {
-            variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
+            nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t), Vector.Zero);
           }
         }
…
       ISymbolicExpressionTree[] trees,
       string[] calculatedVariables, // names of integrated variables
-      Dictionary<string, Tuple<double, Vector>> variableValues, //y (intput and output) input: y(t0), output: y(t0+1)
-      double[] parameterValues,
+      // Dictionary<string, Tuple<double, Vector>> variableValues, //y (intput and output) input: y(t0), output: y(t0+1)
+      NodeValueLookup nodeValues,
+      // double[] parameterValues,
       int numericIntegrationSteps) {

-      var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
-
-      // the nodeValues for parameters are constant over time
-      // TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change)
-      // NOTE: the order must be the same as above (prefix order for nodes)
-      int pIdx = 0;
-      foreach (var tree in trees) {
-        foreach (var node in tree.Root.IterateNodesPrefix()) {
-          if (IsConstantNode(node)) {
-            var gArr = new double[parameterValues.Length]; // backing array
-            gArr[pIdx] = 1.0;
-            var g = new Vector(gArr);
-            nodeValues.Add(node, new Tuple<double, Vector>(parameterValues[pIdx], g));
-            pIdx++;
-          } else if (node.SubtreeCount == 0) {
-            // for (latent) variables get the values from variableValues
-            var varName = node.Symbol.Name;
-            nodeValues.Add(node, variableValues[varName]);
-          }
-        }
-      }

       double[] deltaF = new double[calculatedVariables.Length];
…
       double h = 1.0 / numericIntegrationSteps;
       for (int step = 0; step < numericIntegrationSteps; step++) {
-        //var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
+
+        // evaluate all trees
         for (int i = 0; i < trees.Length; i++) {
           var tree = trees[i];
-          var targetVarName = calculatedVariables[i];

           // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
…
         for (int i = 0; i < trees.Length; i++) {
           var varName = calculatedVariables[i];
-          var oldVal = variableValues[varName];
-          var newVal = Tuple.Create(
-            oldVal.Item1 + h * deltaF[i],
-            oldVal.Item2 + deltaG[i].Scale(h)
-          );
-          variableValues[varName] = newVal;
-        }
-
-        // TODO perf
-        foreach (var node in nodeValues.Keys.ToArray()) {
-          if (node.SubtreeCount == 0 && !IsConstantNode(node)) {
-            // update values for (latent) variables
-            var varName = node.Symbol.Name;
-            nodeValues[node] = variableValues[varName];
-          }
-        }
-      }
-    }
+          var oldVal = nodeValues.GetVariableValue(varName);
+          nodeValues.SetVariableValue(varName, oldVal.Item1 + h * deltaF[i], oldVal.Item2 + deltaG[i].Scale(h));
+        }
+      }
+    }
+
+    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
+      switch (node.Symbol.Name) {
+        case "+": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            var g = InterpretRec(node.GetSubtree(1), nodeValues);
+            return f + g;
+          }
+        case "*": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            var g = InterpretRec(node.GetSubtree(1), nodeValues);
+            return f * g;
+          }
+        case "-": {
+            if (node.SubtreeCount == 1) {
+              var f = InterpretRec(node.GetSubtree(0), nodeValues);
+              return -f;
+            } else {
+              var f = InterpretRec(node.GetSubtree(0), nodeValues);
+              var g = InterpretRec(node.GetSubtree(1), nodeValues);
+
+              return f - g;
+            }
+          }
+        case "%": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            var g = InterpretRec(node.GetSubtree(1), nodeValues);
+
+            // protected division
+            if (g.IsAlmost(0.0)) {
+              return 0;
+            } else {
+              return f / g;
+            }
+          }
+        case "sin": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            return Math.Sin(f);
+          }
+        case "cos": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            return Math.Cos(f);
+          }
+        case "sqr": {
+            var f = InterpretRec(node.GetSubtree(0), nodeValues);
+            return f * f;
+          }
+        default: {
+            return nodeValues.NodeValue(node);
+          }
+      }
+    }
…
     private static void InterpretRec(
       ISymbolicExpressionTreeNode node,
-      Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> nodeValues, // contains value and gradient vector for a node (variables and constants only)
+      NodeValueLookup nodeValues, // contains value and gradient vector for a node (variables and constants only)
       out double z,
       out Vector dz
…
         default: {
-            var t = nodeValues[node];
-            z = t.Item1;
-            dz = Vector.CreateNew(t.Item2);
+            z = nodeValues.NodeValue(node);
+            dz = Vector.CreateNew(nodeValues.NodeGradient(node));
             break;
           }
…
       return n.Symbol.Name[0] == 'θ';
     }
+    private static double GetConstantValue(ISymbolicExpressionTreeNode n) {
+      return 0.0; // TODO: needs to be updated when we write back values to the tree
+    }
     private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
       return n.Symbol.Name[0] == 'λ';
…
     private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
       return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
+    }
+    private static string GetVariableName(ISymbolicExpressionTreeNode n) {
+      return n.Symbol.Name;
     }
…
     #endregion

+
     #region Import & Export
     public void Load(IRegressionProblemData data) {
…
       return ProblemData;
     }
+    #endregion
+
+
+    // TODO: for integration we only need a part of the data that we need for optimization

     public class OptimizationData {
…
       public readonly string[] targetVariables;
       public readonly IRegressionProblemData problemData;
-      public readonly double[,] targetValues;
+      public readonly double[][] targetValues;
       public readonly IntRange[] episodes;
       public readonly int numericIntegrationSteps;
       public readonly string[] latentVariables;
       public readonly string odeSolver;
-
-      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, IRegressionProblemData problemData, double[,] targetValues, IntRange[] episodes, int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
+      public readonly NodeValueLookup nodeValueLookup;
+      public IEnumerable<int> rows => episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size));
+
+      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, IRegressionProblemData problemData,
+        double[][] targetValues,
+        IntRange[] episodes,
+        int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
+        if (episodes.Length > 1) throw new NotSupportedException("multiple episodes are not yet implemented");
         this.trees = trees;
         this.targetVariables = targetVars;
…
         this.latentVariables = latentVariables;
         this.odeSolver = odeSolver;
-      }
-    }
-    #endregion
-
+        this.nodeValueLookup = new NodeValueLookup(trees);
+      }
+    }
+
+    public class NodeValueLookup {
+      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
+      private readonly Dictionary<string, List<ISymbolicExpressionTreeNode>> name2nodes = new Dictionary<string, List<ISymbolicExpressionTreeNode>>();
+      private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();
+
+      public double NodeValue(ISymbolicExpressionTreeNode node) => node2val[node].Item1;
+      public Vector NodeGradient(ISymbolicExpressionTreeNode node) => node2val[node].Item2;
+
+      public NodeValueLookup(ISymbolicExpressionTree[] trees) {
+        int paramIdx = 0;
+
+        var constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().Where(IsConstantNode)).ToArray();
+        foreach (var n in constantNodes) {
+          paramIdx2node[paramIdx] = n;
+          SetParamValue(paramIdx, GetConstantValue(n), Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx));
+          paramIdx++;
+        }
+
+        foreach (var tree in trees) {
+          foreach (var node in tree.IterateNodesPrefix().Where(IsVariableNode)) {
+            var varName = GetVariableName(node);
+            if (!name2nodes.TryGetValue(varName, out List<ISymbolicExpressionTreeNode> nodes)) {
+              nodes = new List<ISymbolicExpressionTreeNode>();
+              name2nodes.Add(varName, nodes);
+            }
+            nodes.Add(node);
+            SetVariableValue(varName, 0.0);
+          }
+        }
+      }
+
+      public int ParameterCount => paramIdx2node.Count;
+
+
+      public void SetParamValue(int paramIdx, double val, Vector dVal) {
+        node2val[paramIdx2node[paramIdx]] = Tuple.Create(val, dVal);
+      }
+
+      public void SetVariableValue(string variableName, double val) {
+        SetVariableValue(variableName, val, Vector.Zero);
+      }
+      public Tuple<double, Vector> GetVariableValue(string variableName) {
+        return node2val[name2nodes[variableName].First()];
+      }
+      public void SetVariableValue(string variableName, double val, Vector dVal) {
+        if (name2nodes.TryGetValue(variableName, out List<ISymbolicExpressionTreeNode> nodes)) {
+          nodes.ForEach(n => node2val[n] = Tuple.Create(val, dVal));
+        } else {
+          var fakeNode = new SimpleSymbol(variableName, 0).CreateTreeNode();
+          var newNodeList = new List<ISymbolicExpressionTreeNode>();
+          newNodeList.Add(fakeNode);
+          name2nodes.Add(variableName, newNodeList);
+          node2val[fakeNode] = Tuple.Create(val, dVal);
+        }
+      }
+
+      internal void UpdateParamValues(double[] x) {
+        Trace.Assert(x.Length == paramIdx2node.Count);
+        for (int paramIdx = 0; paramIdx < x.Length; paramIdx++) {
+          var n = paramIdx2node[paramIdx];
+          node2val[n] = Tuple.Create(x[paramIdx], node2val[n].Item2); // prevent allocation of new Vector
+        }
+      }
+    }
   }
 }
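The central structural change in Problem.cs is the split of parameter fitting into two stages: PreTuneParameters first fits each tree f(x,y) separately against finite differences of its target series (a cheap approximation of dy/dt that needs no ODE solve), and OptimizeParameters then refines all parameters jointly against the numerically integrated trajectory y(t). The following self-contained C# sketch illustrates the two-stage idea on a toy model y' = θ·y; it is not the changeset's code: the Func stands in for the interpreted expression tree, a grid search stands in for the ALGLIB Levenberg-Marquardt runs, and all names here are hypothetical.

using System;
using System.Linq;

// Hypothetical standalone demo of the two-stage fit; not HeuristicLab code.
public static class TwoStageFitDemo {
  // Stage 1 (cf. PreTuneParameters): fit f against finite differences,
  // (y[t+1] - y[t]) ≈ f(y[t], θ); cheap because nothing is integrated.
  public static double DifferenceResidualSum(double[] y, double theta, Func<double, double, double> f) {
    double sum = 0.0;
    for (int t = 0; t + 1 < y.Length; t++) {
      var res = (y[t + 1] - y[t]) - f(y[t], theta); // residual against approximated dy/dt
      sum += res * res;
    }
    return sum;
  }

  // Stage 2 (cf. OptimizeParameters): integrate f with forward Euler and
  // compare the whole simulated trajectory to the data.
  public static double IntegratedResidualSum(double[] y, double theta, Func<double, double, double> f, int innerSteps = 10) {
    double yPred = y[0]; // start from the observed initial value
    double sum = 0.0, h = 1.0 / innerSteps;
    for (int t = 1; t < y.Length; t++) {
      for (int s = 0; s < innerSteps; s++)
        yPred += h * f(yPred, theta); // forward Euler sub-steps, as in IntegrateHL
      var res = y[t] - yPred;
      sum += res * res;
    }
    return sum;
  }

  public static void Main() {
    // toy data sampled from y' = -0.5·y, i.e. y(t) = exp(-0.5·t)
    var y = Enumerable.Range(0, 20).Select(t => Math.Exp(-0.5 * t)).ToArray();
    Func<double, double, double> f = (yi, th) => th * yi; // model: y' = θ·y

    // a crude grid search stands in for the Levenberg-Marquardt optimizer
    var grid = Enumerable.Range(-100, 200).Select(i => i / 100.0).ToArray();
    var thetaPre = grid.OrderBy(th => DifferenceResidualSum(y, th, f)).First();
    var thetaOpt = grid.OrderBy(th => IntegratedResidualSum(y, th, f)).First();
    Console.WriteLine($"pre-tuned θ = {thetaPre}, integrated fit θ = {thetaOpt} (true value -0.5)");
  }
}

The point of the split is that the difference-based fit supplies starting values, so the much more expensive integrated fit (one full forward simulation per residual evaluation) starts near a sensible optimum instead of from random parameters.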
branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Vector.cs
--- r16600
+++ r16601

 using System;
 using System.Diagnostics;
+using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;

 namespace HeuristicLab.Problems.DynamicalSystemsModelling {
…
       return new Vector(arr);
     }
+
+    internal static Vector CreateIndicator(int length, int idx) {
+      var gArr = new double[length]; // backing array
+      gArr[idx] = 1.0;
+      return new Vector(gArr);
+    }
   }
 }
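Vector.CreateIndicator packages the gradient-seeding idiom that previously appeared inline in Problem.cs (a zeroed backing array with a single 1.0): parameter i starts with gradient e_i, the i-th unit vector, so when InterpretRec propagates (value, gradient) pairs through a tree, component j of the resulting gradient is ∂f/∂θ_j. A minimal self-contained illustration of this forward-mode seeding (hypothetical names, not the HeuristicLab API):

using System;
using System.Linq;

public static class IndicatorSeedDemo {
  // same construction as Vector.CreateIndicator: the unit vector e_idx in R^length
  static double[] CreateIndicator(int length, int idx) {
    var arr = new double[length];
    arr[idx] = 1.0;
    return arr;
  }

  public static void Main() {
    // f(θ) = θ0·θ1 + θ1 evaluated at θ = (2, 3) with forward-mode (value, gradient) pairs
    double[] theta = { 2.0, 3.0 };
    var val = new double[2];
    var grad = new double[2][];
    for (int i = 0; i < 2; i++) { val[i] = theta[i]; grad[i] = CreateIndicator(2, i); }

    // product and sum rules: d(θ0·θ1 + θ1) = θ1·dθ0 + θ0·dθ1 + dθ1
    double f = val[0] * val[1] + val[1];
    var df = grad[0].Zip(grad[1], (g0, g1) => val[1] * g0 + val[0] * g1 + g1).ToArray();

    Console.WriteLine($"f = {f}, df/dθ0 = {df[0]}, df/dθ1 = {df[1]}"); // expect 9, 3, 3
  }
}

This is why NodeValueLookup seeds every constant node with Vector.CreateIndicator(constantNodes.Length, paramIdx): after a single forward pass per data point, the per-parameter derivatives needed for the Levenberg-Marquardt Jacobian are already assembled in each node's gradient vector.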