Changeset 16155
 Timestamp: 09/18/18 11:28:37 (13 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs
r16154 r16155 117 117 private const string NumericIntegrationStepsParameterName = "Steps for numeric integration"; 118 118 private const string TrainingEpisodesParameterName = "Training episodes"; 119 private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes"; 119 120 #endregion 120 121 … … 145 146 public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter { 146 147 get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; } 148 } 149 public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter { 150 get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; } 147 151 } 148 152 #endregion … … 177 181 public IEnumerable<IntRange> TrainingEpisodes { 178 182 get { return TrainingEpisodesParameter.Value; } 183 } 184 public bool OptimizeParametersForEpisodes { 185 get { return OptimizeParametersForEpisodesParameter.Value.Value; } 179 186 } 180 187 … … 193 200 [StorableHook(HookType.AfterDeserialization)] 194 201 private void AfterDeserialization() { 202 if(!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) { 203 Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false))); 204 } 195 205 RegisterEventHandlers(); 196 206 } … … 216 226 Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10))); 217 227 Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. 
This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>())); 218 228 Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false))); 219 229 RegisterEventHandlers(); 220 230 InitAllParameters(); … … 225 235 } 226 236 227 228 237 public override double Evaluate(Individual individual, IRandom random) { 229 238 var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual 230 239 240 if(OptimizeParametersForEpisodes) { 241 int eIdx = 0; 242 double totalNMSE = 0.0; 243 int totalSize = 0; 244 foreach(var episode in TrainingEpisodes) { 245 double[] optTheta; 246 double nmse; 247 OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse); 248 individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method 249 eIdx++; 250 totalNMSE += nmse * episode.Size; 251 totalSize += episode.Size; 252 } 253 return totalNMSE / totalSize; 254 } else { 255 double[] optTheta; 256 double nmse; 257 OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse); 258 individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method 259 return nmse; 260 } 261 } 262 263 private void OptimizeForEpisodes(ISymbolicExpressionTree[] trees, IRandom random, IEnumerable<IntRange> episodes, out double[] optTheta, out double nmse) { 264 var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End  e.Start)).ToArray(); 231 265 var problemData = ProblemData; 232 var rows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End  e.Start)).ToArray();233 266 var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray(); 234 267 var latentVariables = 
Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees … … 256 289 var theta = nodeIdx.Select(_ => random.NextDouble() * 2.0  1.0).ToArray(); // init params randomly from Unif(1,1) 257 290 258 double[]optTheta = new double[0];291 optTheta = new double[0]; 259 292 if(theta.Length > 0) { 260 293 alglib.minlbfgsstate state; … … 263 296 alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations); 264 297 alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, 265 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables }); //TODO: create a type298 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables }); //TODO: create a type 266 299 alglib.minlbfgsresults(state, out optTheta, out report); 267 300 … … 292 325 * NFEV countains number of function calculations 293 326 */ 294 if(report.terminationtype < 0) return double.MaxValue;327 if(report.terminationtype < 0) { nmse = 10E6; return; } 295 328 } 296 329 297 330 // perform evaluation for optimal theta to get quality value 298 331 double[] grad = new double[optTheta.Length]; 299 double optQuality = double.NaN; 300 EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad, 301 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables }); 302 if(double.IsNaN(optQuality)  double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE) 303 304 individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method 305 return optQuality; 332 nmse = double.NaN; 333 EvaluateObjectiveAndGradient(optTheta, ref 
nmse, grad, 334 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables }); 335 if(double.IsNaN(nmse)  double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE) 306 336 } 307 337 … … 371 401 } 372 402 403 var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities); 404 var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual 405 373 406 // TODO extract common functionality from Evaluate and Analyze 374 var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);375 var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate376 var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual377 407 var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>(); 378 379 380 408 foreach(var tree in trees) { 381 409 foreach(var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) { … … 388 416 389 417 var trainingList = new ItemList<DataTable>(); 390 var trainingPrediction = Integrate( 391 trees, // we assume trees contain expressions for the change of each target variable over time y'(t) 392 problemData.Dataset, 393 problemData.AllowedInputVariables.ToArray(), 394 targetVars, 395 latentVariables, 396 TrainingEpisodes, 397 nodeIdx, 398 optTheta, 399 NumericIntegrationSteps).ToArray(); 400 401 // only for actual target values 402 var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End  e.Start)); 403 for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 404 var targetVar = targetVars[colIdx]; 405 var trainingDataTable = new DataTable(targetVar + " prediction (training)"); 406 var actualValuesRow = new DataRow(targetVar, "The values of " + 
targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); 407 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray()); 408 trainingDataTable.Rows.Add(actualValuesRow); 409 trainingDataTable.Rows.Add(predictedValuesRow); 410 trainingList.Add(trainingDataTable); 411 } 412 413 // TODO: DRY for training and test 414 var testList = new ItemList<DataTable>(); 415 var testRows = ProblemData.TestIndices.ToArray(); 416 var testPrediction = Integrate( 417 trees, // we assume trees contain expressions for the change of each target variable over time y'(t) 418 problemData.Dataset, 419 problemData.AllowedInputVariables.ToArray(), 420 targetVars, 421 latentVariables, 422 new IntRange[] { ProblemData.TestPartition }, 423 nodeIdx, 424 optTheta, 425 NumericIntegrationSteps).ToArray(); 426 427 for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 428 var targetVar = targetVars[colIdx]; 429 var testDataTable = new DataTable(targetVar + " prediction (test)"); 430 var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows)); 431 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray()); 432 testDataTable.Rows.Add(actualValuesRow); 433 testDataTable.Rows.Add(predictedValuesRow); 434 testList.Add(testDataTable); 435 } 436 437 results["Prediction (training)"].Value = trainingList.AsReadOnly(); 438 results["Prediction (test)"].Value = testList.AsReadOnly(); 439 440 #region simplification of models 441 // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal 442 var models = new VariableCollection(); // to store target var names and original version of tree 443 444 foreach(var tup in targetVars.Zip(trees, Tuple.Create)) { 445 var targetVarName = tup.Item1; 446 var tree = 
tup.Item2; 447 448 // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols 449 int nextParIdx = 0; 450 var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx)); 451 452 // var shownTree = (SymbolicExpressionTree)tree.Clone(); 453 // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode); 454 // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode); 455 // 456 // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) { 457 // double constantsVal = optTheta[nodeIdx[n.original]]; 458 // 459 // ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal }; 460 // 461 // var parentNode = n.shown.Parent; 462 // int replacementIndex = parentNode.IndexOfSubtree(n.shown); 463 // parentNode.RemoveSubtree(replacementIndex); 464 // parentNode.InsertSubtree(replacementIndex, replacementNode); 465 // } 466 467 var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)"); 468 origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone(); 469 models.Add(origTreeVar); 470 var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)"); 471 simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree); 472 models.Add(simplifiedTreeVar); 473 474 } 475 results["Models"].Value = models; 476 #endregion 418 419 if(OptimizeParametersForEpisodes) { 420 var eIdx = 0; 421 var trainingPredictions = new List<Tuple<double, Vector>[][]>(); 422 foreach(var episode in TrainingEpisodes) { 423 var episodes = new[] { episode }; 424 var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate 425 var trainingPrediction = Integrate( 426 trees, // we assume trees contain expressions for the change of each target variable over time y'(t) 427 problemData.Dataset, 428 problemData.AllowedInputVariables.ToArray(), 429 
targetVars, 430 latentVariables, 431 episodes, 432 nodeIdx, 433 optTheta, 434 NumericIntegrationSteps).ToArray(); 435 trainingPredictions.Add(trainingPrediction); 436 eIdx++; 437 } 438 439 // only for actual target values 440 var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End  e.Start)); 441 for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 442 var targetVar = targetVars[colIdx]; 443 var trainingDataTable = new DataTable(targetVar + " prediction (training)"); 444 var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); 445 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray()); 446 trainingDataTable.Rows.Add(actualValuesRow); 447 trainingDataTable.Rows.Add(predictedValuesRow); 448 trainingList.Add(trainingDataTable); 449 } 450 results["Prediction (training)"].Value = trainingList.AsReadOnly(); 451 452 453 var models = new VariableCollection(); 454 455 foreach(var tup in targetVars.Zip(trees, Tuple.Create)) { 456 var targetVarName = tup.Item1; 457 var tree = tup.Item2; 458 459 var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)"); 460 origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone(); 461 models.Add(origTreeVar); 462 } 463 results["Models"].Value = models; 464 } else { 465 var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate 466 var trainingPrediction = Integrate( 467 trees, // we assume trees contain expressions for the change of each target variable over time y'(t) 468 problemData.Dataset, 469 problemData.AllowedInputVariables.ToArray(), 470 targetVars, 471 latentVariables, 472 TrainingEpisodes, 473 nodeIdx, 474 optTheta, 475 NumericIntegrationSteps).ToArray(); 476 // only for actual target values 477 var trainingRows = 
TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End  e.Start)); 478 for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 479 var targetVar = targetVars[colIdx]; 480 var trainingDataTable = new DataTable(targetVar + " prediction (training)"); 481 var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); 482 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray()); 483 trainingDataTable.Rows.Add(actualValuesRow); 484 trainingDataTable.Rows.Add(predictedValuesRow); 485 trainingList.Add(trainingDataTable); 486 } 487 // TODO: DRY for training and test 488 var testList = new ItemList<DataTable>(); 489 var testRows = ProblemData.TestIndices.ToArray(); 490 var testPrediction = Integrate( 491 trees, // we assume trees contain expressions for the change of each target variable over time y'(t) 492 problemData.Dataset, 493 problemData.AllowedInputVariables.ToArray(), 494 targetVars, 495 latentVariables, 496 new IntRange[] { ProblemData.TestPartition }, 497 nodeIdx, 498 optTheta, 499 NumericIntegrationSteps).ToArray(); 500 501 for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 502 var targetVar = targetVars[colIdx]; 503 var testDataTable = new DataTable(targetVar + " prediction (test)"); 504 var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows)); 505 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray()); 506 testDataTable.Rows.Add(actualValuesRow); 507 testDataTable.Rows.Add(predictedValuesRow); 508 testList.Add(testDataTable); 509 } 510 511 results["Prediction (training)"].Value = trainingList.AsReadOnly(); 512 results["Prediction (test)"].Value = testList.AsReadOnly(); 513 #region 
simplification of models 514 // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal 515 var models = new VariableCollection(); // to store target var names and original version of tree 516 517 foreach(var tup in targetVars.Zip(trees, Tuple.Create)) { 518 var targetVarName = tup.Item1; 519 var tree = tup.Item2; 520 521 // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols 522 int nextParIdx = 0; 523 var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx)); 524 525 // var shownTree = (SymbolicExpressionTree)tree.Clone(); 526 // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode); 527 // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode); 528 // 529 // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) { 530 // double constantsVal = optTheta[nodeIdx[n.original]]; 531 // 532 // ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal }; 533 // 534 // var parentNode = n.shown.Parent; 535 // int replacementIndex = parentNode.IndexOfSubtree(n.shown); 536 // parentNode.RemoveSubtree(replacementIndex); 537 // parentNode.InsertSubtree(replacementIndex, replacementNode); 538 // } 539 540 var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)"); 541 origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone(); 542 models.Add(origTreeVar); 543 var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)"); 544 simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree); 545 models.Add(simplifiedTreeVar); 546 547 } 548 results["Models"].Value = models; 549 #endregion 550 } 477 551 } 478 552
Note: See TracChangeset for help on using the changeset viewer.