Changeset 15970
- Timestamp:
- 06/22/18 09:47:35 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs
r15968 r15970 112 112 private const string MaximumLengthParameterName = "Size limit"; 113 113 private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations"; 114 private const string NumberOfLatentVariablesParameterName = "Number of latent variables"; 115 private const string NumericIntegrationStepsParameterName = "Steps for numeric integration"; 114 116 #endregion 115 117 … … 131 133 public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter { 132 134 get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; } 135 } 136 public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter { 137 get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; } 138 } 139 public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter { 140 get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; } 133 141 } 134 142 #endregion … … 155 163 get { return MaximumParameterOptimizationIterationsParameter.Value.Value; } 156 164 } 165 public int NumberOfLatentVariables { 166 get { return NumberOfLatentVariablesParameter.Value.Value; } 167 } 168 public int NumericIntegrationSteps { 169 get { return NumericIntegrationStepsParameter.Value.Value; } 170 } 171 157 172 #endregion 158 173 … … 184 199 var targetVariables = new CheckedItemCollection<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem 185 200 var functions = CreateFunctionSet(); 186 Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system ", new RegressionProblemData()));201 Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. 
Use CSV import functionality to import data.", new RegressionProblemData())); 187 202 Parameters.Add(new ValueParameter<ReadOnlyCheckedItemCollection<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables)); 188 203 Parameters.Add(new ValueParameter<ReadOnlyCheckedItemCollection<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions)); 189 Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression", new IntValue(20))); 190 Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS)", new IntValue(100))); 204 Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10))); 205 Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100))); 206 Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 
5) as necessary (default = 0)", new IntValue(0))); 207 Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10))); 191 208 192 209 RegisterEventHandlers(); … … 201 218 var rows = ProblemData.TrainingIndices.ToArray(); 202 219 var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray(); 203 var targetValues = new double[rows.Length,targetVars.Length]; 204 220 var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees 221 var targetValues = new double[rows.Length, targetVars.Length]; 222 205 223 // collect values of all target variables 206 224 var colIdx = 0; 207 foreach (var targetVar in targetVars) {225 foreach (var targetVar in targetVars) { 208 226 int rowIdx = 0; 209 foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {227 foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) { 210 228 targetValues[rowIdx, colIdx] = value; 211 229 rowIdx++; … … 230 248 alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state); 231 249 alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations); 232 alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows }); //TODO: create a type 250 alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, 251 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows, NumericIntegrationSteps, latentVariables }); //TODO: create a type 233 252 alglib.minlbfgsresults(state, out optTheta, out report); 234 253 … … 265 284 
double[] grad = new double[optTheta.Length]; 266 285 double optQuality = double.NaN; 267 EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad, new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows }); 286 EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad, 287 new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows, NumericIntegrationSteps, latentVariables }); 268 288 if (double.IsNaN(optQuality) || double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE) 269 289 … … 279 299 var targetValues = (double[,])((object[])obj)[4]; 280 300 var rows = (int[])((object[])obj)[5]; 301 var numericIntegrationSteps = (int)((object[])obj)[6]; 302 var latentVariables = (string[])((object[])obj)[7]; 281 303 282 304 var predicted = Integrate( … … 285 307 problemData.AllowedInputVariables.ToArray(), 286 308 targetVariables, 309 latentVariables, 287 310 rows, 288 311 nodeIdx, // TODO: is it Ok to use rows here ? 
289 x ).ToArray();312 x, numericIntegrationSteps).ToArray(); 290 313 291 314 … … 305 328 foreach (var y_pred in predicted) { 306 329 // TODO NMSE to put the same weight on each target regardless of the value range; 307 for (int c = 0;c<y_pred.Length;c++) {308 330 for (int c = 0; c < y_pred.Length; c++) { 331 309 332 var y_pred_f = y_pred[c].Item1; 310 var y = targetValues[r, c];333 var y = targetValues[r, c]; 311 334 312 335 var res = (y - y_pred_f); … … 336 359 // TODO extract common functionality from Evaluate and Analyze 337 360 var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities); 338 var optTheta = ((DoubleArray) 361 var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate 339 362 var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual 340 363 var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>(); … … 348 371 var problemData = ProblemData; 349 372 var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray(); 373 var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees 350 374 351 375 var trainingList = new ItemList<DataTable>(); … … 356 380 problemData.AllowedInputVariables.ToArray(), 357 381 targetVars, 382 latentVariables, 358 383 trainingRows, 359 384 nodeIdx, 360 optTheta).ToArray(); 385 optTheta, 386 NumericIntegrationSteps).ToArray(); 361 387 362 388 for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) { 363 389 var targetVar = targetVars[colIdx]; 364 var trainingDataTable = new DataTable(targetVar + " prediction (training)");390 var trainingDataTable = new DataTable(targetVar + " prediction (training)"); 365 391 var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, 
problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); 366 392 var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray()); … … 378 404 problemData.AllowedInputVariables.ToArray(), 379 405 targetVars, 406 latentVariables, 380 407 testRows, 381 408 nodeIdx, 382 optTheta).ToArray(); 409 optTheta, 410 NumericIntegrationSteps).ToArray(); 383 411 384 412 for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) { … … 400 428 #region interpretation 401 429 private static IEnumerable<Tuple<double, Vector>[]> Integrate( 402 ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, IEnumerable<int> rows,403 Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx, double[] parameterValues ) {404 405 int NUM_STEPS = 1;430 ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<int> rows, 431 Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx, double[] parameterValues, int numericIntegrationSteps = 100) { 432 433 int NUM_STEPS = numericIntegrationSteps ; 406 434 double h = 1.0 / NUM_STEPS; 407 435 408 436 // return first value as stored in the dataset 409 410 437 yield return targetVariables 411 438 .Select(targetVar => Tuple.Create(dataset.GetDoubleValue(targetVar, rows.First()), Vector.Zero)) … … 422 449 variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero)); 423 450 } 451 // add value entries for latent variables which are also integrated 452 foreach(var latentVar in latentVariables) { 453 variableValues.Add(latentVar, Tuple.Create(0.0, Vector.Zero)); // we don't have observations for latent variables -> assume zero as starting value 454 } 455 var calculatedVariables = targetVariables.Concat(latentVariables); // TODO: must coincide with the order of trees in the encoding 424 456 425 457 foreach (var t in 
rows.Skip(1)) { 426 458 for (int step = 0; step < NUM_STEPS; step++) { 427 459 var deltaValues = new Dictionary<string, Tuple<double, Vector>>(); 428 foreach (var tup in trees.Zip( targetVariables, Tuple.Create)) {460 foreach (var tup in trees.Zip(calculatedVariables, Tuple.Create)) { 429 461 var tree = tup.Item1; 430 462 var targetVarName = tup.Item2; … … 444 476 } 445 477 478 // only return the target variables for calculation of errors 446 479 yield return targetVariables 447 480 .Select(targetVar => variableValues[targetVar]) … … 464 497 switch (node.Symbol.Name) { 465 498 case "+": { 466 var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues); 499 var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues); // TODO capture all parameters into a state type for interpretation 467 500 var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues); 468 501 … … 520 553 * | 521 554 * V 522 * TargetVariables FunctionSet MaximumLength 523 * | | | 524 * V V | 525 * Grammar <---------------+ 555 * TargetVariables FunctionSet MaximumLength NumberOfLatentVariables 556 * | | | | 557 * V V | | 558 * Grammar <---------------+------------------- 526 559 * | 527 560 * V … … 539 572 540 573 MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged; 574 575 NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged; 576 } 577 578 private void NumLatentVariablesChanged(object sender, EventArgs e) { 579 UpdateGrammarAndEncoding(); 541 580 } 542 581 … … 599 638 return n.Symbol.Name.StartsWith("θ"); 600 639 } 640 private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) { 641 return n.Symbol.Name.StartsWith("λ"); 642 } 601 643 602 644 … … 616 658 var g = CreateGrammar(); 617 659 foreach (var targetVar in TargetVariables.CheckedItems) { 618 encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar+"_tree",g, MaximumLength, MaximumLength)); // only limit by 
length 660 encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length 661 } 662 for (int i = 1; i <= NumberOfLatentVariables; i++) { 663 encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength)); 619 664 } 620 665 Encoding = encoding; … … 642 687 g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff 643 688 } 689 690 // generate symbols for latent variables 691 for (int i = 1; i <= NumberOfLatentVariables; i++) { 692 g.AddTerminalSymbol("λ" + i); // latent (unobserved) variable which is integrated like a target variable 693 } 694 644 695 return g; 645 696 }
Note: See TracChangeset
for help on using the changeset viewer.