Changeset 16954


Ignore:
Timestamp:
05/15/19 12:45:38 (12 days ago)
Author:
gkronber
Message:

#2925: Add problem instance provider and instances. Use penalized regression splines for calculation of numeric differences (for pre-tuning).

Location:
branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3
Files:
25 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/HeuristicLab.Problems.DynamicalSystemsModelling-3.3.csproj

    r16664 r16954  
    114114    </Reference>
    115115    <Reference Include="System.Drawing" />
     116    <Reference Include="System.IO.Compression" />
    116117    <Reference Include="System.Windows.Forms" />
    117118  </ItemGroup>
    118119  <ItemGroup>
     120    <Compile Include="Instances\DataDescriptor.cs" />
    119121    <Compile Include="OdeParameterIdentification.cs" />
    120122    <Compile Include="Plugin.cs" />
    121123    <Compile Include="Problem.cs" />
    122124    <Compile Include="Properties\AssemblyInfo.cs" />
     125    <Compile Include="ProblemInstanceProvider.cs" />
    123126    <Compile Include="Solution.cs" />
    124127    <Compile Include="SolutionView.cs">
     
    132135  <ItemGroup>
    133136    <None Include="HeuristicLab.snk" />
     137    <None Include="Instances\bacterial_1.csv" />
     138    <EmbeddedResource Include="Instances\bacterial_1.csv.zip" />
     139    <None Include="Instances\bar_magnets_1.csv" />
     140    <EmbeddedResource Include="Instances\bar_magnets_1.csv.zip" />
     141    <None Include="Instances\ChemicalReaction.csv" />
     142    <EmbeddedResource Include="Instances\ChemicalReaction.csv.zip" />
     143    <None Include="Instances\E-CELL.csv" />
     144    <EmbeddedResource Include="Instances\E-CELL.csv.zip" />
     145    <None Include="Instances\Glider_1.csv" />
     146    <EmbeddedResource Include="Instances\Glider_1.csv.zip" />
     147    <None Include="Instances\LotkaVolterra.csv" />
     148    <EmbeddedResource Include="Instances\LotkaVolterra.csv.zip" />
     149    <None Include="Instances\predator_prey_1.csv" />
     150    <EmbeddedResource Include="Instances\predator_prey_1.csv.zip" />
     151    <None Include="Instances\S-System.csv" />
     152    <EmbeddedResource Include="Instances\S-System.csv.zip" />
     153    <None Include="Instances\shear_flow_1.csv" />
     154    <EmbeddedResource Include="Instances\shear_flow_1.csv.zip" />
     155    <None Include="Instances\ThreeLotkaVolterra.csv" />
     156    <EmbeddedResource Include="Instances\ThreeLotkaVolterra.csv.zip" />
     157    <None Include="Instances\van_der_pol_1.csv" />
     158    <EmbeddedResource Include="Instances\van_der_pol_1.csv.zip" />
    134159    <None Include="packages.config" />
    135160    <None Include="Plugin.cs.frame" />
     
    232257      <Name>HeuristicLab.Problems.DataAnalysis-3.4</Name>
    233258      <Private>False</Private>
     259    </ProjectReference>
     260    <ProjectReference Include="..\..\HeuristicLab.Problems.Instances.DataAnalysis\3.3\HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj">
     261      <Project>{94C7714E-29D4-4D6D-B213-2C18D627AB75}</Project>
     262      <Name>HeuristicLab.Problems.Instances.DataAnalysis-3.3</Name>
    234263    </ProjectReference>
    235264    <ProjectReference Include="..\..\HeuristicLab.Problems.Instances\3.3\HeuristicLab.Problems.Instances-3.3.csproj">
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Plugin.cs.frame

    r16952 r16954  
    5252  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")]
    5353  [PluginDependency("HeuristicLab.Problems.Instances", "3.3")]
     54  [PluginDependency("HeuristicLab.Problems.Instances.DataAnalysis", "3.3")]
    5455  [PluginDependency("HeuristicLab.Random", "3.3")]
    5556  public class Plugin : PluginBase {
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs

    r16951 r16954  
    3333using HeuristicLab.Optimization;
    3434using HeuristicLab.Parameters;
    35 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3635using HeuristicLab.Problems.DataAnalysis;
    3736using HeuristicLab.Problems.DataAnalysis.Symbolic;
     
    4544  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
    4645  [StorableType("065C6A61-773A-42C9-9DE5-61A5D1D823EB")]
    47   public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
     46  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<Problem> {
    4847    #region parameter names
    4948    private const string ProblemDataParameterName = "Data";
     
    103102      get { return (IFixedValueParameter<DoubleValue>)Parameters["ODE NMSE weight"]; }
    104103    }
     104    public IFixedValueParameter<DoubleValue> NumericDifferencesSmoothingParameter {
     105      get { return (IFixedValueParameter<DoubleValue>)Parameters["Numeric differences smoothing"]; }
     106    }
    105107    #endregion
    106108
     
    135137      get { return NumericIntegrationStepsParameter.Value.Value; }
    136138    }
    137     public IEnumerable<IntRange> TrainingEpisodes {
     139    public IList<IntRange> TrainingEpisodes {
    138140      get { return TrainingEpisodesParameter.Value; }
    139141    }
     
    141143      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    142144    }
     145    public double NumericDifferencesSmoothing {
     146      get { return NumericDifferencesSmoothingParameter.Value.Value; }
     147    }
     148
    143149
    144150    public string OdeSolver {
     
    212218      Parameters.Add(new FixedValueParameter<DoubleValue>("Pretuning NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
    213219      Parameters.Add(new FixedValueParameter<DoubleValue>("ODE NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
     220      Parameters.Add(new FixedValueParameter<DoubleValue>("Numeric differences smoothing", "Determines the amount of smoothing for the numeric differences which are calculated for pre-tuning. Values from -8 to 8 are reasonable. Use very low value if the data contains no noise. Default: 2.", new DoubleValue(2.0)));
    214221
    215222      var solversStr = new string[] { "HeuristicLab" /* , "CVODES" */};
     
    248255        return totalNMSE / totalSize;
    249256      } else {
    250         // double[] optTheta;
    251         double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, TrainingEpisodes, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations,
    252           PretuningErrorWeight.Value.Value, OdeErrorWeight.Value.Value);
     257        // when no training episodes are specified then we implicitly use the training partition from the problemData
     258        var trainingEpisodes = TrainingEpisodes;
     259        if (!trainingEpisodes.Any()) {
     260          trainingEpisodes = new List<IntRange>();
     261          trainingEpisodes.Add((IntRange)ProblemData.TrainingPartition.Clone());
     262        }
     263        double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, trainingEpisodes, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations,
     264          PretuningErrorWeight.Value.Value, OdeErrorWeight.Value.Value, NumericDifferencesSmoothing);
    253265        // individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
    254266        return nmse;
     
    268280      int maxOdeParameterOptIterations,
    269281      double pretuningErrorWeight = 0.5,
    270       double odeErrorWeight = 0.5
     282      double odeErrorWeight = 0.5,
     283      double numericDifferencesSmoothing = 2
    271284      ) {
    272285
     
    280293
    281294      // optimize parameters by fitting f(x,y) to calculated differences dy/dt(t)
    282       double nmse = pretuningErrorWeight * PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes, maxPretuningParameterOptIterations,
     295      double nmse = pretuningErrorWeight * PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes,
     296        maxPretuningParameterOptIterations, numericDifferencesSmoothing,
    283297        initialTheta, out double[] pretunedParameters);
    284298
     
    316330      IEnumerable<IntRange> episodes,
    317331      int maxParameterOptIterations,
     332      double numericDifferencesSmoothing, // for smoothing of numeric differences
    318333      double[][] initialTheta,
    319334      out double[] optTheta) {
     
    326341
    327342      // first calculate values of latent variables by integration
    328       if(latentVariables.Length > 0) {
     343      if (latentVariables.Length > 0) {
    329344        var inputVariables = targetVars.Concat(latentTrees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))).Except(latentVariables).Distinct();
    330345        var myState = new OptimizationData(latentTrees, targetVars, inputVariables.ToArray(), problemData, null, episodes.ToArray(), 10, latentVariables, "HeuristicLab");
     
    360375          var episodeRows = Enumerable.Range(ep.Start, ep.Size);
    361376          var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], episodeRows).ToArray();
    362           targetValuesDiff.AddRange(targetValues.Skip(1).Zip(targetValues, (t1, t0) => t1 - t0));// TODO: smoothing or multi-pole);
    363         }
    364         var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End - 1)); // because we lose the last row in the differencing step
     377          targetValuesDiff.AddRange(CalculateDifferences(targetValues, numericDifferencesSmoothing));
     378        }
     379        var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End));
    365380
    366381        // data for input variables is assumed to be known
     
    417432
    418433
     434
    419435    // similar to above but this time we integrate and optimize all parameters for all targets concurrently
    420436    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
     
    617633      // }
    618634
     635      // when no training episodes are specified then we implicitly use the training partition from the problemData
     636      var trainingEpisodes = TrainingEpisodes;
     637      if (!trainingEpisodes.Any()) {
     638        trainingEpisodes = new List<IntRange>();
     639        trainingEpisodes.Add((IntRange)ProblemData.TrainingPartition.Clone());
     640      }
     641
    619642      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
    620643      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
     
    674697          .Distinct();
    675698
    676         var optimizationData = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, null, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
     699        var optimizationData = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, null, trainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
    677700        var numParams = optimizationData.nodeValueLookup.ParameterCount;
    678701
     
    755778        // results["Squared error and gradient"].Value = errorTable;
    756779
    757         // TODO: DRY for training and test
    758         var testList = new ItemList<DataTable>();
    759         var testRows = ProblemData.TestIndices.ToArray();
    760         var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
    761         var testPrediction = Integrate(testOptimizationData).ToArray();
    762 
    763         for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
    764           // is target variable
    765           if (colIdx < targetVars.Length) {
    766             var targetVar = targetVars[colIdx];
    767             var testDataTable = new DataTable(targetVar + " prediction (test)");
    768             var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
    769             var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
    770             testDataTable.Rows.Add(actualValuesRow);
    771             testDataTable.Rows.Add(predictedValuesRow);
    772             testList.Add(testDataTable);
    773 
    774           } else {
    775             // var latentVar = latentVariables[colIdx - targetVars.Length];
    776             // var testDataTable = new DataTable(latentVar + " prediction (test)");
    777             // var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
    778             // var emptyRow = new DataRow(latentVar);
    779             // testDataTable.Rows.Add(emptyRow);
    780             // testDataTable.Rows.Add(predictedValuesRow);
    781             // testList.Add(testDataTable);
    782           }
    783         }
    784 
    785         results["Prediction (training)"].Value = trainingList.AsReadOnly();
    786         results["Prediction (test)"].Value = testList.AsReadOnly();
    787 
     780        // only if there is a non-empty test partition
     781        if (ProblemData.TestIndices.Any()) {
     782          // TODO: DRY for training and test
     783
     784          var testList = new ItemList<DataTable>();
     785          var testRows = ProblemData.TestIndices.ToArray();
     786          var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
     787          var testPrediction = Integrate(testOptimizationData).ToArray();
     788
     789          for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
     790            // is target variable
     791            if (colIdx < targetVars.Length) {
     792              var targetVar = targetVars[colIdx];
     793              var testDataTable = new DataTable(targetVar + " prediction (test)");
     794              var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
     795              var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
     796              testDataTable.Rows.Add(actualValuesRow);
     797              testDataTable.Rows.Add(predictedValuesRow);
     798              testList.Add(testDataTable);
     799
     800            } else {
     801              // var latentVar = latentVariables[colIdx - targetVars.Length];
     802              // var testDataTable = new DataTable(latentVar + " prediction (test)");
     803              // var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
     804              // var emptyRow = new DataRow(latentVar);
     805              // testDataTable.Rows.Add(emptyRow);
     806              // testDataTable.Rows.Add(predictedValuesRow);
     807              // testList.Add(testDataTable);
     808            }
     809          }
     810
     811          results["Prediction (training)"].Value = trainingList.AsReadOnly();
     812          results["Prediction (test)"].Value = testList.AsReadOnly();
     813
     814        }
    788815
    789816        #region simplification of models
     
    802829                   targetVars,
    803830                   latentVariables,
    804                    TrainingEpisodes,
     831                   trainingEpisodes,
    805832                   OdeSolver,
    806833                   NumericIntegrationSteps);
     
    834861          var solutionDataset = ((Dataset)problemData.Dataset).ToModifiable();
    835862          var absValues = solutionDataset.GetDoubleValues(name).ToArray();
    836           solutionDataset.AddVariable(name + "_diff", absValues.Skip(1).Zip(absValues, (v1, v0) => v1 - v0).Concat(new double[] { 0.0 }).ToList());
     863
     864          solutionDataset.AddVariable(name + "_diff", CalculateDifferences(absValues, NumericDifferencesSmoothing).ToList());
    837865          var solutionProblemData = new RegressionProblemData(solutionDataset, problemData.AllowedInputVariables, name + "_diff");
     866          solutionProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
     867          solutionProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
     868          solutionProblemData.TestPartition.Start = problemData.TestPartition.Start;
     869          solutionProblemData.TestPartition.End = problemData.TestPartition.End;
    838870          var solution = model.CreateRegressionSolution(solutionProblemData);
    839871          results.AddOrUpdateResult("Solution " + name, solution);
     
    15441576     *
    15451577     * ProblemData
    1546      *    |
    1547      *    V
    1548      * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
    1549      *               |   |                 |                   |
    1550      *               V   V                 |                   |
    1551      *             Grammar <---------------+-------------------
    1552      *                |
    1553      *                V
    1554      *            Encoding
     1578     *    |                                                                         
     1579     *    V                                                                         
     1580     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables     
     1581     *               |   |                 |                   |                     
     1582     *               V   V                 |                   |                     
     1583     *             Grammar <---------------+-------------------                     
     1584     *                |                                                             
     1585     *                V                                                             
     1586     *            Encoding                                                           
    15551587     */
    15561588    private void RegisterEventHandlers() {
     
    15871619    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
    15881620      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
     1621      UpdateGrammarAndEncoding();
    15891622    }
    15901623
     
    16141647    #region  helper
    16151648
     1649    private static double[] CalculateDifferences(double[] targetValues, double numericDifferencesSmoothing) {
     1650      var x = Enumerable.Range(0, targetValues.Length).Select(i => (double)i).ToArray();
     1651      alglib.spline1dfitpenalized(x, targetValues, targetValues.Length / 2, numericDifferencesSmoothing,
     1652        out int info, out alglib.spline1dinterpolant s, out alglib.spline1dfitreport rep);
     1653      if (info <= 0) throw new ArgumentException("There was a problem while smoothing numeric differences. Try to use a different smoothing parameter value.");
     1654
     1655      double[] dy = new double[x.Length];
     1656      for (int i = 0; i < x.Length; i++) {
     1657        double xi = x[i];
     1658        alglib.spline1ddiff(s, xi, out double y, out double dyi, out double d2y);
     1659        dy[i] = dyi;
     1660      }
     1661      return dy;
     1662    }
    16161663
    16171664    private void InitAllParameters() {
     
    17971844
    17981845
    1799     #region Import & Export
    1800     public void Load(IRegressionProblemData data) {
    1801       Name = data.Name;
    1802       Description = data.Description;
    1803       ProblemData = data;
    1804     }
    1805 
    1806     public IRegressionProblemData Export() {
    1807       return ProblemData;
     1846    #region Import
     1847    public void Load(Problem problem) {
     1848      // transfer parameter values from problem parameter
     1849      this.ProblemData = problem.ProblemData;
     1850      this.TrainingEpisodesParameter.Value = problem.TrainingEpisodesParameter.Value;
     1851      this.TargetVariablesParameter.Value = problem.TargetVariablesParameter.Value;
     1852      this.Name = problem.Name;
     1853      this.Description = problem.Description;
    18081854    }
    18091855    #endregion
Note: See TracChangeset for help on using the changeset viewer.