Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/ProblemInstanceProvider.cs @ 16954

Last change on this file since 16954 was 16954, checked in by gkronber, 5 years ago

#2925: Add problem instance provider and instances. Use penalized regression splines for calculation of numeric differences (for pre-tuning).

File size: 10.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.IO;
26using System.IO.Compression;
27using System.Linq;
28using System.Text.RegularExpressions;
29using HeuristicLab.Data;
30using HeuristicLab.Problems.DataAnalysis;
31using HeuristicLab.Problems.DynamicalSystemsModelling.Instances;
32using HeuristicLab.Problems.Instances;
33using HeuristicLab.Problems.Instances.DataAnalysis;
34
35namespace HeuristicLab.Problems.DynamicalSystemsModelling {
36  public class ProblemInstanceProvider : ProblemInstanceProvider<Problem> {
37    public override string Name {
38      get { return "Dynamic Systems"; }
39    }
40    public override string Description {
41      get {
42        return "A set of problem instances for dynamical modelling.";
43      }
44    }
45    public override Uri WebLink {
46      get { return null; }
47    }
48    public override string ReferencePublication {
49      get { return ""; }
50    }
51
52    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
53      List<DataDescriptor> descriptorList = new List<DataDescriptor>();
54      descriptorList.Add(BacterialRespiration());
55      descriptorList.Add(BarMagnets());
56      descriptorList.Add(ChemicalReaction());
57      descriptorList.Add(E_Cell());
58      descriptorList.Add(Glider());
59      descriptorList.Add(LotkaVolterra());
60      descriptorList.Add(PredatorPrey());
61      descriptorList.Add(S_System());
62      descriptorList.Add(ShearFlow());
63      descriptorList.Add(ThreeSpeciesLotkaVolterra());
64      descriptorList.Add(VanDerPol());
65      return descriptorList;
66    }
67
68    private DataDescriptor BacterialRespiration() {
69      return new DataDescriptor {
70        Name = "Bacterial Respiration",
71        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
72        TargetVariables = new[] { "y1", "y2" },
73        InputVariables = new string[] { },
74        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
75        TestEpisodes = new IntRange[] { },
76        FileName = "bacterial_1.csv"
77      };
78    }
79
80
81    private DataDescriptor BarMagnets() {
82      return new DataDescriptor {
83        Name = "Bar Magnets",
84        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
85        TargetVariables = new[] { "y1", "y2" },
86        InputVariables = new string[] { },
87        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
88        TestEpisodes = new IntRange[] { },
89        FileName = "bar_magnets_1.csv"
90      };
91    }
92
93    private DataDescriptor ChemicalReaction() {
94      return new DataDescriptor {
95        Name = "ChemicalReaction",
96        Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468",
97        TargetVariables = new[] { "y1", "y2", "y3" },
98        InputVariables = new string[] { },
99        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
100        TestEpisodes = new IntRange[] { },
101        FileName = "ChemicalReaction.csv"
102      };
103    }
104
105    private DataDescriptor E_Cell() {
106      return new DataDescriptor {
107        Name = "E-CELL",
108        Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468",
109        TargetVariables = new[] { "y1", "y2", "y3" },
110        InputVariables = new string[] { },
111        TrainingEpisodes = new IntRange[] { new IntRange(0, 40) },
112        TestEpisodes = new IntRange[] { },
113        FileName = "E-CELL.csv"
114      };
115    }
116
117    private DataDescriptor Glider() {
118      return new DataDescriptor {
119        Name = "Bar Magnets",
120        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
121        TargetVariables = new[] { "y1", "y2" },
122        InputVariables = new string[] { },
123        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
124        TestEpisodes = new IntRange[] { },
125        FileName = "Glider_1.csv"
126      };
127    }
128
129    private DataDescriptor LotkaVolterra() {
130      return new DataDescriptor {
131        Name = "Lotka-Volterra",
132        Description = "Publication: Gaucel et al.: Learning Dynamical Systems using Standard Symbolic Regression, Evostar 2014.",
133        TargetVariables = new[] { "y1", "y2" },
134        InputVariables = new string[] { },
135        TrainingEpisodes = new IntRange[] { new IntRange(0, 193) },
136        TestEpisodes = new IntRange[] { },
137        FileName = "LotkaVolterra.csv"
138      };
139    }
140
141    private DataDescriptor PredatorPrey() {
142      return new DataDescriptor {
143        Name = "Predator Prey",
144        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
145        TargetVariables = new[] { "y1", "y2" },
146        InputVariables = new string[] { },
147        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
148        TestEpisodes = new IntRange[] { },
149        FileName = "predator_prey_1.csv"
150      };
151    }
152
153    private DataDescriptor ShearFlow() {
154      return new DataDescriptor {
155        Name = "Shear Flow",
156        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
157        TargetVariables = new[] { "y1", "y2" },
158        InputVariables = new string[] { },
159        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
160        TestEpisodes = new IntRange[] { },
161        FileName = "shear_flow_1.csv"
162      };
163    }
164
165    private DataDescriptor S_System() {
166      return new DataDescriptor {
167        Name = "S-System",
168        Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468",
169        TargetVariables = new[] { "y1", "y2", "y3", "y4", "y5" },
170        InputVariables = new string[] { },
171        TrainingEpisodes = new IntRange[] { new IntRange(0, 30), new IntRange(31, 61), new IntRange(62, 92) },
172        TestEpisodes = new IntRange[] { },
173        FileName = "S-System.csv"
174      };
175    }
176
177
178    private DataDescriptor ThreeSpeciesLotkaVolterra() {
179      return new DataDescriptor {
180        Name = "Lotka Volterra (three species)",
181        Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468",
182        TargetVariables = new[] { "y1", "y2", "y3" },
183        InputVariables = new string[] { },
184        TrainingEpisodes = new IntRange[] { new IntRange(0, 100)},
185        TestEpisodes = new IntRange[] { },
186        FileName = "ThreeLotkaVolterra.csv"
187      };
188    }
189
190
191    private DataDescriptor VanDerPol() {
192      return new DataDescriptor {
193        Name = "Van der Pol Oscillator",
194        Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.",
195        TargetVariables = new[] { "y1", "y2" },
196        InputVariables = new string[] { },
197        TrainingEpisodes = new IntRange[] { new IntRange(0, 100) },
198        TestEpisodes = new IntRange[] { },
199        FileName = "van_der_pol_1.csv"
200      };
201    }
202
203    public override Problem LoadData(IDataDescriptor id) {
204      var descriptor = (DataDescriptor)id;
205
206      var instanceArchiveName = GetResourceName(descriptor.FileName + @"\.zip");
207      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
208        var entry = instancesZipFile.GetEntry(descriptor.FileName);
209        NumberFormatInfo numberFormat;
210        DateTimeFormatInfo dateFormat;
211        char separator;
212        using (Stream stream = entry.Open()) {
213          TableFileParser.DetermineFileFormat(stream, out numberFormat, out dateFormat, out separator);
214        }
215
216        TableFileParser csvFileParser = new TableFileParser();
217        using (Stream stream = entry.Open()) {
218          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
219        }
220
221        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
222
223
224        // using a RegressionProblemData is suboptimal here --> TODO introduce a new datatype and refactor the whole problem
225        var problemData = new RegressionProblemData(dataset, descriptor.InputVariables, descriptor.TargetVariables.First());
226        problemData.TrainingPartition.Start = 0;
227        problemData.TrainingPartition.End = 0;
228        problemData.TestPartition.Start = 0;
229        problemData.TestPartition.End = 0;
230
231        var problem = new Problem();
232        problem.Name = descriptor.Name;
233        problem.Description = descriptor.Description;
234        problem.ProblemData = problemData;
235        foreach (var ep in descriptor.TrainingEpisodes) problem.TrainingEpisodes.Add((IntRange)ep.Clone());
236        foreach (var targetVar in problem.TargetVariables) {
237          problem.TargetVariables.SetItemCheckedState(targetVar, descriptor.TargetVariables.Contains(targetVar.Value));
238        }
239        return problem;
240      }
241    }
242
243    protected virtual string GetResourceName(string fileName) {
244      return GetType().Assembly.GetManifestResourceNames()
245              .Where(x => Regex.Match(x, @".*\.Instances\." + fileName).Success).SingleOrDefault();
246    }
247  }
248}
Note: See TracBrowser for help on using the repository browser.