Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/ProblemInstanceProvider.cs @ 17187

Last change on this file since 17187 was 17003, checked in by gkronber, 6 years ago

#2925 fixed bug in problem instance provider

File size: 17.4 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text.RegularExpressions;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DynamicalSystemsModelling.Instances;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  /// <summary>
  /// Provides the built-in problem instances for dynamical systems modelling.
  /// Every instance is described by a <see cref="DataDescriptor"/> and its data is read from a
  /// zip archive that is stored as a manifest resource in the assembly of the provider type.
  /// </summary>
  public class ProblemInstanceProvider : ProblemInstanceProvider<Problem> {
    // All instance files are parsed with the de-DE number format and ';' as column separator.
    private static readonly NumberFormatInfo germanNumberFormat = CultureInfo.GetCultureInfo("de-DE").NumberFormat;

    // Publication strings shared by several instances (used verbatim as instance description).
    private const string SchmidtLipsonEsda2008 = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
    private const string IbaSakamoto2008 = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
    private const string GaucelEvostar2014 = "Publication: Gaucel et al.: Learning Dynamical Systems using Standard Symbolic Regression, Evostar 2014.";
    private const string SchmidtLipsonScience2009 = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";

    /// <summary>Display name of this provider.</summary>
    public override string Name {
      get { return "Dynamic Systems"; }
    }

    /// <summary>Short description of the instance collection.</summary>
    public override string Description {
      get { return "A set of problem instances for dynamical modelling."; }
    }

    /// <summary>No web link is available for this collection; always <c>null</c>.</summary>
    public override Uri WebLink {
      get { return null; }
    }

    /// <summary>No single reference publication exists for this collection; always the empty string.</summary>
    public override string ReferencePublication {
      get { return ""; }
    }

    /// <summary>
    /// Returns the descriptors of all available instances. A fresh list with fresh descriptor
    /// objects is created on every call.
    /// </summary>
    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
      return new List<DataDescriptor> {
        BacterialRespiration(),
        BarMagnets(),
        ChemicalReaction(),
        E_Cell(),
        Glider(),
        LotkaVolterra(),
        PredatorPrey(),
        S_System(),
        ShearFlow(),
        ThreeSpeciesLotkaVolterra(),
        VanDerPol(),
        Oscillator(),
        RealOscillator(),
        Pendulum(),
        RealPendulum(),
        DoubleOscillator(),
        RealDoubleOscillator(),
        DoublePendulum(),
        RealDoublePendulum()
      };
    }

    /// <summary>
    /// Builds a descriptor with the settings that are common to all instances of this provider:
    /// no input variables, de-DE number format and ';' as separator.
    /// </summary>
    /// <param name="name">Display name of the instance.</param>
    /// <param name="description">Description (publication reference) of the instance.</param>
    /// <param name="fileName">Name of the data file; also the name of the entry inside the zip archive.</param>
    /// <param name="targetVariables">Names of the target variables.</param>
    /// <param name="trainingEpisodes">Row ranges used as training episodes.</param>
    /// <param name="testEpisodes">Row ranges used as test episodes (may be empty).</param>
    private static DataDescriptor CreateDescriptor(string name, string description, string fileName,
      string[] targetVariables, IntRange[] trainingEpisodes, IntRange[] testEpisodes) {
      return new DataDescriptor {
        Name = name,
        Description = description,
        TargetVariables = targetVariables,
        InputVariables = new string[] { },
        TrainingEpisodes = trainingEpisodes,
        TestEpisodes = testEpisodes,
        FileName = fileName,
        NumberFormat = germanNumberFormat,
        Separator = ';'
      };
    }

    private DataDescriptor BacterialRespiration() {
      return CreateDescriptor(
        "Bacterial Respiration", SchmidtLipsonEsda2008, "bacterial_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor BarMagnets() {
      return CreateDescriptor(
        "Bar Magnets", SchmidtLipsonEsda2008, "bar_magnets_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor ChemicalReaction() {
      return CreateDescriptor(
        "ChemicalReaction", IbaSakamoto2008, "ChemicalReaction.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 101) },
        new IntRange[0]);
    }

    private DataDescriptor E_Cell() {
      return CreateDescriptor(
        "E-CELL", IbaSakamoto2008, "E-CELL.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 41) },
        new IntRange[0]);
    }

    private DataDescriptor Glider() {
      return CreateDescriptor(
        "Glider", SchmidtLipsonEsda2008, "Glider_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor LotkaVolterra() {
      return CreateDescriptor(
        "Lotka-Volterra", GaucelEvostar2014, "LotkaVolterra.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 193) },
        new[] { new IntRange(193, 300) });
    }

    private DataDescriptor PredatorPrey() {
      return CreateDescriptor(
        "Predator Prey", SchmidtLipsonEsda2008, "predator_prey_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor ShearFlow() {
      return CreateDescriptor(
        "Shear Flow", SchmidtLipsonEsda2008, "shear_flow_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor S_System() {
      // the file contains three consecutive training episodes
      return CreateDescriptor(
        "S-System", IbaSakamoto2008, "S-System.csv",
        new[] { "y1", "y2", "y3", "y4", "y5" },
        new[] { new IntRange(0, 31), new IntRange(31, 62), new IntRange(62, 93) },
        new IntRange[0]);
    }

    private DataDescriptor ThreeSpeciesLotkaVolterra() {
      return CreateDescriptor(
        "Lotka Volterra (three species)", IbaSakamoto2008, "ThreeLotkaVolterra.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor VanDerPol() {
      return CreateDescriptor(
        "Van der Pol Oscillator", SchmidtLipsonEsda2008, "van_der_pol_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[0]);
    }

    private DataDescriptor Oscillator() {
      return CreateDescriptor(
        "Linear Oscillator (simulation)", SchmidtLipsonScience2009, "linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 512) },
        new IntRange[0]);
    }

    private DataDescriptor RealOscillator() {
      return CreateDescriptor(
        "Linear Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 879) },
        new IntRange[0]);
    }

    private DataDescriptor Pendulum() {
      // NOTE(review): the second target is "omega1" here but "omega" for the motion-tracked
      // pendulum -- confirm against the column header of pendulum_h_1_equidistant.txt.
      return CreateDescriptor(
        "Pendulum (simulated)", SchmidtLipsonScience2009, "pendulum_h_1_equidistant.txt",
        new[] { "theta", "omega1" },
        new[] { new IntRange(0, 502) },
        new IntRange[0]);
    }

    private DataDescriptor RealPendulum() {
      return CreateDescriptor(
        "Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_pend_h_1_equidistant.txt",
        new[] { "theta", "omega" },
        new[] { new IntRange(0, 568) },
        new IntRange[0]);
    }

    private DataDescriptor DoubleOscillator() {
      return CreateDescriptor(
        "Double Oscillator (simulated)", SchmidtLipsonScience2009, "double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 200) },
        new IntRange[0]);
    }

    private DataDescriptor RealDoubleOscillator() {
      return CreateDescriptor(
        "Double Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 150) },
        new IntRange[0]);
    }

    private DataDescriptor DoublePendulum() {
      return CreateDescriptor(
        "Double Pendulum (simulated)", SchmidtLipsonScience2009, "double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 1355) },
        new[] { new IntRange(1355, 2660) });
    }

    private DataDescriptor RealDoublePendulum() {
      // NOTE(review): training ends at row 200 while the test episode starts at row 886; the
      // simulated variant uses directly adjacent ranges -- confirm that the gap is intended.
      return CreateDescriptor(
        "Double Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 200) },
        new[] { new IntRange(886, 1731) });
    }

    /// <summary>
    /// Loads the instance described by <paramref name="id"/> from the embedded zip archive and
    /// creates a configured <see cref="Problem"/> for it.
    /// </summary>
    /// <param name="id">Descriptor of the instance; must be a <see cref="DataDescriptor"/>.</param>
    /// <returns>A new problem with data, training episodes and checked target variables set.</returns>
    /// <exception cref="FileNotFoundException">No embedded archive exists for the descriptor's file.</exception>
    /// <exception cref="InvalidDataException">The archive does not contain an entry with the descriptor's file name.</exception>
    public override Problem LoadData(IDataDescriptor id) {
      var descriptor = (DataDescriptor)id;

      // GetResourceName interprets its argument as part of a regular expression, therefore the
      // file name has to be escaped (otherwise the '.' in "name.csv" would match any character).
      var instanceArchiveName = GetResourceName(Regex.Escape(descriptor.FileName) + @"\.zip");
      if (instanceArchiveName == null) {
        throw new FileNotFoundException(
          string.Format("No embedded instance archive was found for \"{0}\".", descriptor.FileName),
          descriptor.FileName + ".zip");
      }

      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
        // the archive entry has the same name as the data file
        var entry = instancesZipFile.GetEntry(descriptor.FileName);
        if (entry == null) {
          throw new InvalidDataException(
            string.Format("The instance archive \"{0}\" does not contain the entry \"{1}\".", instanceArchiveName, descriptor.FileName));
        }

        // The file format is taken from the descriptor instead of being auto-detected.
        NumberFormatInfo numberFormat = descriptor.NumberFormat;
        DateTimeFormatInfo dateFormat = DateTimeFormatInfo.InvariantInfo;
        char separator = descriptor.Separator;

        TableFileParser csvFileParser = new TableFileParser();
        using (Stream stream = entry.Open()) {
          // last argument: presumably "column names in first line" -- verify against TableFileParser
          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
        }

        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        dataset.Name = descriptor.FileName;
        dataset.Description = descriptor.Description;

        // using a RegressionProblemData is suboptimal here --> TODO introduce a new datatype and refactor the whole problem
        // RegressionProblemData takes a single target, so only the first target variable is passed here.
        var problemData = new RegressionProblemData(dataset, descriptor.InputVariables, descriptor.TargetVariables.First());
        problemData.Name = descriptor.Name;
        problemData.Description = descriptor.Description;
        // both partitions are emptied; the rows to use are given by the problem's episodes instead
        problemData.TrainingPartition.Start = 0;
        problemData.TrainingPartition.End = 0;
        problemData.TestPartition.Start = 0;
        problemData.TestPartition.End = 0;

        var problem = new Problem();
        problem.Name = descriptor.Name;
        problem.Description = descriptor.Description;
        problem.ProblemData = problemData;

        // episodes are cloned so that the problem does not share IntRange objects with the descriptor
        // NOTE(review): descriptor.TestEpisodes is not transferred to the problem -- confirm this is intended.
        foreach (var ep in descriptor.TrainingEpisodes) {
          problem.TrainingEpisodes.Add((IntRange)ep.Clone());
        }

        // check exactly those target variables that are listed in the descriptor
        foreach (var targetVar in problem.TargetVariables) {
          problem.TargetVariables.SetItemCheckedState(targetVar, descriptor.TargetVariables.Contains(targetVar.Value));
        }
        return problem;
      }
    }

    /// <summary>
    /// Finds the manifest resource in the assembly of the provider type whose name matches the
    /// regular expression <c>.*\.Instances\.</c> followed by <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">
    /// Pattern fragment for the resource file name. It is inserted into the regular expression
    /// as is, so callers must escape literal file names (e.g. with <see cref="Regex.Escape(string)"/>).
    /// </param>
    /// <returns>The name of the matching resource, or <c>null</c> if no resource matches.</returns>
    /// <exception cref="InvalidOperationException">More than one resource matches.</exception>
    protected virtual string GetResourceName(string fileName) {
      var pattern = @".*\.Instances\." + fileName;
      return GetType().Assembly.GetManifestResourceNames()
              .SingleOrDefault(resourceName => Regex.IsMatch(resourceName, pattern));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.