1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System;
|
---|
23 | using System.Collections.Generic;
|
---|
24 | using System.Globalization;
|
---|
25 | using System.IO;
|
---|
26 | using System.IO.Compression;
|
---|
27 | using System.Linq;
|
---|
28 | using System.Text.RegularExpressions;
|
---|
29 | using HeuristicLab.Data;
|
---|
30 | using HeuristicLab.Problems.DataAnalysis;
|
---|
31 | using HeuristicLab.Problems.DynamicalSystemsModelling.Instances;
|
---|
32 | using HeuristicLab.Problems.Instances;
|
---|
33 | using HeuristicLab.Problems.Instances.DataAnalysis;
|
---|
34 |
|
---|
namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  /// <summary>
  /// Provides the dynamical-systems benchmark instances that are shipped as zipped
  /// text files in the embedded resources of this assembly.
  /// </summary>
  public class ProblemInstanceProvider : ProblemInstanceProvider<Problem> {
    private static readonly NumberFormatInfo germanNumberFormat = CultureInfo.GetCultureInfo("de-DE").NumberFormat;

    // Publication strings shared by several instances; kept verbatim because they are shown as descriptions.
    private const string SchmidtLipsonEsda2008 = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
    private const string IbaSakamoto2008 = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
    private const string GaucelEvostar2014 = "Publication: Gaucel et al.: Learning Dynamical Systems using Standard Symbolic Regression, Evostar 2014.";
    private const string SchmidtLipsonScience2009 = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";

    public override string Name {
      get { return "Dynamic Systems"; }
    }

    public override string Description {
      get { return "A set of problem instances for dynamical modelling."; }
    }

    public override Uri WebLink {
      get { return null; }
    }

    public override string ReferencePublication {
      get { return ""; }
    }

    /// <summary>
    /// Lists the descriptors of all instances offered by this provider.
    /// </summary>
    /// <returns>A newly created list containing one descriptor per instance.</returns>
    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
      return new List<DataDescriptor> {
        BacterialRespiration(),
        BarMagnets(),
        ChemicalReaction(),
        E_Cell(),
        Glider(),
        LotkaVolterra(),
        PredatorPrey(),
        S_System(),
        ShearFlow(),
        ThreeSpeciesLotkaVolterra(),
        VanDerPol(),
        Oscillator(),
        RealOscillator(),
        Pendulum(),
        RealPendulum(),
        DoubleOscillator(),
        RealDoubleOscillator(),
        DoublePendulum(),
        RealDoublePendulum()
      };
    }

    /// <summary>
    /// Builds a descriptor without test episodes.
    /// </summary>
    private static DataDescriptor CreateDescriptor(string name, string description, string fileName,
      string[] targetVariables, IntRange[] trainingEpisodes) {
      return CreateDescriptor(name, description, fileName, targetVariables, trainingEpisodes, new IntRange[] { });
    }

    /// <summary>
    /// Builds a descriptor with the settings common to every instance of this provider:
    /// no input variables, German number format and ';' as column separator.
    /// </summary>
    private static DataDescriptor CreateDescriptor(string name, string description, string fileName,
      string[] targetVariables, IntRange[] trainingEpisodes, IntRange[] testEpisodes) {
      return new DataDescriptor {
        Name = name,
        Description = description,
        TargetVariables = targetVariables,
        InputVariables = new string[] { },
        TrainingEpisodes = trainingEpisodes,
        TestEpisodes = testEpisodes,
        FileName = fileName,
        NumberFormat = germanNumberFormat,
        Separator = ';'
      };
    }

    private DataDescriptor BacterialRespiration() {
      return CreateDescriptor("Bacterial Respiration", SchmidtLipsonEsda2008, "bacterial_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor BarMagnets() {
      return CreateDescriptor("Bar Magnets", SchmidtLipsonEsda2008, "bar_magnets_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor ChemicalReaction() {
      return CreateDescriptor("ChemicalReaction", IbaSakamoto2008, "ChemicalReaction.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 101) });
    }

    private DataDescriptor E_Cell() {
      return CreateDescriptor("E-CELL", IbaSakamoto2008, "E-CELL.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 41) });
    }

    private DataDescriptor Glider() {
      return CreateDescriptor("Glider", SchmidtLipsonEsda2008, "Glider_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor LotkaVolterra() {
      return CreateDescriptor("Lotka-Volterra", GaucelEvostar2014, "LotkaVolterra.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 193) },
        new[] { new IntRange(193, 300) });
    }

    private DataDescriptor PredatorPrey() {
      return CreateDescriptor("Predator Prey", SchmidtLipsonEsda2008, "predator_prey_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor ShearFlow() {
      return CreateDescriptor("Shear Flow", SchmidtLipsonEsda2008, "shear_flow_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor S_System() {
      // Three consecutive training episodes of 31 rows each.
      return CreateDescriptor("S-System", IbaSakamoto2008, "S-System.csv",
        new[] { "y1", "y2", "y3", "y4", "y5" },
        new[] { new IntRange(0, 31), new IntRange(31, 62), new IntRange(62, 93) });
    }

    private DataDescriptor ThreeSpeciesLotkaVolterra() {
      return CreateDescriptor("Lotka Volterra (three species)", IbaSakamoto2008, "ThreeLotkaVolterra.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor VanDerPol() {
      return CreateDescriptor("Van der Pol Oscillator", SchmidtLipsonEsda2008, "van_der_pol_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) });
    }

    private DataDescriptor Oscillator() {
      return CreateDescriptor("Linear Oscillator (simulation)", SchmidtLipsonScience2009, "linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 512) });
    }

    private DataDescriptor RealOscillator() {
      return CreateDescriptor("Linear Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 879) });
    }

    private DataDescriptor Pendulum() {
      // NOTE(review): the motion-tracked pendulum uses "omega" while this one uses "omega1";
      // confirm against the column header of the data file. LoadData silently leaves a target
      // unchecked when its name is not among the problem's target variables.
      return CreateDescriptor("Pendulum (simulated)", SchmidtLipsonScience2009, "pendulum_h_1_equidistant.txt",
        new[] { "theta", "omega1" },
        new[] { new IntRange(0, 502) });
    }

    private DataDescriptor RealPendulum() {
      return CreateDescriptor("Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_pend_h_1_equidistant.txt",
        new[] { "theta", "omega" },
        new[] { new IntRange(0, 568) });
    }

    private DataDescriptor DoubleOscillator() {
      return CreateDescriptor("Double Oscillator (simulated)", SchmidtLipsonScience2009, "double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 200) });
    }

    private DataDescriptor RealDoubleOscillator() {
      return CreateDescriptor("Double Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 150) });
    }

    private DataDescriptor DoublePendulum() {
      return CreateDescriptor("Double Pendulum (simulated)", SchmidtLipsonScience2009, "double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 1355) },
        new[] { new IntRange(1355, 2660) });
    }

    private DataDescriptor RealDoublePendulum() {
      // NOTE(review): training ends at row 200 but testing starts at row 886, unlike the other
      // instances where the test range directly follows the training range; confirm that the
      // gap is intended.
      return CreateDescriptor("Double Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 200) },
        new[] { new IntRange(886, 1731) });
    }

    /// <summary>
    /// Loads the instance described by <paramref name="id"/> from the embedded zip archive
    /// and wraps it in a new <see cref="Problem"/>.
    /// </summary>
    /// <param name="id">A descriptor created by <see cref="GetDataDescriptors"/>; it is cast to <c>DataDescriptor</c>.</param>
    /// <returns>A problem carrying the parsed data, the training episodes and the checked target variables.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="id"/> is null.</exception>
    /// <exception cref="FileNotFoundException">The archive or the data file inside it is not embedded in the assembly.</exception>
    public override Problem LoadData(IDataDescriptor id) {
      if (id == null) {
        throw new ArgumentNullException("id");
      }
      var descriptor = (DataDescriptor)id;

      Dataset dataset = LoadDataset(descriptor);

      // using a RegressionProblemData is suboptimal here --> TODO introduce a new datatype and refactor the whole problem
      var problemData = new RegressionProblemData(dataset, descriptor.InputVariables, descriptor.TargetVariables.First());
      problemData.Name = descriptor.Name;
      problemData.Description = descriptor.Description;
      // Both partitions are reset to empty ranges; the training ranges are supplied
      // through problem.TrainingEpisodes below.
      problemData.TrainingPartition.Start = 0;
      problemData.TrainingPartition.End = 0;
      problemData.TestPartition.Start = 0;
      problemData.TestPartition.End = 0;

      var problem = new Problem();
      problem.Name = descriptor.Name;
      problem.Description = descriptor.Description;
      problem.ProblemData = problemData;

      // Episodes are cloned so the problem does not share range objects with the descriptor.
      // NOTE(review): descriptor.TestEpisodes is not transferred to the problem — confirm whether that is intended.
      foreach (var episode in descriptor.TrainingEpisodes) {
        problem.TrainingEpisodes.Add((IntRange)episode.Clone());
      }

      // Check exactly those target variables that the descriptor names.
      foreach (var targetVar in problem.TargetVariables) {
        bool isTarget = descriptor.TargetVariables.Contains(targetVar.Value);
        problem.TargetVariables.SetItemCheckedState(targetVar, isTarget);
      }
      return problem;
    }

    /// <summary>
    /// Reads the data file of <paramref name="descriptor"/> from its embedded zip archive.
    /// </summary>
    private Dataset LoadDataset(DataDescriptor descriptor) {
      // GetResourceName treats its argument as a regular-expression fragment, so the literal
      // file name has to be escaped ('.' would otherwise match any character).
      var instanceArchiveName = GetResourceName(Regex.Escape(descriptor.FileName) + @"\.zip");
      if (instanceArchiveName == null) {
        throw new FileNotFoundException(
          "No embedded instance archive was found for '" + descriptor.FileName + "'.",
          descriptor.FileName + ".zip");
      }

      var csvFileParser = new TableFileParser();
      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
        var entry = instancesZipFile.GetEntry(descriptor.FileName);
        if (entry == null) {
          throw new FileNotFoundException(
            "The instance archive '" + instanceArchiveName + "' does not contain the entry '" + descriptor.FileName + "'.",
            descriptor.FileName);
        }

        // Number format and separator come from the descriptor instead of being auto-detected;
        // dates are always parsed with the invariant format.
        using (Stream stream = entry.Open()) {
          csvFileParser.Parse(stream, descriptor.NumberFormat, DateTimeFormatInfo.InvariantInfo, descriptor.Separator, true);
        }
      }

      var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      dataset.Name = descriptor.FileName;
      dataset.Description = descriptor.Description;
      return dataset;
    }

    /// <summary>
    /// Finds the manifest resource of this type's assembly whose name matches
    /// <c>.*\.Instances\.</c> followed by <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">A regular-expression fragment; callers must escape literal text themselves.</param>
    /// <returns>The single matching resource name, or null when no resource matches.</returns>
    /// <exception cref="InvalidOperationException">More than one resource matches.</exception>
    protected virtual string GetResourceName(string fileName) {
      var pattern = @".*\.Instances\." + fileName;
      return GetType().Assembly.GetManifestResourceNames()
        .SingleOrDefault(resourceName => Regex.IsMatch(resourceName, pattern));
    }
  }
}
|
---|