#region License Information
/* HeuristicLab
* Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text.RegularExpressions;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DynamicalSystemsModelling.Instances;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;
namespace HeuristicLab.Problems.DynamicalSystemsModelling {
public class ProblemInstanceProvider : ProblemInstanceProvider {
/// <summary>
/// Number format of the "de-DE" culture. It is assigned as the NumberFormat of every
/// descriptor created in this class and handed to the file parser in LoadData.
/// </summary>
private static readonly NumberFormatInfo germanNumberFormat =
  CultureInfo.GetCultureInfo("de-DE").NumberFormat;
/// <summary>
/// Gets the name of this instance provider, the constant "Dynamic Systems".
/// </summary>
public override string Name => "Dynamic Systems";
/// <summary>
/// Gets a one-sentence description of the instances offered by this provider.
/// </summary>
public override string Description => "A set of problem instances for dynamical modelling.";
/// <summary>
/// Gets the web link of this provider; always null because no link is defined.
/// </summary>
public override Uri WebLink => null;
/// <summary>
/// Gets the reference publication of this provider; always the empty string.
/// The publications of the individual instances are part of each descriptor's Description.
/// </summary>
public override string ReferencePublication => "";
/// <summary>
/// Lists the descriptors of all problem instances offered by this provider.
/// </summary>
/// <returns>
/// A newly created list holding one freshly built descriptor per instance,
/// in the order in which the factory methods are invoked below.
/// </returns>
public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
  // The generic type arguments are required: with only System.Collections.Generic imported,
  // neither a non-generic IEnumerable nor a non-generic List is available.
  return new List<IDataDescriptor> {
    BacterialRespiration(),
    BarMagnets(),
    ChemicalReaction(),
    E_Cell(),
    Glider(),
    LotkaVolterra(),
    PredatorPrey(),
    S_System(),
    ShearFlow(),
    ThreeSpeciesLotkaVolterra(),
    VanDerPol(),
    Oscillator(),
    RealOscillator(),
    Pendulum(),
    RealPendulum(),
    DoubleOscillator(),
    RealDoubleOscillator(),
    DoublePendulum(),
    RealDoublePendulum()
  };
}
/// <summary>
/// Builds the descriptor of the "Bacterial Respiration" instance stored in bacterial_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor BacterialRespiration() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Bacterial Respiration";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "bacterial_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Bar Magnets" instance stored in bar_magnets_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor BarMagnets() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Bar Magnets";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "bar_magnets_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "ChemicalReaction" instance stored in ChemicalReaction.csv:
/// targets y1, y2 and y3, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor ChemicalReaction() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "ChemicalReaction";
  descriptor.Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
  descriptor.TargetVariables = new string[] { "y1", "y2", "y3" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 101) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "ChemicalReaction.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "E-CELL" instance stored in E-CELL.csv:
/// targets y1, y2 and y3, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor E_Cell() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "E-CELL";
  descriptor.Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
  descriptor.TargetVariables = new string[] { "y1", "y2", "y3" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 41) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "E-CELL.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Glider" instance stored in Glider_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor Glider() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Glider";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "Glider_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Lotka-Volterra" instance stored in LotkaVolterra.csv:
/// targets y1 and y2, no input variables, one training episode and one test episode.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor LotkaVolterra() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Lotka-Volterra";
  descriptor.Description = "Publication: Gaucel et al.: Learning Dynamical Systems using Standard Symbolic Regression, Evostar 2014.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 193) };
  descriptor.TestEpisodes = new[] { new IntRange(193, 300) };
  descriptor.FileName = "LotkaVolterra.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Predator Prey" instance stored in predator_prey_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor PredatorPrey() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Predator Prey";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "predator_prey_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Shear Flow" instance stored in shear_flow_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor ShearFlow() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Shear Flow";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "shear_flow_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "S-System" instance stored in S-System.csv:
/// targets y1 to y5, no input variables, three training episodes and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor S_System() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "S-System";
  descriptor.Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
  descriptor.TargetVariables = new string[] { "y1", "y2", "y3", "y4", "y5" };
  descriptor.InputVariables = new string[0];
  // three consecutive episodes, each starting where the previous one ends
  descriptor.TrainingEpisodes = new[] {
    new IntRange(0, 31),
    new IntRange(31, 62),
    new IntRange(62, 93)
  };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "S-System.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Lotka Volterra (three species)" instance stored in
/// ThreeLotkaVolterra.csv: targets y1, y2 and y3, no input variables, one training episode
/// and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor ThreeSpeciesLotkaVolterra() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Lotka Volterra (three species)";
  descriptor.Description = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
  descriptor.TargetVariables = new string[] { "y1", "y2", "y3" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "ThreeLotkaVolterra.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Van der Pol Oscillator" instance stored in van_der_pol_1.csv:
/// targets y1 and y2, no input variables, one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor VanDerPol() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Van der Pol Oscillator";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
  descriptor.TargetVariables = new string[] { "y1", "y2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 100) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "van_der_pol_1.csv";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Linear Oscillator (simulation)" instance stored in
/// linear_h_1_equidistant.txt: targets x and v, no input variables, one training episode
/// and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor Oscillator() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Linear Oscillator (simulation)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "x", "v" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 512) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "linear_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Linear Oscillator (motion-tracked)" instance stored in
/// real_linear_h_1_equidistant.txt: targets x and v, no input variables, one training episode
/// and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor RealOscillator() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Linear Oscillator (motion-tracked)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "x", "v" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 879) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "real_linear_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Pendulum (simulated)" instance stored in
/// pendulum_h_1_equidistant.txt: targets theta and omega1, no input variables,
/// one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor Pendulum() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Pendulum (simulated)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  // NOTE(review): the second target is "omega1" here, while RealPendulum() uses "omega" —
  // confirm against the column names in pendulum_h_1_equidistant.txt.
  descriptor.TargetVariables = new string[] { "theta", "omega1" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 502) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "pendulum_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Pendulum (motion-tracked)" instance stored in
/// real_pend_h_1_equidistant.txt: targets theta and omega, no input variables,
/// one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor RealPendulum() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Pendulum (motion-tracked)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "theta", "omega" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 568) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "real_pend_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Double Oscillator (simulated)" instance stored in
/// double_linear_h_1_equidistant.txt: targets x1, x2, v1 and v2, no input variables,
/// one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor DoubleOscillator() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Double Oscillator (simulated)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "x1", "x2", "v1", "v2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 200) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "double_linear_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Double Oscillator (motion-tracked)" instance stored in
/// real_double_linear_h_1_equidistant.txt: targets x1, x2, v1 and v2, no input variables,
/// one training episode and no test episodes.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor RealDoubleOscillator() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Double Oscillator (motion-tracked)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "x1", "x2", "v1", "v2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 150) };
  descriptor.TestEpisodes = new IntRange[0];
  descriptor.FileName = "real_double_linear_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Double Pendulum (simulated)" instance stored in
/// double_pend_h_1_equidistant.txt: targets theta1, theta2, omega1 and omega2,
/// no input variables, one training episode and one test episode.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor DoublePendulum() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Double Pendulum (simulated)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "theta1", "theta2", "omega1", "omega2" };
  descriptor.InputVariables = new string[0];
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 1355) };
  descriptor.TestEpisodes = new[] { new IntRange(1355, 2660) };
  descriptor.FileName = "double_pend_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Builds the descriptor of the "Double Pendulum (motion-tracked)" instance stored in
/// real_double_pend_h_1_equidistant.txt: targets theta1, theta2, omega1 and omega2,
/// no input variables, one training episode and one test episode.
/// </summary>
/// <returns>A newly created descriptor.</returns>
private DataDescriptor RealDoublePendulum() {
  var descriptor = new DataDescriptor();
  descriptor.Name = "Double Pendulum (motion-tracked)";
  descriptor.Description = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";
  descriptor.TargetVariables = new string[] { "theta1", "theta2", "omega1", "omega2" };
  descriptor.InputVariables = new string[0];
  // NOTE(review): the training range ends at 200 while the test range starts at 886, whereas
  // DoublePendulum() uses adjacent ranges — TODO confirm these bounds against the data file.
  descriptor.TrainingEpisodes = new[] { new IntRange(0, 200) };
  descriptor.TestEpisodes = new[] { new IntRange(886, 1731) };
  descriptor.FileName = "real_double_pend_h_1_equidistant.txt";
  descriptor.NumberFormat = germanNumberFormat;
  descriptor.Separator = ';';
  return descriptor;
}
/// <summary>
/// Loads the instance described by <paramref name="id"/> from a zip archive embedded in this
/// type's assembly and wraps the parsed data in a new <see cref="Problem"/>.
/// </summary>
/// <remarks>
/// The archive is located through GetResourceName using the descriptor's file name followed by
/// ".zip"; the archive entry that gets parsed is named like the descriptor's file name itself.
/// Only the descriptor's training episodes are copied to the problem.
/// NOTE(review): the descriptor's TestEpisodes are not transferred — confirm that this is intended.
/// </remarks>
/// <param name="id">The descriptor of the instance; it is cast to <see cref="DataDescriptor"/>.</param>
/// <returns>A new problem carrying the parsed dataset, the training episodes and the checked target variables.</returns>
/// <exception cref="ArgumentNullException"><paramref name="id"/> is null.</exception>
/// <exception cref="InvalidCastException"><paramref name="id"/> is not a <see cref="DataDescriptor"/>.</exception>
/// <exception cref="FileNotFoundException">No embedded archive matches the descriptor's file name.</exception>
/// <exception cref="InvalidDataException">The archive has no entry named like the descriptor's file name.</exception>
public override Problem LoadData(IDataDescriptor id) {
  if (id == null) throw new ArgumentNullException(nameof(id));
  var descriptor = (DataDescriptor)id;

  // GetResourceName yields null when no resource matches. Fail with a descriptive message here
  // instead of letting the stream/archive calls below raise an anonymous ArgumentNullException.
  var instanceArchiveName = GetResourceName(descriptor.FileName + @"\.zip");
  Stream archiveStream = instanceArchiveName == null
    ? null
    : GetType().Assembly.GetManifestResourceStream(instanceArchiveName);
  if (archiveStream == null) {
    throw new FileNotFoundException(
      "The embedded instance archive for '" + descriptor.FileName + "' could not be found.",
      descriptor.FileName + ".zip");
  }

  // The archive is created without leaveOpen, so disposing it also closes archiveStream.
  using (var instancesZipFile = new ZipArchive(archiveStream, ZipArchiveMode.Read)) {
    var entry = instancesZipFile.GetEntry(descriptor.FileName);
    if (entry == null) {
      // GetEntry returns null for unknown names; without this check entry.Open() would
      // fail with a NullReferenceException.
      throw new InvalidDataException(
        "The archive '" + instanceArchiveName + "' does not contain an entry named '" + descriptor.FileName + "'.");
    }

    // The file format is taken from the descriptor rather than auto-detected.
    NumberFormatInfo numberFormat = descriptor.NumberFormat;
    DateTimeFormatInfo dateFormat = DateTimeFormatInfo.InvariantInfo;
    char separator = descriptor.Separator;

    TableFileParser csvFileParser = new TableFileParser();
    using (Stream stream = entry.Open()) {
      // last argument: presumably "column names in first line" — confirm against TableFileParser.Parse
      csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
    }

    Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    dataset.Name = descriptor.FileName;
    dataset.Description = descriptor.Description;

    // using a RegressionProblemData is suboptimal here --> TODO introduce a new datatype and refactor the whole problem
    var problemData = new RegressionProblemData(dataset, descriptor.InputVariables, descriptor.TargetVariables.First());
    problemData.Name = descriptor.Name;
    problemData.Description = descriptor.Description;
    // Both partitions are reset to the empty range [0, 0]; the rows to use are given
    // by the training episodes added to the problem below.
    problemData.TrainingPartition.Start = 0;
    problemData.TrainingPartition.End = 0;
    problemData.TestPartition.Start = 0;
    problemData.TestPartition.End = 0;

    var problem = new Problem();
    problem.Name = descriptor.Name;
    problem.Description = descriptor.Description;
    problem.ProblemData = problemData;

    // Episodes are cloned so that the problem does not share IntRange objects with the descriptor.
    foreach (var ep in descriptor.TrainingEpisodes) problem.TrainingEpisodes.Add((IntRange)ep.Clone());

    // Check exactly those target candidates of the problem that the descriptor lists as targets.
    foreach (var targetVar in problem.TargetVariables) {
      problem.TargetVariables.SetItemCheckedState(targetVar, descriptor.TargetVariables.Contains(targetVar.Value));
    }
    return problem;
  }
}
/// <summary>
/// Looks up the manifest resource of this type's assembly whose name matches the regular
/// expression <c>.*\.Instances\.</c> followed by <paramref name="fileName"/>.
/// </summary>
/// <param name="fileName">
/// Appended verbatim to the pattern, so it is interpreted as a regular expression fragment
/// (LoadData passes the file name followed by an escaped ".zip" suffix).
/// </param>
/// <returns>The name of the single matching resource, or null when no resource matches.</returns>
/// <exception cref="InvalidOperationException">More than one resource name matches the pattern.</exception>
protected virtual string GetResourceName(string fileName) {
  string pattern = @".*\.Instances\." + fileName;
  string[] resourceNames = GetType().Assembly.GetManifestResourceNames();
  return resourceNames.SingleOrDefault(resourceName => Regex.IsMatch(resourceName, pattern));
}
}
}