#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text.RegularExpressions;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DynamicalSystemsModelling.Instances;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  /// <summary>
  /// Provides the built-in problem instances for dynamical systems modelling.
  /// Every instance is described by a <see cref="DataDescriptor"/> and loaded from a
  /// zipped table file that is embedded in this assembly as a manifest resource.
  /// </summary>
  public class ProblemInstanceProvider : ProblemInstanceProvider<Problem> {
    // All bundled table files are parsed with the de-DE number format and ';' as column separator.
    private static readonly NumberFormatInfo germanNumberFormat = CultureInfo.GetCultureInfo("de-DE").NumberFormat;

    // Descriptions shared by several instances (each one names the publication the data is taken from).
    private const string SchmidtLipsonEsda2008 = "Publication: M. Schmidt, H. Lipson; Data-Mining Dynamical Systems: Automated Symbolic System Identification for Exploratory Analysis, ESDA 2008.";
    private const string IbaSakamoto2008 = "Publication: H. Iba, E. Sakamoto: Inference of Differential Equation Models by Genetic Programming, Information Sciences Volume 178, Issue 23, 1 December 2008, Pages 4453 - 4468";
    private const string GaucelEvostar2014 = "Publication: Gaucel et al.: Learning Dynamical Systems using Standard Symbolic Regression, Evostar 2014.";
    private const string SchmidtLipsonScience2009 = "Publication: M. Schmidt, H. Lipson; Distilling Free-Form Natural Laws from Experimental Data, Science Apr 2009: Vol. 324, Issue 5923, pp. 81 - 85.";

    /// <summary>Display name of this provider.</summary>
    public override string Name {
      get { return "Dynamic Systems"; }
    }

    /// <summary>Short description of this provider.</summary>
    public override string Description {
      get { return "A set of problem instances for dynamical modelling."; }
    }

    /// <summary>No web link is available for this provider; always <c>null</c>.</summary>
    public override Uri WebLink {
      get { return null; }
    }

    /// <summary>No single reference publication exists for this provider; always the empty string.</summary>
    public override string ReferencePublication {
      get { return ""; }
    }

    /// <summary>
    /// Returns the descriptors of all bundled problem instances in a fixed order.
    /// </summary>
    /// <returns>A newly created list containing one descriptor per instance.</returns>
    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
      return new List<IDataDescriptor> {
        BacterialRespiration(),
        BarMagnets(),
        ChemicalReaction(),
        E_Cell(),
        Glider(),
        LotkaVolterra(),
        PredatorPrey(),
        S_System(),
        ShearFlow(),
        ThreeSpeciesLotkaVolterra(),
        VanDerPol(),
        Oscillator(),
        RealOscillator(),
        Pendulum(),
        RealPendulum(),
        DoubleOscillator(),
        RealDoubleOscillator(),
        DoublePendulum(),
        RealDoublePendulum()
      };
    }

    /// <summary>
    /// Builds a descriptor with the settings common to all bundled instances:
    /// no input variables, de-DE number format and ';' as separator.
    /// </summary>
    /// <param name="name">Display name of the instance.</param>
    /// <param name="description">Description of the instance (the source publication).</param>
    /// <param name="fileName">Name of the table file; also the name of the entry inside the embedded ZIP archive.</param>
    /// <param name="targetVariables">Names of the variables that are marked as targets.</param>
    /// <param name="trainingEpisodes">Row ranges used as training episodes.</param>
    /// <param name="testEpisodes">Row ranges used as test episodes (may be empty).</param>
    /// <returns>A new descriptor instance.</returns>
    private static DataDescriptor CreateDescriptor(string name, string description, string fileName,
      string[] targetVariables, IntRange[] trainingEpisodes, IntRange[] testEpisodes) {
      return new DataDescriptor {
        Name = name,
        Description = description,
        TargetVariables = targetVariables,
        InputVariables = new string[] { },
        TrainingEpisodes = trainingEpisodes,
        TestEpisodes = testEpisodes,
        FileName = fileName,
        NumberFormat = germanNumberFormat,
        Separator = ';'
      };
    }

    private DataDescriptor BacterialRespiration() {
      return CreateDescriptor("Bacterial Respiration", SchmidtLipsonEsda2008, "bacterial_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor BarMagnets() {
      return CreateDescriptor("Bar Magnets", SchmidtLipsonEsda2008, "bar_magnets_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor ChemicalReaction() {
      return CreateDescriptor("ChemicalReaction", IbaSakamoto2008, "ChemicalReaction.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 101) },
        new IntRange[] { });
    }

    private DataDescriptor E_Cell() {
      return CreateDescriptor("E-CELL", IbaSakamoto2008, "E-CELL.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 41) },
        new IntRange[] { });
    }

    private DataDescriptor Glider() {
      return CreateDescriptor("Glider", SchmidtLipsonEsda2008, "Glider_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor LotkaVolterra() {
      return CreateDescriptor("Lotka-Volterra", GaucelEvostar2014, "LotkaVolterra.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 193) },
        new[] { new IntRange(193, 300) });
    }

    private DataDescriptor PredatorPrey() {
      return CreateDescriptor("Predator Prey", SchmidtLipsonEsda2008, "predator_prey_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor ShearFlow() {
      return CreateDescriptor("Shear Flow", SchmidtLipsonEsda2008, "shear_flow_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor S_System() {
      // This instance consists of three separate training episodes stored one after another in the file.
      return CreateDescriptor("S-System", IbaSakamoto2008, "S-System.csv",
        new[] { "y1", "y2", "y3", "y4", "y5" },
        new[] { new IntRange(0, 31), new IntRange(31, 62), new IntRange(62, 93) },
        new IntRange[] { });
    }

    private DataDescriptor ThreeSpeciesLotkaVolterra() {
      return CreateDescriptor("Lotka Volterra (three species)", IbaSakamoto2008, "ThreeLotkaVolterra.csv",
        new[] { "y1", "y2", "y3" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor VanDerPol() {
      return CreateDescriptor("Van der Pol Oscillator", SchmidtLipsonEsda2008, "van_der_pol_1.csv",
        new[] { "y1", "y2" },
        new[] { new IntRange(0, 100) },
        new IntRange[] { });
    }

    private DataDescriptor Oscillator() {
      return CreateDescriptor("Linear Oscillator (simulation)", SchmidtLipsonScience2009, "linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 512) },
        new IntRange[] { });
    }

    private DataDescriptor RealOscillator() {
      return CreateDescriptor("Linear Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_linear_h_1_equidistant.txt",
        new[] { "x", "v" },
        new[] { new IntRange(0, 879) },
        new IntRange[] { });
    }

    private DataDescriptor Pendulum() {
      // NOTE(review): the target is named "omega1" here but "omega" for the motion-tracked pendulum;
      // presumably this mirrors the column headers of the data files - confirm against the files.
      return CreateDescriptor("Pendulum (simulated)", SchmidtLipsonScience2009, "pendulum_h_1_equidistant.txt",
        new[] { "theta", "omega1" },
        new[] { new IntRange(0, 502) },
        new IntRange[] { });
    }

    private DataDescriptor RealPendulum() {
      return CreateDescriptor("Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_pend_h_1_equidistant.txt",
        new[] { "theta", "omega" },
        new[] { new IntRange(0, 568) },
        new IntRange[] { });
    }

    private DataDescriptor DoubleOscillator() {
      return CreateDescriptor("Double Oscillator (simulated)", SchmidtLipsonScience2009, "double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 200) },
        new IntRange[] { });
    }

    private DataDescriptor RealDoubleOscillator() {
      return CreateDescriptor("Double Oscillator (motion-tracked)", SchmidtLipsonScience2009, "real_double_linear_h_1_equidistant.txt",
        new[] { "x1", "x2", "v1", "v2" },
        new[] { new IntRange(0, 150) },
        new IntRange[] { });
    }

    private DataDescriptor DoublePendulum() {
      return CreateDescriptor("Double Pendulum (simulated)", SchmidtLipsonScience2009, "double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 1355) },
        new[] { new IntRange(1355, 2660) });
    }

    private DataDescriptor RealDoublePendulum() {
      // NOTE(review): unlike the simulated double pendulum, the training range (0, 200) and the
      // test range (886, 1731) are not contiguous - confirm that this is intended.
      return CreateDescriptor("Double Pendulum (motion-tracked)", SchmidtLipsonScience2009, "real_double_pend_h_1_equidistant.txt",
        new[] { "theta1", "theta2", "omega1", "omega2" },
        new[] { new IntRange(0, 200) },
        new[] { new IntRange(886, 1731) });
    }

    /// <summary>
    /// Loads the instance described by <paramref name="id"/> from the ZIP archive embedded in this
    /// assembly and creates a configured <see cref="Problem"/> from it.
    /// </summary>
    /// <remarks>
    /// Only the training episodes of the descriptor are copied to the problem; the descriptor's
    /// test episodes are not used by this method.
    /// </remarks>
    /// <param name="id">The descriptor to load; must be a <see cref="DataDescriptor"/>.</param>
    /// <returns>A new problem with data, training episodes and checked target variables set.</returns>
    /// <exception cref="InvalidCastException">Thrown when <paramref name="id"/> is not a <see cref="DataDescriptor"/>.</exception>
    /// <exception cref="FileNotFoundException">Thrown when no embedded archive exists for the descriptor's file.</exception>
    /// <exception cref="InvalidDataException">Thrown when the archive does not contain the expected entry.</exception>
    public override Problem LoadData(IDataDescriptor id) {
      var descriptor = (DataDescriptor)id;

      // GetResourceName treats its argument as a regular expression fragment, therefore the literal
      // file name is escaped ('.' would otherwise match any character).
      var instanceArchiveName = GetResourceName(Regex.Escape(descriptor.FileName) + @"\.zip");
      if (instanceArchiveName == null) {
        throw new FileNotFoundException(
          string.Format("No embedded resource found for problem instance file '{0}.zip'.", descriptor.FileName));
      }

      var archiveStream = GetType().Assembly.GetManifestResourceStream(instanceArchiveName);
      if (archiveStream == null) {
        throw new FileNotFoundException(
          string.Format("Embedded resource '{0}' could not be opened.", instanceArchiveName));
      }

      // the archive takes ownership of the stream and closes it when disposed
      using (var instancesZipFile = new ZipArchive(archiveStream, ZipArchiveMode.Read)) {
        var entry = instancesZipFile.GetEntry(descriptor.FileName);
        if (entry == null) {
          throw new InvalidDataException(
            string.Format("Archive '{0}' does not contain an entry '{1}'.", instanceArchiveName, descriptor.FileName));
        }

        // The file format is taken from the descriptor instead of being auto-detected from the file.
        NumberFormatInfo numberFormat = descriptor.NumberFormat;
        DateTimeFormatInfo dateFormat = DateTimeFormatInfo.InvariantInfo;
        char separator = descriptor.Separator;

        var csvFileParser = new TableFileParser();
        using (Stream stream = entry.Open()) {
          // last argument: presumably "first line contains the column names" - confirm against TableFileParser
          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
        }

        var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        dataset.Name = descriptor.FileName;
        dataset.Description = descriptor.Description;

        // using a RegressionProblemData is suboptimal here --> TODO introduce a new datatype and refactor the whole problem
        var problemData = new RegressionProblemData(dataset, descriptor.InputVariables, descriptor.TargetVariables.First());
        problemData.Name = descriptor.Name;
        problemData.Description = descriptor.Description;
        // Both partitions are reset to the range [0, 0]; the rows to use are passed to the
        // problem as training episodes below.
        problemData.TrainingPartition.Start = 0;
        problemData.TrainingPartition.End = 0;
        problemData.TestPartition.Start = 0;
        problemData.TestPartition.End = 0;

        var problem = new Problem();
        problem.Name = descriptor.Name;
        problem.Description = descriptor.Description;
        problem.ProblemData = problemData;

        // episodes are cloned so that the problem does not share range objects with the descriptor
        foreach (var ep in descriptor.TrainingEpisodes) {
          problem.TrainingEpisodes.Add((IntRange)ep.Clone());
        }

        // check exactly those variables of the problem that the descriptor lists as targets
        foreach (var targetVar in problem.TargetVariables) {
          problem.TargetVariables.SetItemCheckedState(targetVar, descriptor.TargetVariables.Contains(targetVar.Value));
        }

        return problem;
      }
    }

    /// <summary>
    /// Finds the manifest resource of this type's assembly whose name matches
    /// <c>.*\.Instances\.</c> followed by <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">
    /// A regular expression fragment for the resource file name. It is inserted into the pattern
    /// verbatim, so callers must escape literal characters themselves.
    /// </param>
    /// <returns>The full resource name, or <c>null</c> when no resource matches.</returns>
    /// <exception cref="InvalidOperationException">Thrown when more than one resource matches.</exception>
    protected virtual string GetResourceName(string fileName) {
      var pattern = @".*\.Instances\." + fileName;
      return GetType().Assembly.GetManifestResourceNames()
        .SingleOrDefault(x => Regex.IsMatch(x, pattern));
    }
  }
}