Changeset 17418


Ignore:
Timestamp:
02/03/20 17:25:38 (2 weeks ago)
Author:
pfleck
Message:

#3040

  • (partially) enabled data preprocessing for vectorial data
  • use flat zip-files for large benchmarks instead of embedded resources (faster build times)
  • added multiple variants of vector benchmark I (vector length constraints)
Location:
branches/3040_VectorBasedGP
Files:
1 deleted
11 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/3040_VectorBasedGP/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r17180 r17418  
    407407        } else if (dataset.VariableHasType<DateTime>(variableName)) {
    408408          variableValues.Insert(columnIndex, dataset.GetDateTimeValues(variableName).ToList());
     409        } else if (dataset.VariableHasType<DoubleVector>(variableName)) {
     410          variableValues.Insert(columnIndex, dataset.GetDoubleVectorValues(variableName).ToList());
    409411        } else {
    410412          throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
     
    681683      l = 0;
    682684      ir = n - 1;
    683       for (;;) {
     685      for (; ; ) {
    684686        if (ir <= l + 1) {
    685687          // Active partition contains 1 or 2 elements.
     
    706708          j = ir;
    707709          a = arr[l + 1]; // Partitioning element.
    708           for (;;) { // Beginning of innermost loop.
     710          for (; ; ) { // Beginning of innermost loop.
    709711            do i++; while (arr[i].CompareTo(a) < 0); // Scan up to find element > a.
    710712            do j--; while (arr[j].CompareTo(a) > 0); // Scan down to find element < a.
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Vectors/DoubleVector.cs

    r17400 r17418  
    8585
    8686    public double Sum() {
    87       return values.Sum();
     87      return Values.Sum();
    8888    }
    8989
    9090    public double Mean() {
    91       return values.Average();
     91      return Values.Average();
    9292    }
    9393
    9494    public DoubleVector CumulativeMean() {
     95      // todo: zero range average throws exception
    9596      return new DoubleVector(
    9697        Enumerable.Range(0, this.Count)
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Vectors/Vector.cs

    r17367 r17418  
    2020#endregion
    2121
     22using System;
    2223using System.Collections;
    2324using System.Collections.Generic;
     
    2930  public abstract class Vector<T> : IVector<T> {
    3031    [Storable]
    31     protected readonly List<T> values;
     32    protected readonly List<T> Values;
    3233
    3334    protected Vector(IEnumerable<T> values) {
    34       this.values = values.ToList();
     35      this.Values = values.ToList();
    3536    }
    3637
     
    3839    protected Vector(StorableConstructorFlag _) { }
    3940
     41    public override string ToString() {
     42      const int maxCount = 10;
     43      string extension = Values.Count > maxCount ? ", ..." : "";
     44      return $"[{string.Join(", ", Values.Cast<object>().Take(Math.Min(Values.Count, maxCount)))}{extension}]";
     45    }
    4046
    4147    #region Interface members
    4248
    4349    public int Count {
    44       get { return values.Count; }
     50      get { return Values.Count; }
    4551    }
    4652
    4753    public T this[int index] {
    48       get { return values[index]; }
     54      get { return Values[index]; }
    4955    }
    5056
    5157    public IEnumerator<T> GetEnumerator() {
    52       return values.GetEnumerator();
     58      return Values.GetEnumerator();
    5359    }
    5460
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/TimeSeries/TimeSeriesInstanceProvider.cs

    r17403 r17418  
    191191
    192192    private ZipArchive OpenZipArchive() {
    193       var instanceArchiveName = GetResourceName(FileName + @"\.zip");
    194       return new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read);
     193      var instanceArchiveName = Path.Combine("Classification", "Data", FileName + ".zip");
     194      var stream = new FileStream(instanceArchiveName, FileMode.Open, FileAccess.Read, FileShare.Read);
     195      return new ZipArchive(stream, ZipArchiveMode.Read);
    195196    }
    196197
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r17415 r17418  
    257257    <Compile Include="Regression\VectorData\AzzaliBenchmark1.cs" />
    258258    <Compile Include="Regression\VectorData\RandomExtensions.cs" />
    259     <Compile Include="Regression\VectorData\VariousInstanceProvider.cs" />
     259    <Compile Include="Regression\VectorData\VectorDataInstanceProvider.cs" />
    260260    <Compile Include="Regression\VectorData\VectorDataTestOne.cs" />
    261261    <Compile Include="Regression\Vladislavleva\KotanchekFunction.cs" />
     
    275275  </ItemGroup>
    276276  <ItemGroup>
     277    <None Include="Classification\Data\TimeSeriesMultivariate.zip">
     278      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     279    </None>
    277280    <EmbeddedResource Include="Classification\Data\UCI.zip" />
    278     <EmbeddedResource Include="Classification\Data\TimeSeriesMultivariate.zip" />
    279     <EmbeddedResource Include="Classification\Data\TimeSeriesUnivariate.zip" />
     281    <None Include="Classification\Data\TimeSeriesUnivariate.zip">
     282      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     283    </None>
    280284    <None Include="HeuristicLab.snk" />
    281285    <None Include="Plugin.cs.frame" />
     
    284288    <EmbeddedResource Include="Regression\Data\MibaFriction.zip" />
    285289    <EmbeddedResource Include="Regression\Data\PennML.zip" />
    286     <EmbeddedResource Include="Regression\Data\UCITimeSeries.zip" />
     290    <None Include="Regression\Data\UCITimeSeries.zip">
     291      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     292    </None>
    287293  </ItemGroup>
    288294  <ItemGroup>
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/ResourceRegressionInstanceProvider.cs

    r17414 r17418  
    3636      var descriptor = (ResourceRegressionDataDescriptor)id;
    3737
    38       var instanceArchiveName = GetResourceName(FileName + @"\.zip");
    39       using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
     38      using (var instancesZipFile = new ZipArchive(OpenResourceStream(FileName), ZipArchiveMode.Read)) {
    4039        var entry = instancesZipFile.GetEntry(descriptor.ResourceName);
    4140        var formatOptions = GetFormatOptions(entry);
     
    6059    }
    6160
     61    protected virtual Stream OpenResourceStream(string fileName) {
     62      var instanceArchiveName = GetResourceName(FileName + @"\.zip");
     63      return GetType().Assembly.GetManifestResourceStream(instanceArchiveName);
     64    }
     65
    6266    protected virtual TableFileFormatOptions GetFormatOptions(ZipArchiveEntry entry) {
    6367      using (Stream stream = entry.Open()) {
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/UCITimeSeries/UCITimeSeriesProvider.cs

    r17415 r17418  
    2323using System.Collections.Generic;
    2424using System.Globalization;
     25using System.IO;
    2526using System.IO.Compression;
    2627
     
    5657    }
    5758
     59    protected override Stream OpenResourceStream(string fileName) {
     60      var instanceArchiveName = Path.Combine("Regression", "Data", fileName + ".zip");
     61      return new FileStream(instanceArchiveName, FileMode.Open, FileAccess.Read, FileShare.Read);
     62    }
     63
    5864    protected override TableFileFormatOptions GetFormatOptions(ZipArchiveEntry entry) {
    5965      return new TableFileFormatOptions {
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VectorData/AzzaliBenchmark1.cs

    r17400 r17418  
    1414
    1515    protected override string TargetVariable { get { return "B1"; } }
    16     protected override string[] VariableNames { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
     16    protected override string[] VariableNames { get { return AllowedInputVariables.Concat(new[] { TargetVariable }).ToArray(); } }
    1717    protected override string[] AllowedInputVariables { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
    1818    protected override int TrainingPartitionStart { get { return 0; } }
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VectorData/AzzaliBenchmark2.cs

    r17400 r17418  
    1414
    1515    protected override string TargetVariable { get { return "B2"; } }
    16     protected override string[] VariableNames { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
     16    protected override string[] VariableNames { get { return AllowedInputVariables.Concat(new[] { TargetVariable }).ToArray(); } }
    1717    protected override string[] AllowedInputVariables { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
    1818    protected override int TrainingPartitionStart { get { return 0; } }
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VectorData/AzzaliBenchmark3.cs

    r17400 r17418  
    1414
    1515    protected override string TargetVariable { get { return "B3"; } }
    16     protected override string[] VariableNames { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
     16    protected override string[] VariableNames { get { return AllowedInputVariables.Concat(new[] { TargetVariable }).ToArray(); } }
    1717    protected override string[] AllowedInputVariables { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
    1818    protected override int TrainingPartitionStart { get { return 0; } }
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VectorData/AzzaliKorns5.cs

    r17400 r17418  
    1414
    1515    protected override string TargetVariable { get { return "K5"; } }
    16     protected override string[] VariableNames { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
     16    protected override string[] VariableNames { get { return AllowedInputVariables.Concat(new[] { TargetVariable }).ToArray(); } }
    1717    protected override string[] AllowedInputVariables { get { return new string[] { "X1", "X2", "X3", "X4" }; } }
    1818    protected override int TrainingPartitionStart { get { return 0; } }
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VectorData/VectorDataInstanceProvider.cs

    r17417 r17418  
    5050      var rand = new MersenneTwister((uint)Seed);
    5151      return new List<IDataDescriptor> {
    52         new VectorDataTestOne(rand.Next()),
    53         new AzzaliKorns5(rand.Next()),
     52        new VectorDataTestOneA(rand.Next()),
     53        new VectorDataTestOneB(rand.Next()),
     54        new VectorDataTestOneC(rand.Next()),
     55        new VectorDataTestOneD(rand.Next()),
    5456        new AzzaliBenchmark1(rand.Next()),
    5557        new AzzaliBenchmark2(rand.Next()),
    56         new AzzaliBenchmark3(rand.Next())
     58        new AzzaliBenchmark3(rand.Next()),
     59        new AzzaliKorns5(rand.Next())
    5760      };
    5861    }
Note: See TracChangeset for help on using the changeset viewer.