Changeset 18097


Ignore:
Timestamp:
11/30/21 15:49:02 (7 months ago)
Author:
pfleck
Message:

#3040 Added SOP instances that are loaded from a CSV file with vectors.

Location:
branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r18096 r18097  
    426426      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    427427    </None>
     428    <EmbeddedResource Include="SegmentOptimization\Data\SOPData.zip" />
    428429  </ItemGroup>
    429430  <ItemGroup>
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/SegmentOptimization/SOPDataDescriptor.cs

    r18096 r18097  
    2020#endregion
    2121
     22using System.Collections.Specialized;
     23using HeuristicLab.Data;
    2224using HeuristicLab.Problems.Instances.Types;
    2325
     
    2628    public string Name { get; internal set; }
    2729    public string Description { get; internal set; }
    28     //internal string InstanceIdentifier { get; set; }
     30
    2931    internal SOPData Data { get; set; }
    30 
    31     //internal SOPDataDescriptor(string name, string description, string instanceIdentifier) {
    32     //  Name = name;
    33     //  Description = description;
    34     //  InstanceIdentifier = instanceIdentifier;
    35     //}
    3632    internal SOPDataDescriptor(string name, string description, SOPData data) {
    3733      data.Name = name;
     
    4238      Data = data;
    4339    }
     40
     41    internal string VariableName { get; set; }
     42    internal int Row { get; set; }
     43    internal int  Lower { get; set; }
     44    internal int Upper { get; set; }
     45    internal string Aggregation { get; set; }
     46    internal SOPDataDescriptor(string name, string description, string variableName, int row, int lower, int upper, string aggregation) {
     47      Name = name;
     48      Description = description;
     49
     50      VariableName = variableName;
     51      Row = row;
     52      Lower = lower;
     53      Upper = upper;
     54      Aggregation = aggregation;
     55    }
    4456  }
    4557}
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/SegmentOptimization/SegmentOptimizationInstanceProvider.cs

    r18096 r18097  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Data;
     25using System.IO.Compression;
    2426using System.Linq;
     27using System.Reflection;
     28using System.Text.RegularExpressions;
     29using HeuristicLab.Problems.DataAnalysis;
    2530using HeuristicLab.Problems.Instances.Types;
    2631
    2732namespace HeuristicLab.Problems.Instances.DataAnalysis.SegmentOptimization {
    28   public class SegmentOptimizationInstanceProvider  : ProblemInstanceProvider<SOPData> {
     33  public class SegmentOptimizationInstanceProvider : ProblemInstanceProvider<SOPData> {
    2934
    3035    public override string Name {
    3136      get { return "Simple Generated"; }
    3237    }
     38
    3339    public override string Description {
    3440      get { return "Simple Generated"; }
     
    3743    public override string ReferencePublication => "";
    3844    public override Uri WebLink => null;
    39    
     45
    4046    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
    4147      return new[] {
     
    5864      };
    5965    }
     66
    6067    public override SOPData LoadData(IDataDescriptor id) {
    6168      var descriptor = (SOPDataDescriptor)id;
     
    6370    }
    6471  }
     72
     73  public class SegmentOptimizationFileInstanceProvider : ProblemInstanceProvider<SOPData> {
     74    public override string Name {
     75      get { return "SOP File"; }
     76    }
     77
     78    public override string Description {
     79      get { return "SOP File"; }
     80    }
     81
     82    public override string ReferencePublication => "";
     83    public override Uri WebLink => null;
     84
     85    protected virtual string FileName => "SOPData";
     86    protected virtual string ZipEntryName => "GeneratedVectors.csv";
     87
     88    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
     89      return new[] {
     90        new SOPDataDescriptor("v1", "", "v1", 0, 20, 60, "mean"),
     91        new SOPDataDescriptor("v2", "", "v2", 0, 20, 60, "mean"),
     92        new SOPDataDescriptor("v3", "", "v3", 0, 20, 60, "mean"),
     93        new SOPDataDescriptor("v4", "", "v4", 0, 20, 40, "mean"),
     94        new SOPDataDescriptor("v5", "", "v5", 0, 60, 80, "mean"),
     95        new SOPDataDescriptor("v6", "", "v6", 0, 40, 60, "mean"),
     96      };
     97    }
     98
     99    public override SOPData LoadData(IDataDescriptor id) {
     100      var descriptor = (SOPDataDescriptor)id;
     101      var instanceArchiveName = GetResourceName(FileName + @"\.zip");
     102
     103      var parser = new TableFileParser();
     104      var options = new TableFileFormatOptions {
     105        ColumnSeparator = ';',
     106        VectorSeparator = ','
     107      };
     108
     109      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName))) {
     110        var entry = instancesZipFile.GetEntry(ZipEntryName);
     111        using (var stream = entry.Open()) {
     112          parser.Parse(stream, options, columnNamesInFirstLine: true);
     113
     114          var dataTable = new Dataset(parser.VariableNames, parser.Values);
     115          var instance = LoadInstance(dataTable, descriptor);
     116
     117          instance.Name = id.Name;
     118          instance.Description = id.Description;
     119
     120          return instance;
     121        }
     122      }
     123    }
     124
     125    private SOPData LoadInstance(IDataset dataset, SOPDataDescriptor descriptor) {
     126      var data = dataset.GetDoubleVectorValue(descriptor.VariableName, descriptor.Row);
     127
     128      return new SOPData {
     129        Values = data.ToArray(),
     130        Lower = descriptor.Lower,
     131        Upper = descriptor.Upper,
     132        Aggregation = descriptor.Aggregation
     133      };
     134    }
     135
     136    protected virtual string GetResourceName(string fileName) {
     137      return Assembly.GetExecutingAssembly().GetManifestResourceNames()
     138        .SingleOrDefault(x => Regex.Match(x, @".*\.Data\." + fileName).Success);
     139    }
     140
     141    protected virtual string GetInstanceDescription() {
     142      return "Embedded instance of plugin version " + Assembly.GetExecutingAssembly().GetCustomAttributes(typeof(AssemblyFileVersionAttribute), true).Cast<AssemblyFileVersionAttribute>().First().Version + ".";
     143    }
     144  }
     145
     146  public class SegmentOptimizationLargeFileInstanceProvider : SegmentOptimizationFileInstanceProvider {
     147    public override string Name {
     148      get { return "SOP Large File"; }
     149    }
     150
     151    public override string Description {
     152      get { return "SOP Large File"; }
     153    }
     154
     155    protected override string ZipEntryName => "GeneratedVectorsLarge.csv";
     156
     157    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
     158      return base.GetDataDescriptors().Select(id => {
     159        var descriptor = (SOPDataDescriptor)id;
     160        descriptor.Lower *= 10;
     161        descriptor.Upper *= 10;
     162        return descriptor;
     163      });
     164    }
     165  }
    65166}
Note: See TracChangeset for help on using the changeset viewer.