Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/25/12 16:16:23 (12 years ago)
Author:
sforsten
Message:

#1942:

  • add combo boxes to DataAnalysisImportTypeDialog to select csv settings
  • get branch ready
Location:
branches/DataAnalysisCSVImport
Files:
2 added
9 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysisCSVImport

    • Property svn:ignore set to
      *.suo
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8693 r8701  
    9292    }
    9393
    94     public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) {
    95       TableFileParser csvFileParser = new TableFileParser();
    96       csvFileParser.Parse(path);
    97 
     94    protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
    9895      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
    9996      List<IList> values = csvFileParser.Values;
     
    126123    }
    127124
     125    protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) {
     126      target = 5;
     127      IList targetValues = values[target];
     128      var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList();
     129      Dictionary<double, double> taken = new Dictionary<double, double>();
     130      foreach (var classCount in group) {
     131        taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100;
     132      }
     133
     134      List<IList> training = GetListOfIListCopy(values);
     135      List<IList> test = GetListOfIListCopy(values);
     136
     137      for (int i = 0; i < targetValues.Count; i++) {
     138        if (taken[(double)targetValues[i]] > 0) {
     139          AddRow(training, values, i);
     140          taken[(double)targetValues[i]]--;
     141        } else {
     142          AddRow(test, values, i);
     143        }
     144      }
     145
     146      training = Shuffle(training);
     147      test = Shuffle(test);
     148      for (int i = 0; i < training.Count; i++) {
     149        for (int j = 0; j < test[i].Count; j++) {
     150          training[i].Add(test[i][j]);
     151        }
     152      }
     153
     154      return training;
     155    }
     156
     157    private void AddRow(List<IList> destination, List<IList> source, int index) {
     158      for (int i = 0; i < source.Count; i++) {
     159        destination[i].Add(source[i][index]);
     160      }
     161    }
     162
     163    private List<IList> GetListOfIListCopy(List<IList> values) {
     164      List<IList> newList = new List<IList>(values.Count);
     165      for (int col = 0; col < values.Count; col++) {
     166
     167        if (values[col] is List<double>)
     168          newList.Add(new List<double>());
     169        else if (values[col] is List<DateTime>)
     170          newList.Add(new List<DateTime>());
     171        else if (values[col] is List<string>)
     172          newList.Add(new List<string>());
     173        else
     174          throw new InvalidOperationException();
     175      }
     176      return newList;
     177    }
     178
     179    private List<IList> NormalizeClasses(List<IList> values) {
     180      int column = GetLastDoubleColumn(values);
     181      Dictionary<object, int> count = new Dictionary<object, int>();
     182      foreach (var item in values[column]) {
     183        if (count.Keys.Contains(item)) {
     184          count[item]++;
     185        } else {
     186          count.Add(item, 1);
     187        }
     188      }
     189      int min = count.Values.Min();
     190      Dictionary<object, int> taken = new Dictionary<object, int>();
     191      foreach (var key in count.Keys) {
     192        taken[key] = 0;
     193      }
     194      List<IList> normalizedValues = new List<IList>(values.Count);
     195      for (int col = 0; col < values.Count; col++) {
     196
     197        if (values[col] is List<double>)
     198          normalizedValues.Add(new List<double>());
     199        else if (values[col] is List<DateTime>)
     200          normalizedValues.Add(new List<DateTime>());
     201        else if (values[col] is List<string>)
     202          normalizedValues.Add(new List<string>());
     203        else
     204          throw new InvalidOperationException();
     205      }
     206      for (int i = 0; i < values.First().Count; i++) {
     207        if (taken[values[column][i]] < min) {
     208          taken[values[column][i]]++;
     209          for (int col = 0; col < values.Count; col++) {
     210            normalizedValues[col].Add(values[col][i]);
     211          }
     212        }
     213      }
     214      return normalizedValues;
     215    }
     216
     217    private int GetLastDoubleColumn(List<IList> values) {
     218      for (int i = values.Count - 1; i >= 0; i--) {
     219        if (values[i] is List<double>) {
     220          return i;
     221        }
     222      }
     223      throw new ArgumentException("No possible Target Variable could be found!");
     224    }
     225
    128226    public override bool CanExportData {
    129227      get { return true; }
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs

    r8598 r8701  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData> {
     25  public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData, ClassificationImportType> {
    2626  }
    2727}
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8685 r8701  
    9191    }
    9292
    93     public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) {
    94       TableFileParser csvFileParser = new TableFileParser();
    95       csvFileParser.Parse(path);
    96 
     93    protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
    9794      List<IList> values = csvFileParser.Values;
    9895      if (type.Shuffle) {
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/ClusteringInstanceProvider.cs

    r8598 r8701  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class ClusteringInstanceProvider : DataAnalysisInstanceProvider<IClusteringProblemData> {
     25  public abstract class ClusteringInstanceProvider : DataAnalysisInstanceProvider<IClusteringProblemData, DataAnalysisImportType> {
    2626  }
    2727}
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs

    r8598 r8701  
    2727
    2828namespace HeuristicLab.Problems.Instances.DataAnalysis {
    29   public abstract class DataAnalysisInstanceProvider<TData> : ProblemInstanceProvider<TData>
    30     where TData : class, IDataAnalysisProblemData {
     29  public abstract class DataAnalysisInstanceProvider<TData, ImportType> : ProblemInstanceProvider<TData>
     30    where TData : class, IDataAnalysisProblemData
     31    where ImportType : DataAnalysisImportType {
    3132
    3233    // has to be implemented, if CanImportData is true
    33     public virtual TData ImportData(string path, DataAnalysisImportType type) {
     34    public TData ImportData(string path, ImportType type) {
     35      TableFileParser csvFileParser = new TableFileParser();
     36      csvFileParser.Parse(path);
     37      return ImportData(path, type, csvFileParser);
     38    }
     39    public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat) {
     40      TableFileParser csvFileParser = new TableFileParser();
     41      csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator);
     42      return ImportData(path, type, csvFileParser);
     43    }
     44    protected virtual TData ImportData(string path, ImportType type, TableFileParser csvFileParser) {
    3445      throw new NotSupportedException();
    3546    }
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r8693 r8701  
    1818    <DebugType>full</DebugType>
    1919    <Optimize>false</Optimize>
    20     <OutputPath>..\..\bin\</OutputPath>
     20    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    2121    <DefineConstants>DEBUG;TRACE</DefineConstants>
    2222    <ErrorReport>prompt</ErrorReport>
     
    2626    <DebugType>pdbonly</DebugType>
    2727    <Optimize>true</Optimize>
    28     <OutputPath>..\..\bin\</OutputPath>
     28    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    2929    <DefineConstants>TRACE</DefineConstants>
    3030    <ErrorReport>prompt</ErrorReport>
     
    3939  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
    4040    <DebugSymbols>true</DebugSymbols>
    41     <OutputPath>..\..\bin\</OutputPath>
     41    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    4242    <DefineConstants>DEBUG;TRACE</DefineConstants>
    4343    <DebugType>full</DebugType>
     
    5252  </PropertyGroup>
    5353  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
    54     <OutputPath>..\..\bin\</OutputPath>
     54    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    5555    <DefineConstants>TRACE</DefineConstants>
    5656    <Optimize>true</Optimize>
     
    6969  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
    7070    <DebugSymbols>true</DebugSymbols>
    71     <OutputPath>..\..\bin\</OutputPath>
     71    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    7272    <DefineConstants>DEBUG;TRACE</DefineConstants>
    7373    <DebugType>full</DebugType>
     
    8282  </PropertyGroup>
    8383  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
    84     <OutputPath>..\..\bin\</OutputPath>
     84    <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
    8585    <DefineConstants>TRACE</DefineConstants>
    8686    <Optimize>true</Optimize>
     
    9898  </PropertyGroup>
    9999  <ItemGroup>
     100    <Reference Include="HeuristicLab.Collections-3.3">
     101      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Collections-3.3.dll</HintPath>
     102      <Private>False</Private>
     103    </Reference>
     104    <Reference Include="HeuristicLab.Common-3.3">
     105      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath>
     106      <Private>False</Private>
     107    </Reference>
     108    <Reference Include="HeuristicLab.Core-3.3">
     109      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Core-3.3.dll</HintPath>
     110      <Private>False</Private>
     111    </Reference>
     112    <Reference Include="HeuristicLab.Data-3.3">
     113      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Data-3.3.dll</HintPath>
     114      <Private>False</Private>
     115    </Reference>
     116    <Reference Include="HeuristicLab.PluginInfrastructure-3.3">
     117      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath>
     118      <Private>False</Private>
     119    </Reference>
     120    <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4">
     121      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
     122      <Private>False</Private>
     123    </Reference>
     124    <Reference Include="HeuristicLab.Problems.Instances-3.3">
     125      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
     126      <Private>False</Private>
     127    </Reference>
     128    <Reference Include="HeuristicLab.Random-3.3">
     129      <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Random-3.3.dll</HintPath>
     130      <Private>False</Private>
     131    </Reference>
    100132    <Reference Include="ICSharpCode.SharpZipLib">
    101       <HintPath>..\..\HeuristicLab.PluginInfrastructure\3.3\ICSharpCode.SharpZipLib.dll</HintPath>
     133      <HintPath>..\..\..\..\trunk\sources\bin\ICSharpCode.SharpZipLib.dll</HintPath>
    102134      <Private>False</Private>
    103135    </Reference>
     
    117149    <Compile Include="Classification\ClassificationInstanceProvider.cs" />
    118150    <Compile Include="Classification\CSV\ClassifiactionCSVInstanceProvider.cs" />
     151    <Compile Include="Classification\ClassificationImportType.cs" />
    119152    <Compile Include="Classification\ResourceClassificationDataDescriptor.cs" />
    120153    <Compile Include="Classification\ResourceClassificationInstanceProvider.cs" />
     
    124157    <Compile Include="Clustering\ClusteringInstanceProvider.cs" />
    125158    <Compile Include="Clustering\CSV\ClusteringCSVInstanceProvider.cs" />
     159    <Compile Include="DataAnalysisCSVFormat.cs" />
    126160    <Compile Include="DataAnalysisImportType.cs" />
    127161    <Compile Include="DataAnalysisInstanceProvider.cs" />
     
    205239    <EmbeddedResource Include="Regression\Data\RegressionRealWorld.zip" />
    206240  </ItemGroup>
    207   <ItemGroup>
    208     <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj">
    209       <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>
    210       <Name>HeuristicLab.Collections-3.3</Name>
    211       <Private>False</Private>
    212     </ProjectReference>
    213     <ProjectReference Include="..\..\HeuristicLab.Common\3.3\HeuristicLab.Common-3.3.csproj">
    214       <Project>{A9AD58B9-3EF9-4CC1-97E5-8D909039FF5C}</Project>
    215       <Name>HeuristicLab.Common-3.3</Name>
    216       <Private>False</Private>
    217     </ProjectReference>
    218     <ProjectReference Include="..\..\HeuristicLab.Core\3.3\HeuristicLab.Core-3.3.csproj">
    219       <Project>{C36BD924-A541-4A00-AFA8-41701378DDC5}</Project>
    220       <Name>HeuristicLab.Core-3.3</Name>
    221       <Private>False</Private>
    222     </ProjectReference>
    223     <ProjectReference Include="..\..\HeuristicLab.Data\3.3\HeuristicLab.Data-3.3.csproj">
    224       <Project>{BBAB9DF5-5EF3-4BA8-ADE9-B36E82114937}</Project>
    225       <Name>HeuristicLab.Data-3.3</Name>
    226       <Private>False</Private>
    227     </ProjectReference>
    228     <ProjectReference Include="..\..\HeuristicLab.PluginInfrastructure\3.3\HeuristicLab.PluginInfrastructure-3.3.csproj">
    229       <Project>{94186A6A-5176-4402-AE83-886557B53CCA}</Project>
    230       <Name>HeuristicLab.PluginInfrastructure-3.3</Name>
    231       <Private>False</Private>
    232     </ProjectReference>
    233     <ProjectReference Include="..\..\HeuristicLab.Problems.DataAnalysis\3.4\HeuristicLab.Problems.DataAnalysis-3.4.csproj">
    234       <Project>{DF87C13E-A889-46FF-8153-66DCAA8C5674}</Project>
    235       <Name>HeuristicLab.Problems.DataAnalysis-3.4</Name>
    236       <Private>False</Private>
    237     </ProjectReference>
    238     <ProjectReference Include="..\..\HeuristicLab.Problems.Instances\3.3\HeuristicLab.Problems.Instances-3.3.csproj">
    239       <Project>{3540E29E-4793-49E7-8EE2-FEA7F61C3994}</Project>
    240       <Name>HeuristicLab.Problems.Instances-3.3</Name>
    241       <Private>False</Private>
    242     </ProjectReference>
    243     <ProjectReference Include="..\..\HeuristicLab.Random\3.3\HeuristicLab.Random-3.3.csproj">
    244       <Project>{F4539FB6-4708-40C9-BE64-0A1390AEA197}</Project>
    245       <Name>HeuristicLab.Random-3.3</Name>
    246       <Private>False</Private>
    247     </ProjectReference>
    248   </ItemGroup>
    249241  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
    250242  <PropertyGroup>
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8685 r8701  
    8686    }
    8787
    88     public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) {
    89       TableFileParser csvFileParser = new TableFileParser();
    90       csvFileParser.Parse(path);
    91 
     88    protected override IRegressionProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
    9289      List<IList> values = csvFileParser.Values;
    9390      if (type.Shuffle) {
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs

    r8598 r8701  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData> {
     25  public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, DataAnalysisImportType> {
    2626  }
    2727}
Note: See TracChangeset for help on using the changeset viewer.