Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/02/12 09:49:43 (12 years ago)
Author:
sforsten
Message:

#1942:

  • added CSV import dialog for regression
  • improved existing dialog (tooltip, design, preview of dataset)
Location:
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8701 r8715  
    7676        }
    7777      } else {
    78         allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
     78        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    7979      }
    8080
     
    9696      List<IList> values = csvFileParser.Values;
    9797      if (type.Shuffle) {
    98         values = Shuffle(values);
     98        values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
     99                         type.Training, out trainingPartEnd);
    99100      }
    100101
    101102      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
    102       string targetVar = dataset.DoubleVariables.Last();
    103103
    104104      // turn of input variables that are constant in the training partition
    105105      var allowedInputVars = new List<string>();
    106106      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
    107       foreach (var variableName in dataset.DoubleVariables) {
    108         if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
    109           variableName != targetVar)
    110           allowedInputVars.Add(variableName);
    111       }
    112 
    113       ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     107      if (trainingIndizes.Count() >= 2) {
     108        foreach (var variableName in dataset.DoubleVariables) {
     109          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     110            variableName != type.TargetVariable)
     111            allowedInputVars.Add(variableName);
     112        }
     113      } else {
     114        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
     115      }
     116
     117      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
    114118
    115119      classificationData.TrainingPartition.Start = 0;
     
    123127    }
    124128
    125     protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) {
    126       target = 5;
     129    protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, out int trainingPartEnd) {
    127130      IList targetValues = values[target];
    128131      var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList();
    129132      Dictionary<double, double> taken = new Dictionary<double, double>();
    130133      foreach (var classCount in group) {
    131         taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100;
     134        taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100.0;
    132135      }
    133136
     
    143146        }
    144147      }
     148
     149      trainingPartEnd = training.First().Count;
    145150
    146151      training = Shuffle(training);
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationImportType.cs

    r8701 r8715  
    2222namespace HeuristicLab.Problems.Instances.DataAnalysis {
    2323  public class ClassificationImportType : DataAnalysisImportType {
    24     public string Variable { get; set; }
     24    public string TargetVariable { get; set; }
    2525  }
    2626}
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8701 r8715  
    7575        }
    7676      } else {
    77         allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
     77        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    7878      }
    7979
     
    104104      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
    105105      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
    106       foreach (var variableName in dataset.DoubleVariables) {
    107         if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
    108           variableName != targetVar)
    109           allowedInputVars.Add(variableName);
     106      if (trainingIndizes.Count() >= 2) {
     107        foreach (var variableName in dataset.DoubleVariables) {
     108          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     109            variableName != targetVar)
     110            allowedInputVars.Add(variableName);
     111        }
     112      } else {
     113        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    110114      }
    111115
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r8701 r8715  
    9898  </PropertyGroup>
    9999  <ItemGroup>
    100     <Reference Include="HeuristicLab.Collections-3.3">
    101       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Collections-3.3.dll</HintPath>
    102       <Private>False</Private>
    103     </Reference>
    104     <Reference Include="HeuristicLab.Common-3.3">
    105       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath>
    106       <Private>False</Private>
    107     </Reference>
    108     <Reference Include="HeuristicLab.Core-3.3">
    109       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Core-3.3.dll</HintPath>
     100    <Reference Include="HeuristicLab.Common-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     101      <Private>False</Private>
     102    </Reference>
     103    <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    110104      <Private>False</Private>
    111105    </Reference>
     
    164158    <Compile Include="Regression\ArtificialRegressionDataDescriptor.cs" />
    165159    <Compile Include="Regression\ArtificialRegressionInstanceProvider.cs" />
     160    <Compile Include="Regression\RegressionImportType.cs" />
    166161    <Compile Include="Regression\CSV\RegressionCSVInstanceProvider.cs" />
    167162    <Compile Include="Regression\Keijzer\KeijzerFunctionFourteen.cs" />
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8701 r8715  
    6767      var allowedInputVars = new List<string>();
    6868      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
    69       foreach (var variableName in dataset.DoubleVariables) {
    70         if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
    71           variableName != targetVar)
    72           allowedInputVars.Add(variableName);
     69      if (trainingIndizes.Count() >= 2) {
     70        foreach (var variableName in dataset.DoubleVariables) {
     71          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72            variableName != targetVar)
     73            allowedInputVars.Add(variableName);
     74        }
     75      } else {
     76        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    7377      }
    7478
     
    8690    }
    8791
    88     protected override IRegressionProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
     92    protected override IRegressionProblemData ImportData(string path, RegressionImportType type, TableFileParser csvFileParser) {
    8993      List<IList> values = csvFileParser.Values;
    9094      if (type.Shuffle) {
     
    9296      }
    9397      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
    94       string targetVar = dataset.DoubleVariables.Last();
    9598
    9699      // turn of input variables that are constant in the training partition
     
    102105        foreach (var variableName in dataset.DoubleVariables) {
    103106          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
    104             variableName != targetVar)
     107            variableName != type.TargetVariable)
    105108            allowedInputVars.Add(variableName);
    106109        }
    107110      } else {
    108         allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
     111        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
    109112      }
    110113
    111       RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     114      RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, type.TargetVariable);
    112115
    113116      regressionData.TrainingPartition.Start = 0;
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs

    r8701 r8715  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, DataAnalysisImportType> {
     25  public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, RegressionImportType> {
    2626  }
    2727}
Note: See TracChangeset for help on using the changeset viewer.