Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/02/12 09:49:43 (12 years ago)
Author:
sforsten
Message:

#1942:

  • added CSV import dialog for regression
  • improved existing dialog (tooltip, design, preview of dataset)
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8701 r8715  
    7676        }
    7777      } else {
    78         allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
     78        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    7979      }
    8080
     
    9696      List<IList> values = csvFileParser.Values;
    9797      if (type.Shuffle) {
    98         values = Shuffle(values);
     98        values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
     99                         type.Training, out trainingPartEnd);
    99100      }
    100101
    101102      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
    102       string targetVar = dataset.DoubleVariables.Last();
    103103
    104104      // turn off input variables that are constant in the training partition
    105105      var allowedInputVars = new List<string>();
    106106      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
    107       foreach (var variableName in dataset.DoubleVariables) {
    108         if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
    109           variableName != targetVar)
    110           allowedInputVars.Add(variableName);
    111       }
    112 
    113       ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     107      if (trainingIndizes.Count() >= 2) {
     108        foreach (var variableName in dataset.DoubleVariables) {
     109          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     110            variableName != type.TargetVariable)
     111            allowedInputVars.Add(variableName);
     112        }
     113      } else {
     114        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
     115      }
     116
     117      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
    114118
    115119      classificationData.TrainingPartition.Start = 0;
     
    123127    }
    124128
    125     protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) {
    126       target = 5;
     129    protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, out int trainingPartEnd) {
    127130      IList targetValues = values[target];
    128131      var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList();
    129132      Dictionary<double, double> taken = new Dictionary<double, double>();
    130133      foreach (var classCount in group) {
    131         taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100;
     134        taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100.0;
    132135      }
    133136
     
    143146        }
    144147      }
     148
     149      trainingPartEnd = training.First().Count;
    145150
    146151      training = Shuffle(training);
Note: See TracChangeset for help on using the changeset viewer.