Changeset 8715 for branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
- Timestamp: 10/02/12 09:49:43 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
r8701  r8715
   76     76     }
   77     77     } else {
   78          - allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
          78   + allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
   79     79     }
   80     80
    …      …
   96     96     List<IList> values = csvFileParser.Values;
   97     97     if (type.Shuffle) {
   98          - values = Shuffle(values);
          98   + values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
          99   + type.Training, out trainingPartEnd);
   99    100     }
  100    101
  101    102     Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
  102          - string targetVar = dataset.DoubleVariables.Last();
  103    103
  104    104     // turn of input variables that are constant in the training partition
  105    105     var allowedInputVars = new List<string>();
  106    106     var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
  107          - foreach (var variableName in dataset.DoubleVariables) {
  108          - if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
  109          - variableName != targetVar)
  110          - allowedInputVars.Add(variableName);
  111          - }
  112          -
  113          - ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
         107   + if (trainingIndizes.Count() >= 2) {
         108   + foreach (var variableName in dataset.DoubleVariables) {
         109   + if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
         110   + variableName != type.TargetVariable)
         111   + allowedInputVars.Add(variableName);
         112   + }
         113   + } else {
         114   + allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
         115   + }
         116   +
         117   + ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
  114    118
  115    119     classificationData.TrainingPartition.Start = 0;
    …      …
  123    127     }
  124    128
  125          - protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) {
  126          - target = 5;
         129   + protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, out int trainingPartEnd) {
  127    130     IList targetValues = values[target];
  128    131     var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList();
  129    132     Dictionary<double, double> taken = new Dictionary<double, double>();
  130    133     foreach (var classCount in group) {
  131          - taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100 ;
         134   + taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100.0;
  132    135     }
  133    136
    …      …
  143    146     }
  144    147     }
         148   +
         149   + trainingPartEnd = training.First().Count;
  145    150
  146    151     training = Shuffle(training);
Note: See TracChangeset for help on using the changeset viewer.