Changeset 8660 for branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification
- Timestamp:
- 09/14/12 18:58:15 (12 years ago)
- Location:
- branches/GP-MoveOperators
- Files:
-
- 2 deleted
- 3 edited
- 2 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/GP-MoveOperators
- Property svn:ignore
-
old new 21 21 protoc.exe 22 22 _ReSharper.HeuristicLab 3.3 Tests 23 Google.ProtocolBuffers-2.4.1.473.dll
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
r8206 r8660 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 25 using System.Globalization; 24 26 using System.IO; 25 27 using System.Linq; 26 28 using System.Text; 29 using HeuristicLab.Common; 27 30 using HeuristicLab.Problems.DataAnalysis; 28 31 … … 30 33 public class ClassificationCSVInstanceProvider : ClassificationInstanceProvider { 31 34 public override string Name { 32 get { return "C omma-separated ValuesFile"; }35 get { return "CSV File"; } 33 36 } 34 37 public override string Description { … … 61 64 62 65 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 63 string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last(); 64 IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar)); 66 string targetVar = dataset.DoubleVariables.Last(); 65 67 66 ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar); 67 68 int trainingPartEnd = csvFileParser.Rows * 2 / 3; 69 claData.TrainingPartition.Start = 0; 70 claData.TrainingPartition.End = trainingPartEnd; 71 claData.TestPartition.Start = trainingPartEnd; 72 claData.TestPartition.End = csvFileParser.Rows; 73 int pos = path.LastIndexOf('\\'); 74 if (pos < 0) 75 claData.Name = path; 76 else { 77 pos++; 78 claData.Name = path.Substring(pos, path.Length - pos); 68 // turn of input variables that are constant in the training partition 69 var allowedInputVars = new List<string>(); 70 var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3); 71 if (trainingIndizes.Count() >= 2) { 72 foreach (var variableName in dataset.DoubleVariables) { 73 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 74 variableName != targetVar) 75 allowedInputVars.Add(variableName); 76 } 77 } else { 78 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 79 79 } 80 80 81 return claData; 81 ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar); 82 83 int trainingPartEnd = trainingIndizes.Last(); 84 classificationData.TrainingPartition.Start = trainingIndizes.First(); 85 classificationData.TrainingPartition.End = trainingPartEnd; 86 classificationData.TestPartition.Start = trainingPartEnd; 87 classificationData.TestPartition.End = csvFileParser.Rows; 88 89 classificationData.Name = Path.GetFileName(path); 90 91 return classificationData; 92 } 93 94 public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) { 95 TableFileParser csvFileParser = new TableFileParser(); 96 csvFileParser.Parse(path); 97 98 List<IList> values = csvFileParser.Values; 99 if (type.Shuffle) { 100 values = Shuffle(values); 101 } 102 103 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 104 string targetVar = dataset.DoubleVariables.Last(); 105 106 // turn of input variables that are constant in the training partition 107 var allowedInputVars = new List<string>(); 108 int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100; 109 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 110 foreach (var variableName in dataset.DoubleVariables) { 111 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 112 variableName != targetVar) 113 allowedInputVars.Add(variableName); 114 } 115 116 ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar); 117 118 classificationData.TrainingPartition.Start = 0; 119 classificationData.TrainingPartition.End = trainingPartEnd; 120 classificationData.TestPartition.Start = trainingPartEnd; 121 classificationData.TestPartition.End = csvFileParser.Rows; 122 123 classificationData.Name = Path.GetFileName(path); 124 125 return classificationData; 82 126 } 83 127 … … 86 130 } 87 131 public override void ExportData(IClassificationProblemData instance, string path) { 88 StringBuilder strBuilder = new StringBuilder();132 var strBuilder = new StringBuilder(); 89 133 90 134 foreach (var variable in instance.InputVariables) { 91 strBuilder.Append(variable + ";");135 strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator); 92 136 } 93 strBuilder.Remove(strBuilder.Length - 1, 1);137 strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length); 94 138 strBuilder.AppendLine(); 95 139 96 Datasetdataset = instance.Dataset;140 var dataset = instance.Dataset; 97 141 98 142 for (int i = 0; i < dataset.Rows; i++) { 99 143 for (int j = 0; j < dataset.Columns; j++) { 100 strBuilder.Append(dataset.GetValue(i, j) + ";"); 144 if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator); 145 strBuilder.Append(dataset.GetValue(i, j)); 101 146 } 102 strBuilder.Remove(strBuilder.Length - 1, 1);103 147 strBuilder.AppendLine(); 104 148 } 105 149 106 using ( StreamWriter writer = new StreamWriter(path)) {150 using (var writer = new StreamWriter(path)) { 107 151 writer.Write(strBuilder); 108 152 } -
branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs
r8206 r8660 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public abstract class ClassificationInstanceProvider : ProblemInstanceProvider<IClassificationProblemData> {25 public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData> { 26 26 } 27 27 }
Note: See TracChangeset
for help on using the changeset viewer.