Changeset 8660 for branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
- Timestamp:
- 09/14/12 18:58:15 (12 years ago)
- Location:
- branches/GP-MoveOperators
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/GP-MoveOperators
- Property svn:ignore
-
old new 21 21 protoc.exe 22 22 _ReSharper.HeuristicLab 3.3 Tests 23 Google.ProtocolBuffers-2.4.1.473.dll
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
r8199 r8660 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 25 using System.Globalization; 24 26 using System.IO; 27 using System.Linq; 25 28 using System.Text; 29 using HeuristicLab.Common; 26 30 using HeuristicLab.Problems.DataAnalysis; 27 31 … … 29 33 public class ClusteringCSVInstanceProvider : ClusteringInstanceProvider { 30 34 public override string Name { 31 get { return "Comma-separated Values File"; } 35 get { return "CSV File"; } 32 36 } 33 37 public override string Description { … … 56 60 public override IClusteringProblemData ImportData(string path) { 57 61 var csvFileParser = new TableFileParser(); 58 59 62 csvFileParser.Parse(path); 60 63 61 var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 62 var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables); 64 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 65 string targetVar = dataset.DoubleVariables.Last(); 63 66 64 int trainingPartEnd = csvFileParser.Rows * 2 / 3; 65 claData.TrainingPartition.Start = 0; 66 claData.TrainingPartition.End = trainingPartEnd; 67 claData.TestPartition.Start = trainingPartEnd; 68 claData.TestPartition.End = csvFileParser.Rows; 69 int pos = path.LastIndexOf('\\'); 70 if (pos < 0) 71 claData.Name = path; 72 else { 73 pos++; 74 claData.Name = path.Substring(pos, path.Length - pos); 67 // turn of input variables that are constant in the training partition 68 var allowedInputVars = new List<string>(); 69 var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3); 70 if (trainingIndizes.Count() >= 2) { 71 foreach (var variableName in dataset.DoubleVariables) { 72 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 73 variableName != targetVar) 74 allowedInputVars.Add(variableName); 75 } 76 } else { 77 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 75 78 } 76 79 77 return claData; 80 ClusteringProblemData 
clusteringData = new ClusteringProblemData(dataset, allowedInputVars); 81 82 int trainingPartEnd = trainingIndizes.Last(); 83 clusteringData.TrainingPartition.Start = trainingIndizes.First(); 84 clusteringData.TrainingPartition.End = trainingPartEnd; 85 clusteringData.TestPartition.Start = trainingPartEnd; 86 clusteringData.TestPartition.End = csvFileParser.Rows; 87 88 clusteringData.Name = Path.GetFileName(path); 89 90 return clusteringData; 91 } 92 93 public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) { 94 TableFileParser csvFileParser = new TableFileParser(); 95 csvFileParser.Parse(path); 96 97 List<IList> values = csvFileParser.Values; 98 if (type.Shuffle) { 99 values = Shuffle(values); 100 } 101 102 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 103 string targetVar = dataset.DoubleVariables.Last(); 104 105 // turn of input variables that are constant in the training partition 106 var allowedInputVars = new List<string>(); 107 int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100; 108 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 109 foreach (var variableName in dataset.DoubleVariables) { 110 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 111 variableName != targetVar) 112 allowedInputVars.Add(variableName); 113 } 114 115 ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars); 116 117 clusteringData.TrainingPartition.Start = 0; 118 clusteringData.TrainingPartition.End = trainingPartEnd; 119 clusteringData.TestPartition.Start = trainingPartEnd; 120 clusteringData.TestPartition.End = csvFileParser.Rows; 121 122 clusteringData.Name = Path.GetFileName(path); 123 124 return clusteringData; 78 125 } 79 126 … … 85 132 86 133 foreach (var variable in instance.InputVariables) { 87 strBuilder.Append(variable + ";"); 134 strBuilder.Append(variable + 
CultureInfo.CurrentCulture.TextInfo.ListSeparator); 88 135 } 89 strBuilder.Remove(strBuilder.Length - 1, 1); 136 strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length); 90 137 strBuilder.AppendLine(); 91 138 … … 94 141 for (int i = 0; i < dataset.Rows; i++) { 95 142 for (int j = 0; j < dataset.Columns; j++) { 96 strBuilder.Append(dataset.GetValue(i, j) + ";"); 143 if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator); 144 strBuilder.Append(dataset.GetValue(i, j)); 97 145 } 98 strBuilder.Remove(strBuilder.Length - 1, 1); 99 146 strBuilder.AppendLine(); 100 147 }
Note: See TracChangeset
for help on using the changeset viewer.