Changeset 8742 for branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV
- Timestamp: 10/05/12 11:58:17 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
Columns: line number in r8211, line number in r8742, change marker, source line.
Marker "+" = line added in r8742; marker "-" = line removed from r8211; no marker = unmodified.
(Indentation is approximated; the extracted text did not preserve it.)

r8211  r8742
   21     21
   22     22    using System;
          23  + using System.Collections;
   23     24    using System.Collections.Generic;
          25  + using System.Globalization;
   24     26    using System.IO;
          27  + using System.Linq;
   25     28    using System.Text;
          29  + using HeuristicLab.Common;
   26     30    using HeuristicLab.Problems.DataAnalysis;
   27     31
    …      …
   56     60        public override IClusteringProblemData ImportData(string path) {
   57     61          var csvFileParser = new TableFileParser();
   58         -
   59     62          csvFileParser.Parse(path);
   60     63
   61         -       var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
   62         -       var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
          64  +       Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
          65  +       string targetVar = dataset.DoubleVariables.Last();
   63     66
   64         -       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
   65         -       claData.TrainingPartition.Start = 0;
   66         -       claData.TrainingPartition.End = trainingPartEnd;
   67         -       claData.TestPartition.Start = trainingPartEnd;
   68         -       claData.TestPartition.End = csvFileParser.Rows;
   69         -       int pos = path.LastIndexOf('\\');
   70         -       if (pos < 0)
   71         -         claData.Name = path;
   72         -       else {
   73         -         pos++;
   74         -         claData.Name = path.Substring(pos, path.Length - pos);
          67  +       // turn of input variables that are constant in the training partition
          68  +       var allowedInputVars = new List<string>();
          69  +       var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
          70  +       if (trainingIndizes.Count() >= 2) {
          71  +         foreach (var variableName in dataset.DoubleVariables) {
          72  +           if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
          73  +             variableName != targetVar)
          74  +             allowedInputVars.Add(variableName);
          75  +         }
          76  +       } else {
          77  +         allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
   75     78          }
   76     79
   77         -       return claData;
          80  +       ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
          81  +
          82  +       int trainingPartEnd = trainingIndizes.Last();
          83  +       clusteringData.TrainingPartition.Start = trainingIndizes.First();
          84  +       clusteringData.TrainingPartition.End = trainingPartEnd;
          85  +       clusteringData.TestPartition.Start = trainingPartEnd;
          86  +       clusteringData.TestPartition.End = csvFileParser.Rows;
          87  +
          88  +       clusteringData.Name = Path.GetFileName(path);
          89  +
          90  +       return clusteringData;
          91  +     }
          92  +
          93  +     public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) {
          94  +       TableFileParser csvFileParser = new TableFileParser();
          95  +       csvFileParser.Parse(path);
          96  +
          97  +       List<IList> values = csvFileParser.Values;
          98  +       if (type.Shuffle) {
          99  +         values = Shuffle(values);
         100  +       }
         101  +
         102  +       Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
         103  +       string targetVar = dataset.DoubleVariables.Last();
         104  +
         105  +       // turn of input variables that are constant in the training partition
         106  +       var allowedInputVars = new List<string>();
         107  +       int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
         108  +       var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
         109  +       foreach (var variableName in dataset.DoubleVariables) {
         110  +         if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
         111  +           variableName != targetVar)
         112  +           allowedInputVars.Add(variableName);
         113  +       }
         114  +
         115  +       ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
         116  +
         117  +       clusteringData.TrainingPartition.Start = 0;
         118  +       clusteringData.TrainingPartition.End = trainingPartEnd;
         119  +       clusteringData.TestPartition.Start = trainingPartEnd;
         120  +       clusteringData.TestPartition.End = csvFileParser.Rows;
         121  +
         122  +       clusteringData.Name = Path.GetFileName(path);
         123  +
         124  +       return clusteringData;
   78    125        }
   79    126
    …      …
   83    130        public override void ExportData(IClusteringProblemData instance, string path) {
   84    131          var strBuilder = new StringBuilder();
   85         -
   86         -       foreach (var variable in instance.InputVariables) {
   87         -         strBuilder.Append(variable + ";");
         132  +       var colSep = CultureInfo.CurrentCulture.TextInfo.ListSeparator;
         133  +       foreach (var variable in instance.Dataset.VariableNames) {
         134  +         strBuilder.Append(variable.Replace(colSep, String.Empty) + colSep);
   88    135          }
   89         -       strBuilder.Remove(strBuilder.Length - 1, 1);
         136  +       strBuilder.Remove(strBuilder.Length - colSep.Length, colSep.Length);
   90    137          strBuilder.AppendLine();
   91    138
    …      …
   94    141          for (int i = 0; i < dataset.Rows; i++) {
   95    142            for (int j = 0; j < dataset.Columns; j++) {
   96         -           strBuilder.Append(dataset.GetValue(i, j) + ";");
         143  +           if (j > 0) strBuilder.Append(colSep);
         144  +           strBuilder.Append(dataset.GetValue(i, j));
   97    145            }
   98         -         strBuilder.Remove(strBuilder.Length - 1, 1);
   99    146            strBuilder.AppendLine();
  100    147          }
Note: See TracChangeset
for help on using the changeset viewer.