Changeset 8660 for branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV
- Timestamp:
- 09/14/12 18:58:15 (12 years ago)
- Location:
- branches/GP-MoveOperators
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/GP-MoveOperators
- Property svn:ignore
-
old new
21 21 protoc.exe
22 22 _ReSharper.HeuristicLab 3.3 Tests
23 Google.ProtocolBuffers-2.4.1.473.dll
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs
r8206 r8660 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 25 using System.Globalization; 24 26 using System.IO; 25 27 using System.Linq; 26 28 using System.Text; 29 using HeuristicLab.Common; 27 30 using HeuristicLab.Problems.DataAnalysis; 28 31 … … 30 33 public class RegressionCSVInstanceProvider : RegressionInstanceProvider { 31 34 public override string Name { 32 get { return "C omma-separated ValuesFile"; }35 get { return "CSV File"; } 33 36 } 34 37 public override string Description { … … 59 62 60 63 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 61 string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();64 string targetVar = dataset.DoubleVariables.Last(); 62 65 63 IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar)); 66 // turn of input variables that are constant in the training partition 67 var allowedInputVars = new List<string>(); 68 var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3); 69 foreach (var variableName in dataset.DoubleVariables) { 70 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 71 variableName != targetVar) 72 allowedInputVars.Add(variableName); 73 } 64 74 65 IRegressionProblemData reg Data = new RegressionProblemData(dataset, allowedInputVars, targetVar);75 IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar); 66 76 67 int trainingPartEnd = csvFileParser.Rows * 2 / 3;68 reg Data.TrainingPartition.Start = 0;69 reg Data.TrainingPartition.End = trainingPartEnd;70 reg Data.TestPartition.Start = trainingPartEnd;71 reg Data.TestPartition.End = csvFileParser.Rows;77 var trainingPartEnd = trainingIndizes.Last(); 78 regressionData.TrainingPartition.Start = trainingIndizes.First(); 79 regressionData.TrainingPartition.End = trainingPartEnd; 80 
regressionData.TestPartition.Start = trainingPartEnd; 81 regressionData.TestPartition.End = csvFileParser.Rows; 72 82 73 int pos = path.LastIndexOf('\\'); 74 if (pos < 0) 75 regData.Name = path; 76 else { 77 pos++; 78 regData.Name = path.Substring(pos, path.Length - pos); 83 regressionData.Name = Path.GetFileName(path); 84 85 return regressionData; 86 } 87 88 public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) { 89 TableFileParser csvFileParser = new TableFileParser(); 90 csvFileParser.Parse(path); 91 92 List<IList> values = csvFileParser.Values; 93 if (type.Shuffle) { 94 values = Shuffle(values); 79 95 } 80 return regData; 96 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 97 string targetVar = dataset.DoubleVariables.Last(); 98 99 // turn of input variables that are constant in the training partition 100 var allowedInputVars = new List<string>(); 101 int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100; 102 trainingPartEnd = trainingPartEnd > 0 ? 
trainingPartEnd : 1; 103 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 104 if (trainingIndizes.Count() >= 2) { 105 foreach (var variableName in dataset.DoubleVariables) { 106 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 107 variableName != targetVar) 108 allowedInputVars.Add(variableName); 109 } 110 } else { 111 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 112 } 113 114 RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar); 115 116 regressionData.TrainingPartition.Start = 0; 117 regressionData.TrainingPartition.End = trainingPartEnd; 118 regressionData.TestPartition.Start = trainingPartEnd; 119 regressionData.TestPartition.End = csvFileParser.Rows; 120 121 regressionData.Name = Path.GetFileName(path); 122 123 return regressionData; 81 124 } 82 125 … … 85 128 } 86 129 public override void ExportData(IRegressionProblemData instance, string path) { 87 StringBuilder strBuilder = new StringBuilder();130 var strBuilder = new StringBuilder(); 88 131 89 132 foreach (var variable in instance.InputVariables) { 90 strBuilder.Append(variable + ";");133 strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator); 91 134 } 92 strBuilder.Remove(strBuilder.Length - 1, 1);135 strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length); 93 136 strBuilder.AppendLine(); 94 137 95 Datasetdataset = instance.Dataset;138 var dataset = instance.Dataset; 96 139 97 140 for (int i = 0; i < dataset.Rows; i++) { 98 141 for (int j = 0; j < dataset.Columns; j++) { 99 strBuilder.Append(dataset.GetValue(i, j) + ";"); 142 if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator); 143 strBuilder.Append(dataset.GetValue(i, j)); 100 144 } 101 strBuilder.Remove(strBuilder.Length - 1, 1);102 145 strBuilder.AppendLine(); 103 146 } 104 147 105 
using (StreamWriter writer = new StreamWriter(path)) {
148 using (var writer = new StreamWriter(path)) {
106 149 writer.Write(strBuilder);
107 150 }
Note: See TracChangeset
for help on using the changeset viewer.