- Timestamp:
- 04/16/13 13:13:41 (12 years ago)
- Location:
- branches/OaaS
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/OaaS
- Property svn:ignore
-
old  new
 21   21  protoc.exe
 22   22  _ReSharper.HeuristicLab 3.3 Tests
      23  Google.ProtocolBuffers-2.4.1.473.dll
 23   24  packages
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis merged eligible /branches/Algorithms.GradientDescent/HeuristicLab.Problems.Instances.DataAnalysis 5516-5520 /branches/Benchmarking/sources/HeuristicLab.Problems.Instances.DataAnalysis 6917-7005 /branches/CloningRefactoring/HeuristicLab.Problems.Instances.DataAnalysis 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Problems.Instances.DataAnalysis 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.Instances.DataAnalysis 5815-6180 /branches/DataAnalysis/HeuristicLab.Problems.Instances.DataAnalysis 4458-4459,4462,4464 /branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis 8695-8875 /branches/GP.Grammar.Editor/HeuristicLab.Problems.Instances.DataAnalysis 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Problems.Instances.DataAnalysis 5060 /branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.Instances.DataAnalysis 7889-8789 /branches/NET40/sources/HeuristicLab.Problems.Instances.DataAnalysis 5138-5162 /branches/ParallelEngine/HeuristicLab.Problems.Instances.DataAnalysis 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.Instances.DataAnalysis 7568-7810 /branches/QAPAlgorithms/HeuristicLab.Problems.Instances.DataAnalysis 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Problems.Instances.DataAnalysis 6828 /branches/RuntimeOptimizer/HeuristicLab.Problems.Instances.DataAnalysis 8943-9078 /branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.Instances.DataAnalysis 7787-8333 /branches/SlaveShutdown/HeuristicLab.Problems.Instances.DataAnalysis 8944-8956 /branches/SuccessProgressAnalysis/HeuristicLab.Problems.Instances.DataAnalysis 5370-5682 /branches/Trunk/HeuristicLab.Problems.Instances.DataAnalysis 6829-6865 /branches/UnloadJobs/HeuristicLab.Problems.Instances.DataAnalysis 9168-9215 /branches/VNS/HeuristicLab.Problems.Instances.DataAnalysis 5594-5752 
/branches/histogram/HeuristicLab.Problems.Instances.DataAnalysis 5959-6341
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
r8211 r9363 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 24 25 using System.IO; 25 using System.Text; 26 using System.Linq; 27 using HeuristicLab.Common; 26 28 using HeuristicLab.Problems.DataAnalysis; 27 29 … … 56 58 public override IClusteringProblemData ImportData(string path) { 57 59 var csvFileParser = new TableFileParser(); 58 59 60 csvFileParser.Parse(path); 60 61 61 vardataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);62 var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);62 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 63 string targetVar = dataset.DoubleVariables.Last(); 63 64 64 int trainingPartEnd = csvFileParser.Rows * 2 / 3;65 claData.TrainingPartition.Start = 0;66 claData.TrainingPartition.End = trainingPartEnd;67 claData.TestPartition.Start = trainingPartEnd;68 claData.TestPartition.End = csvFileParser.Rows;69 int pos = path.LastIndexOf('\\');70 if (pos < 0)71 claData.Name = path;72 else {73 pos++;74 claData.Name = path.Substring(pos, path.Length - pos);65 // turn of input variables that are constant in the training partition 66 var allowedInputVars = new List<string>(); 67 var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3); 68 if (trainingIndizes.Count() >= 2) { 69 foreach (var variableName in dataset.DoubleVariables) { 70 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 71 variableName != targetVar) 72 allowedInputVars.Add(variableName); 73 } 74 } else { 75 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 75 76 } 76 77 77 return claData; 78 ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars); 79 80 int trainingPartEnd = trainingIndizes.Last(); 81 clusteringData.TrainingPartition.Start = trainingIndizes.First(); 82 clusteringData.TrainingPartition.End = trainingPartEnd; 83 
clusteringData.TestPartition.Start = trainingPartEnd; 84 clusteringData.TestPartition.End = csvFileParser.Rows; 85 86 clusteringData.Name = Path.GetFileName(path); 87 88 return clusteringData; 78 89 } 79 90 80 public override bool CanExportData { 81 get { return true; } 82 } 83 public override void ExportData(IClusteringProblemData instance, string path) { 84 var strBuilder = new StringBuilder(); 85 86 foreach (var variable in instance.InputVariables) { 87 strBuilder.Append(variable + ";"); 88 } 89 strBuilder.Remove(strBuilder.Length - 1, 1); 90 strBuilder.AppendLine(); 91 92 var dataset = instance.Dataset; 93 94 for (int i = 0; i < dataset.Rows; i++) { 95 for (int j = 0; j < dataset.Columns; j++) { 96 strBuilder.Append(dataset.GetValue(i, j) + ";"); 97 } 98 strBuilder.Remove(strBuilder.Length - 1, 1); 99 strBuilder.AppendLine(); 91 protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) { 92 List<IList> values = csvFileParser.Values; 93 if (type.Shuffle) { 94 values = Shuffle(values); 100 95 } 101 96 102 using (var writer = new StreamWriter(path)) { 103 writer.Write(strBuilder); 97 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 98 string targetVar = dataset.DoubleVariables.Last(); 99 100 // turn of input variables that are constant in the training partition 101 var allowedInputVars = new List<string>(); 102 int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100; 103 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 104 if (trainingIndizes.Count() >= 2) { 105 foreach (var variableName in dataset.DoubleVariables) { 106 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 107 variableName != targetVar) 108 allowedInputVars.Add(variableName); 109 } 110 } else { 111 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 104 112 } 113 114 ClusteringProblemData clusteringData = new 
ClusteringProblemData(dataset, allowedInputVars); 115 116 clusteringData.TrainingPartition.Start = 0; 117 clusteringData.TrainingPartition.End = trainingPartEnd; 118 clusteringData.TestPartition.Start = trainingPartEnd; 119 clusteringData.TestPartition.End = csvFileParser.Rows; 120 121 clusteringData.Name = Path.GetFileName(path); 122 123 return clusteringData; 105 124 } 106 125 }
Note: See TracChangeset for help on using the changeset viewer.