- Timestamp:
- 10/02/12 09:49:43 (12 years ago)
- Location:
- branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3
- Files:
-
- 1 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
r8701 r8715 76 76 } 77 77 } else { 78 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 78 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 79 79 } 80 80 … … 96 96 List<IList> values = csvFileParser.Values; 97 97 if (type.Shuffle) { 98 values = Shuffle(values); 98 values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)), 99 type.Training, out trainingPartEnd); 99 100 } 100 101 101 102 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 102 string targetVar = dataset.DoubleVariables.Last(); 103 103 104 104 // turn of input variables that are constant in the training partition 105 105 var allowedInputVars = new List<string>(); 106 106 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 107 foreach (var variableName in dataset.DoubleVariables) { 108 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 109 variableName != targetVar) 110 allowedInputVars.Add(variableName); 111 } 112 113 ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar); 107 if (trainingIndizes.Count() >= 2) { 108 foreach (var variableName in dataset.DoubleVariables) { 109 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 110 variableName != type.TargetVariable) 111 allowedInputVars.Add(variableName); 112 } 113 } else { 114 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable))); 115 } 116 117 ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable); 114 118 115 119 classificationData.TrainingPartition.Start = 0; … … 123 127 } 124 128 125 protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) { 126 target = 5; 129 protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, out int trainingPartEnd) { 127 130 IList targetValues = values[target]; 128 131 var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList(); 129 132 Dictionary<double, double> taken = new Dictionary<double, double>(); 130 133 foreach (var classCount in group) { 131 taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100; 134 taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100.0; 132 135 } 133 136 … … 143 146 } 144 147 } 148 149 trainingPartEnd = training.First().Count; 145 150 146 151 training = Shuffle(training); -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationImportType.cs
r8701 r8715 22 22 namespace HeuristicLab.Problems.Instances.DataAnalysis { 23 23 public class ClassificationImportType : DataAnalysisImportType { 24 public string Variable { get; set; }24 public string TargetVariable { get; set; } 25 25 } 26 26 } -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
r8701 r8715 75 75 } 76 76 } else { 77 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 77 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 78 78 } 79 79 … … 104 104 int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100; 105 105 var trainingIndizes = Enumerable.Range(0, trainingPartEnd); 106 foreach (var variableName in dataset.DoubleVariables) { 107 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 108 variableName != targetVar) 109 allowedInputVars.Add(variableName); 106 if (trainingIndizes.Count() >= 2) { 107 foreach (var variableName in dataset.DoubleVariables) { 108 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 109 variableName != targetVar) 110 allowedInputVars.Add(variableName); 111 } 112 } else { 113 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 110 114 } 111 115 -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj
r8701 r8715 98 98 </PropertyGroup> 99 99 <ItemGroup> 100 <Reference Include="HeuristicLab.Collections-3.3"> 101 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Collections-3.3.dll</HintPath> 102 <Private>False</Private> 103 </Reference> 104 <Reference Include="HeuristicLab.Common-3.3"> 105 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath> 106 <Private>False</Private> 107 </Reference> 108 <Reference Include="HeuristicLab.Core-3.3"> 109 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Core-3.3.dll</HintPath> 100 <Reference Include="HeuristicLab.Common-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 101 <Private>False</Private> 102 </Reference> 103 <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 110 104 <Private>False</Private> 111 105 </Reference> … … 164 158 <Compile Include="Regression\ArtificialRegressionDataDescriptor.cs" /> 165 159 <Compile Include="Regression\ArtificialRegressionInstanceProvider.cs" /> 160 <Compile Include="Regression\RegressionImportType.cs" /> 166 161 <Compile Include="Regression\CSV\RegressionCSVInstanceProvider.cs" /> 167 162 <Compile Include="Regression\Keijzer\KeijzerFunctionFourteen.cs" /> -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs
r8701 r8715 67 67 var allowedInputVars = new List<string>(); 68 68 var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3); 69 foreach (var variableName in dataset.DoubleVariables) { 70 if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 71 variableName != targetVar) 72 allowedInputVars.Add(variableName); 69 if (trainingIndizes.Count() >= 2) { 70 foreach (var variableName in dataset.DoubleVariables) { 71 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 72 variableName != targetVar) 73 allowedInputVars.Add(variableName); 74 } 75 } else { 76 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar))); 73 77 } 74 78 … … 86 90 } 87 91 88 protected override IRegressionProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) { 92 protected override IRegressionProblemData ImportData(string path, RegressionImportType type, TableFileParser csvFileParser) { 89 93 List<IList> values = csvFileParser.Values; 90 94 if (type.Shuffle) { … … 92 96 } 93 97 Dataset dataset = new Dataset(csvFileParser.VariableNames, values); 94 string targetVar = dataset.DoubleVariables.Last(); 95 98 96 99 // turn of input variables that are constant in the training partition … … 102 105 foreach (var variableName in dataset.DoubleVariables) { 103 106 if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && 104 variableName != targetVar) 107 variableName != type.TargetVariable) 105 108 allowedInputVars.Add(variableName); 106 109 } 107 110 } else { 108 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar))); 111 allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable))); 109 112 } 110 113 111 RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar); 114 RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, type.TargetVariable); 112 115 113 116 regressionData.TrainingPartition.Start = 0; -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs
r8701 r8715 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, DataAnalysisImportType> {25 public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, RegressionImportType> { 26 26 } 27 27 }
Note: See TracChangeset
for help on using the changeset viewer.