- Timestamp:
- 10/05/11 21:55:55 (13 years ago)
- Location:
- branches/GeneralizedQAP
- Files:
-
- 3 deleted
- 13 edited
- 15 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/GeneralizedQAP
- Property svn:ignore
-
old new 1 *.docstates 2 *.psess 1 3 *.resharper 2 4 *.suo 5 *.vsp 3 6 Google.ProtocolBuffers-0.9.1.dll 7 HeuristicLab 3.3.5.1.ReSharper.user 8 HeuristicLab 3.3.6.0.ReSharper.user 4 9 HeuristicLab.4.5.resharper.user 5 10 HeuristicLab.resharper.user … … 8 13 _ReSharper.HeuristicLab 9 14 _ReSharper.HeuristicLab 3.3 15 _ReSharper.HeuristicLab.ExtLibs 16 bin 10 17 protoc.exe 11 HeuristicLab 3.3.5.1.ReSharper.user 12 *.psess 13 *.vsp 14 *.docstates 18 HeuristicLab.ExtLibs.6.0.ReSharper.user
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
-
Property
svn:ignore
set to
bin
-
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4
- Property svn:ignore
-
old new 4 4 obj 5 5 *.vs10x 6 Plugin.cs
-
- Property svn:ignore
-
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r5847 r6878 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 25 using System.Collections.ObjectModel; 24 26 using System.Linq; 25 27 using HeuristicLab.Common; … … 36 38 private Dataset(Dataset original, Cloner cloner) 37 39 : base(original, cloner) { 38 variableNameToVariableIndexMapping = original.variableNameToVariableIndexMapping; 39 data = original.data; 40 } 41 public override IDeepCloneable Clone(Cloner cloner) { 42 return new Dataset(this, cloner); 43 } 40 variableValues = new Dictionary<string, IList>(original.variableValues); 41 variableNames = new List<string>(original.variableNames); 42 rows = original.rows; 43 } 44 public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); } 44 45 45 46 public Dataset() … … 47 48 Name = "-"; 48 49 VariableNames = Enumerable.Empty<string>(); 49 data = new double[0, 0]; 50 } 51 52 public Dataset(IEnumerable<string> variableNames, double[,] data) 50 variableValues = new Dictionary<string, IList>(); 51 rows = 0; 52 } 53 54 public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) 53 55 : base() { 54 56 Name = "-"; 55 if (variableNames.Count() != data.GetLength(1)) { 56 throw new ArgumentException("Number of variable names doesn't match the number of columns of data"); 57 } 58 this.data = (double[,])data.Clone(); 59 VariableNames = variableNames; 60 } 61 62 63 private Dictionary<string, int> variableNameToVariableIndexMapping; 64 private Dictionary<int, string> variableIndexToVariableNameMapping; 57 if (!variableNames.Any()) { 58 this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList(); 59 } else if (variableNames.Count() != variableValues.Count()) { 60 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 61 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 62 throw new ArgumentException("The number of values must be equal for every variable"); 63 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 64 var duplicateVariableNames = 65 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 66 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 67 foreach (var duplicateVariableName in duplicateVariableNames) 68 message += duplicateVariableName + Environment.NewLine; 69 throw new ArgumentException(message); 70 } 71 72 rows = variableValues.First().Count; 73 this.variableNames = new List<string>(variableNames); 74 this.variableValues = new Dictionary<string, IList>(); 75 for (int i = 0; i < this.variableNames.Count; i++) { 76 var values = variableValues.ElementAt(i); 77 IList clonedValues = null; 78 if (values is List<double>) 79 clonedValues = new List<double>(values.Cast<double>()); 80 else if (values is List<string>) 81 clonedValues = new List<string>(values.Cast<string>()); 82 else if (values is List<DateTime>) 83 clonedValues = new List<DateTime>(values.Cast<DateTime>()); 84 else { 85 this.variableNames = new List<string>(); 86 this.variableValues = new Dictionary<string, IList>(); 87 throw new ArgumentException("The variable values must be of type List<double>, List<string> or List<DateTime>"); 88 } 89 this.variableValues.Add(this.variableNames[i], clonedValues); 90 } 91 } 92 93 public Dataset(IEnumerable<string> variableNames, double[,] variableValues) { 94 Name = "-"; 95 if (variableNames.Count() != variableValues.GetLength(1)) { 96 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 97 } 98 if (variableNames.Distinct().Count() != variableNames.Count()) { 99 var duplicateVariableNames = variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 100 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 101 foreach (var duplicateVariableName in duplicateVariableNames) 102 message += duplicateVariableName + Environment.NewLine; 103 throw new ArgumentException(message); 104 } 105 106 rows = variableValues.GetLength(0); 107 this.variableNames = new List<string>(variableNames); 108 109 this.variableValues = new Dictionary<string, IList>(); 110 for (int col = 0; col < variableValues.GetLength(1); col++) { 111 string columName = this.variableNames[col]; 112 var values = new List<double>(); 113 for (int row = 0; row < variableValues.GetLength(0); row++) { 114 values.Add(variableValues[row, col]); 115 } 116 this.variableValues.Add(columName, values); 117 } 118 } 119 120 #region Backwards compatible code, remove with 3.5 121 private double[,] storableData; 122 //name alias used to suppport backwards compatibility 123 [Storable(Name = "data", AllowOneWay = true)] 124 private double[,] StorableData { set { storableData = value; } } 125 126 [StorableHook(HookType.AfterDeserialization)] 127 private void AfterDeserialization() { 128 if (variableValues == null) { 129 rows = storableData.GetLength(0); 130 variableValues = new Dictionary<string, IList>(); 131 for (int col = 0; col < storableData.GetLength(1); col++) { 132 string columName = variableNames[col]; 133 var values = new List<double>(); 134 for (int row = 0; row < storableData.GetLength(0); row++) { 135 values.Add(storableData[row, col]); 136 } 137 variableValues.Add(columName, values); 138 } 139 storableData = null; 140 } 141 } 142 #endregion 143 144 [Storable(Name = "VariableValues")] 145 private Dictionary<string, IList> variableValues; 146 147 private List<string> variableNames; 65 148 [Storable] 66 149 public IEnumerable<string> VariableNames { 67 get { 68 // convert KeyCollection to an array first for persistence 69 return variableNameToVariableIndexMapping.Keys.ToArray(); 70 } 150 get { return variableNames; } 71 151 private set { 72 if (variableNameToVariableIndexMapping != null) throw new InvalidOperationException("VariableNames can only be set once."); 73 this.variableNameToVariableIndexMapping = new Dictionary<string, int>(); 74 this.variableIndexToVariableNameMapping = new Dictionary<int, string>(); 75 int i = 0; 76 foreach (string variableName in value) { 77 this.variableNameToVariableIndexMapping.Add(variableName, i); 78 this.variableIndexToVariableNameMapping.Add(i, variableName); 79 i++; 80 } 81 } 82 } 83 152 if (variableNames != null) throw new InvalidOperationException(); 153 variableNames = new List<string>(value); 154 } 155 } 156 157 public IEnumerable<string> DoubleVariables { 158 get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); } 159 } 160 161 public IEnumerable<double> GetDoubleValues(string variableName) { 162 IList list; 163 if (!variableValues.TryGetValue(variableName, out list)) 164 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 165 List<double> values = list as List<double>; 166 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 167 168 //mkommend yield return used to enable lazy evaluation 169 foreach (double value in values) 170 yield return value; 171 } 172 public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) { 173 IList list; 174 if (!variableValues.TryGetValue(variableName, out list)) 175 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 176 List<double> values = list as List<double>; 177 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 178 return values.AsReadOnly(); 179 } 180 public double GetDoubleValue(string variableName, int row) { 181 IList list; 182 if (!variableValues.TryGetValue(variableName, out list)) 183 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 184 List<double> values = list as List<double>; 185 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 186 return values[row]; 187 } 188 public IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows) { 189 IList list; 190 if (!variableValues.TryGetValue(variableName, out list)) 191 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 192 List<double> values = list as List<double>; 193 if (values == null) throw new ArgumentException("The varialbe " + variableName + " is not a double variable."); 194 195 foreach (int index in rows) 196 yield return values[index]; 197 } 198 199 #region IStringConvertibleMatrix Members 84 200 [Storable] 85 private double[,] data; 86 private double[,] Data { 87 get { return data; } 88 } 89 90 // elementwise access 91 public double this[int rowIndex, int columnIndex] { 92 get { return data[rowIndex, columnIndex]; } 93 } 94 public double this[string variableName, int rowIndex] { 95 get { 96 int columnIndex = GetVariableIndex(variableName); 97 return data[rowIndex, columnIndex]; 98 } 99 } 100 101 public double[] GetVariableValues(int variableIndex) { 102 return GetVariableValues(variableIndex, 0, Rows); 103 } 104 public double[] GetVariableValues(string variableName) { 105 return GetVariableValues(GetVariableIndex(variableName), 0, Rows); 106 } 107 public double[] GetVariableValues(int variableIndex, int start, int end) { 108 return GetEnumeratedVariableValues(variableIndex, start, end).ToArray(); 109 } 110 public double[] GetVariableValues(string variableName, int start, int end) { 111 return GetVariableValues(GetVariableIndex(variableName), start, end); 112 } 113 114 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex) { 115 return GetEnumeratedVariableValues(variableIndex, 0, Rows); 116 } 117 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, int start, int end) { 118 if (start < 0 || !(start <= end)) 119 throw new ArgumentException("Start must be between 0 and end (" + end + ")."); 120 if (end > Rows || end < start) 121 throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ")."); 122 123 for (int i = start; i < end; i++) 124 yield return data[i, variableIndex]; 125 } 126 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, IEnumerable<int> rows) { 127 foreach (int row in rows) 128 yield return data[row, variableIndex]; 129 } 130 131 public IEnumerable<double> GetEnumeratedVariableValues(string variableName) { 132 return GetEnumeratedVariableValues(GetVariableIndex(variableName), 0, Rows); 133 } 134 public IEnumerable<double> GetEnumeratedVariableValues(string variableName, int start, int end) { 135 return GetEnumeratedVariableValues(GetVariableIndex(variableName), start, end); 136 } 137 public IEnumerable<double> GetEnumeratedVariableValues(string variableName, IEnumerable<int> rows) { 138 return GetEnumeratedVariableValues(GetVariableIndex(variableName), rows); 139 } 140 141 public string GetVariableName(int variableIndex) { 142 try { 143 return variableIndexToVariableNameMapping[variableIndex]; 144 } 145 catch (KeyNotFoundException ex) { 146 throw new ArgumentException("The variable index " + variableIndex + " was not found.", ex); 147 } 148 } 149 public int GetVariableIndex(string variableName) { 150 try { 151 return variableNameToVariableIndexMapping[variableName]; 152 } 153 catch (KeyNotFoundException ex) { 154 throw new ArgumentException("The variable name " + variableName + " was not found.", ex); 155 } 156 } 157 158 #region IStringConvertibleMatrix Members 201 private int rows; 159 202 public int Rows { 160 get { return data.GetLength(0); }203 get { return rows; } 161 204 set { throw new NotSupportedException(); } 162 205 } 163 206 public int Columns { 164 get { return data.GetLength(1); }207 get { return variableNames.Count; } 165 208 set { throw new NotSupportedException(); } 166 209 } … … 184 227 185 228 public string GetValue(int rowIndex, int columnIndex) { 186 return data[rowIndex, columnIndex].ToString();229 return variableValues[variableNames[columnIndex]][rowIndex].ToString(); 187 230 } 188 231 public bool SetValue(string value, int rowIndex, int columnIndex) { -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r6643 r6878 41 41 <DebugType>full</DebugType> 42 42 <Optimize>false</Optimize> 43 <OutputPath> bin\Debug\</OutputPath>43 <OutputPath>$(SolutionDir)\bin\</OutputPath> 44 44 <DefineConstants>DEBUG;TRACE</DefineConstants> 45 45 <ErrorReport>prompt</ErrorReport> … … 50 50 <DebugType>pdbonly</DebugType> 51 51 <Optimize>true</Optimize> 52 <OutputPath> bin\Release\</OutputPath>52 <OutputPath>$(SolutionDir)\bin\</OutputPath> 53 53 <DefineConstants>TRACE</DefineConstants> 54 54 <ErrorReport>prompt</ErrorReport> … … 58 58 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' "> 59 59 <DebugSymbols>true</DebugSymbols> 60 <OutputPath> bin\x64\Debug\</OutputPath>60 <OutputPath>$(SolutionDir)\bin\</OutputPath> 61 61 <DefineConstants>DEBUG;TRACE</DefineConstants> 62 62 <DebugType>full</DebugType> … … 66 66 </PropertyGroup> 67 67 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' "> 68 <OutputPath> bin\x64\Release\</OutputPath>68 <OutputPath>$(SolutionDir)\bin\</OutputPath> 69 69 <DefineConstants>TRACE</DefineConstants> 70 70 <Optimize>true</Optimize> … … 76 76 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' "> 77 77 <DebugSymbols>true</DebugSymbols> 78 <OutputPath> bin\x86\Debug\</OutputPath>78 <OutputPath>$(SolutionDir)\bin\</OutputPath> 79 79 <DefineConstants>DEBUG;TRACE</DefineConstants> 80 80 <DebugType>full</DebugType> … … 84 84 </PropertyGroup> 85 85 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' "> 86 <OutputPath> bin\x86\Release\</OutputPath>86 <OutputPath>$(SolutionDir)\bin\</OutputPath> 87 87 <DefineConstants>TRACE</DefineConstants> 88 88 <Optimize>true</Optimize> … … 127 127 </Compile> 128 128 <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" /> 129 <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisProblem.cs" /> 130 <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisProblemData.cs" /> 131 <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisSolution.cs" /> 132 <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisSolutionBase.cs" /> 129 133 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs"> 130 134 <SubType>Code</SubType> … … 139 143 <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" /> 140 144 <Compile Include="Implementation\Regression\RegressionSolutionBase.cs" /> 145 <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisModel.cs" /> 146 <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisProblem.cs" /> 147 <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisProblemData.cs" /> 148 <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisSolution.cs" /> 149 <Compile Include="OnlineCalculators\OnlineDirectionalSymmetryCalculator.cs" /> 141 150 <Compile Include="OnlineCalculators\OnlineMeanAbsoluteErrorCalculator.cs" /> 142 151 <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" /> … … 174 183 <Compile Include="OnlineCalculators\OnlinePearsonsRSquaredCalculator.cs" /> 175 184 <Compile Include="Implementation\Regression\RegressionSolution.cs" /> 185 <Compile Include="OnlineCalculators\OnlineTheilsUStatisticCalculator.cs" /> 186 <Compile Include="OnlineCalculators\OnlineWeightedDirectionalSymmetryCalculator.cs" /> 187 <Compile Include="Plugin.cs" /> 176 188 <Compile Include="TableFileParser.cs" /> 177 189 <Compile Include="Implementation\Classification\ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" /> … … 179 191 <Compile Include="Implementation\Classification\ThresholdCalculators\ThresholdCalculator.cs" /> 180 192 <None Include="HeuristicLab.snk" /> 181 <None Include="HeuristicLabProblemsDataAnalysisPlugin.cs.frame" /> 182 <None Include="Properties\AssemblyInfo.frame" /> 183 <Compile Include="HeuristicLabProblemsDataAnalysisPlugin.cs" /> 193 <None Include="Plugin.cs.frame" /> 194 <None Include="Properties\AssemblyInfo.cs.frame" /> 184 195 <Compile Include="Interfaces\IDataAnalysisModel.cs" /> 185 196 <Compile Include="Interfaces\IDataAnalysisProblem.cs" /> … … 190 201 <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project> 191 202 <Name>HeuristicLab.Collections-3.3</Name> 203 <Private>False</Private> 192 204 </ProjectReference> 193 205 <ProjectReference Include="..\..\HeuristicLab.Common.Resources\3.3\HeuristicLab.Common.Resources-3.3.csproj"> 194 206 <Project>{0E27A536-1C4A-4624-A65E-DC4F4F23E3E1}</Project> 195 207 <Name>HeuristicLab.Common.Resources-3.3</Name> 208 <Private>False</Private> 196 209 </ProjectReference> 197 210 <ProjectReference Include="..\..\HeuristicLab.Common\3.3\HeuristicLab.Common-3.3.csproj"> 198 211 <Project>{A9AD58B9-3EF9-4CC1-97E5-8D909039FF5C}</Project> 199 212 <Name>HeuristicLab.Common-3.3</Name> 213 <Private>False</Private> 200 214 </ProjectReference> 201 215 <ProjectReference Include="..\..\HeuristicLab.Core\3.3\HeuristicLab.Core-3.3.csproj"> 202 216 <Project>{C36BD924-A541-4A00-AFA8-41701378DDC5}</Project> 203 217 <Name>HeuristicLab.Core-3.3</Name> 218 <Private>False</Private> 204 219 </ProjectReference> 205 220 <ProjectReference Include="..\..\HeuristicLab.Data\3.3\HeuristicLab.Data-3.3.csproj"> 206 221 <Project>{BBAB9DF5-5EF3-4BA8-ADE9-B36E82114937}</Project> 207 222 <Name>HeuristicLab.Data-3.3</Name> 223 <Private>False</Private> 208 224 </ProjectReference> 209 225 <ProjectReference Include="..\..\HeuristicLab.Optimization\3.3\HeuristicLab.Optimization-3.3.csproj"> 210 226 <Project>{14AB8D24-25BC-400C-A846-4627AA945192}</Project> 211 227 <Name>HeuristicLab.Optimization-3.3</Name> 228 <Private>False</Private> 212 229 </ProjectReference> 213 230 <ProjectReference Include="..\..\HeuristicLab.Parameters\3.3\HeuristicLab.Parameters-3.3.csproj"> 214 231 <Project>{56F9106A-079F-4C61-92F6-86A84C2D84B7}</Project> 215 232 <Name>HeuristicLab.Parameters-3.3</Name> 233 <Private>False</Private> 216 234 </ProjectReference> 217 235 <ProjectReference Include="..\..\HeuristicLab.Persistence\3.3\HeuristicLab.Persistence-3.3.csproj"> 218 236 <Project>{102BC7D3-0EF9-439C-8F6D-96FF0FDB8E1B}</Project> 219 237 <Name>HeuristicLab.Persistence-3.3</Name> 238 <Private>False</Private> 220 239 </ProjectReference> 221 240 <ProjectReference Include="..\..\HeuristicLab.PluginInfrastructure\3.3\HeuristicLab.PluginInfrastructure-3.3.csproj"> 222 241 <Project>{94186A6A-5176-4402-AE83-886557B53CCA}</Project> 223 242 <Name>HeuristicLab.PluginInfrastructure-3.3</Name> 243 <Private>False</Private> 224 244 </ProjectReference> 225 245 </ItemGroup> … … 257 277 258 278 call PreBuildEvent.cmd 259 SubWCRev "%25ProjectDir%25\" "%25ProjectDir%25\HeuristicLabProblemsDataAnalysisPlugin.cs.frame" "%25ProjectDir%25\HeuristicLabProblemsDataAnalysisPlugin.cs"</PreBuildEvent>279 </PreBuildEvent> 260 280 </PropertyGroup> 261 281 </Project> -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r6685 r6878 226 226 get { 227 227 if (classValues == null) { 228 classValues = Dataset.Get EnumeratedVariableValues(TargetVariableParameter.Value.Value).Distinct().ToList();228 classValues = Dataset.GetDoubleValues(TargetVariableParameter.Value.Value).Distinct().ToList(); 229 229 classValues.Sort(); 230 230 } … … 291 291 private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) { 292 292 int maxSamples = Math.Min(InspectedRowsToDetermineTargets, dataset.Rows); 293 var validTargetVariables = (from v in dataset. VariableNames294 let distinctValues = dataset.Get EnumeratedVariableValues(v)293 var validTargetVariables = (from v in dataset.DoubleVariables 294 let distinctValues = dataset.GetDoubleValues(v) 295 295 .Take(maxSamples) 296 296 .Distinct() … … 410 410 dataset.Name = Path.GetFileName(fileName); 411 411 412 ClassificationProblemData problemData = new ClassificationProblemData(dataset, dataset. VariableNames.Skip(1), dataset.VariableNames.First());412 ClassificationProblemData problemData = new ClassificationProblemData(dataset, dataset.DoubleVariables.Skip(1), dataset.DoubleVariables.First()); 413 413 problemData.Name = "Data imported from " + Path.GetFileName(fileName); 414 414 return problemData; -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs
r6685 r6878 67 67 protected void CalculateResults() { 68 68 double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values 69 double[] originalTrainingClassValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();69 double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray(); 70 70 double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values 71 double[] originalTestClassValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();71 double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray(); 72 72 73 73 OnlineCalculatorError errorState; -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolutionBase.cs
r6606 r6878 103 103 protected void CalculateRegressionResults() { 104 104 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 105 double[] originalTrainingValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();105 double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray(); 106 106 double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values 107 double[] originalTestValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();107 double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray(); 108 108 109 109 OnlineCalculatorError errorState; … … 132 132 double[] classValues; 133 133 double[] thresholds; 134 var targetClassValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);134 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes); 135 135 AccuracyMaximizationThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds); 136 136 … … 141 141 double[] classValues; 142 142 double[] thresholds; 143 var targetClassValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);143 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes); 144 144 NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds); 145 145 -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringProblemData.cs
r5809 r6878 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 24 23 using System.IO; 25 using System.Linq;26 24 using HeuristicLab.Common; 27 25 using HeuristicLab.Core; 28 using HeuristicLab.Data;29 using HeuristicLab.Parameters;30 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 31 27 … … 103 99 dataset.Name = Path.GetFileName(fileName); 104 100 105 ClusteringProblemData problemData = new ClusteringProblemData(dataset, dataset. VariableNames);101 ClusteringProblemData problemData = new ClusteringProblemData(dataset, dataset.DoubleVariables); 106 102 problemData.Name = "Data imported from " + Path.GetFileName(fileName); 107 103 return problemData; -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r6685 r6878 116 116 if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null."); 117 117 118 if (allowedInputVariables.Except(dataset. VariableNames).Any())119 throw new ArgumentException("All allowed input variables must be present in the dataset .");118 if (allowedInputVariables.Except(dataset.DoubleVariables).Any()) 119 throw new ArgumentException("All allowed input variables must be present in the dataset and of type double."); 120 120 121 var inputVariables = new CheckedItemList<StringValue>(dataset. VariableNames.Select(x => new StringValue(x)));121 var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x))); 122 122 foreach (StringValue x in inputVariables) 123 123 inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs
r6685 r6878 144 144 dataset.Name = Path.GetFileName(fileName); 145 145 146 RegressionProblemData problemData = new RegressionProblemData(dataset, dataset. VariableNames.Skip(1), dataset.VariableNames.First());146 RegressionProblemData problemData = new RegressionProblemData(dataset, dataset.DoubleVariables.Skip(1), dataset.DoubleVariables.First()); 147 147 problemData.Name = "Data imported from " + Path.GetFileName(fileName); 148 148 return problemData; -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs
r6685 r6878 127 127 OnlineCalculatorError errorState; 128 128 Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue())); 129 double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);129 double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState); 130 130 TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN; 131 131 } … … 134 134 OnlineCalculatorError errorState; 135 135 Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue())); 136 double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);136 double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState); 137 137 TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN; 138 138 } … … 142 142 protected void CalculateResults() { 143 143 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 144 double[] originalTrainingValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();144 double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray(); 145 145 double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values 146 double[] originalTestValues = ProblemData.Dataset.Get EnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();146 double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray(); 147 147 148 148 OnlineCalculatorError errorState; -
branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/TableFileParser.cs
r5809 r6878 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 24 25 using System.Globalization; … … 33 34 private readonly char[] POSSIBLE_SEPARATORS = new char[] { ',', ';', '\t' }; 34 35 private Tokenizer tokenizer; 35 private List<List< double>> rowValues;36 private List<List<object>> rowValues; 36 37 37 38 private int rows; … … 47 48 } 48 49 49 private double[,]values;50 public double[,]Values {50 private List<IList> values; 51 public List<IList> Values { 51 52 get { 52 53 return values; … … 69 70 70 71 public TableFileParser() { 71 rowValues = new List<List< double>>();72 rowValues = new List<List<object>>(); 72 73 variableNames = new List<string>(); 73 74 } … … 75 76 public void Parse(string fileName) { 76 77 NumberFormatInfo numberFormat; 78 DateTimeFormatInfo dateTimeFormatInfo; 77 79 char separator; 78 DetermineFileFormat(fileName, out numberFormat, out separator);80 DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator); 79 81 using (StreamReader reader = new StreamReader(fileName)) { 80 tokenizer = new Tokenizer(reader, numberFormat, separator);82 tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator); 81 83 // parse the file 82 84 Parse(); … … 86 88 rows = rowValues.Count; 87 89 columns = rowValues[0].Count; 88 values = new double[rows, columns]; 89 90 int rowIndex = 0; 91 int columnIndex = 0; 92 foreach (List<double> row in rowValues) { 93 columnIndex = 0; 94 foreach (double element in row) { 95 values[rowIndex, columnIndex++] = element; 96 } 97 rowIndex++; 98 } 99 } 100 101 private void DetermineFileFormat(string fileName, out NumberFormatInfo numberFormat, out char separator) { 90 values = new List<IList>(); 91 92 //create columns 93 for (int col = 0; col < columns; col++) { 94 var types = rowValues.Select(r => r[col]).Where(v => v != null && v as string != string.Empty).Take(10).Select(v => v.GetType()); 95 if (!types.Any()) { 96 values.Add(new List<string>()); 97 continue; 98 } 99 100 var columnType = types.GroupBy(v => v).OrderBy(v => v.Count()).Last().Key; 101 if (columnType == typeof(double)) values.Add(new List<double>()); 102 else if (columnType == typeof(DateTime)) values.Add(new List<DateTime>()); 103 else if (columnType == typeof(string)) values.Add(new List<string>()); 104 else throw new InvalidOperationException(); 105 } 106 107 108 109 //fill with values 110 foreach (List<object> row in rowValues) { 111 int columnIndex = 0; 112 foreach (object element in row) { 113 if (values[columnIndex] is List<double> && !(element is double)) 114 values[columnIndex].Add(double.NaN); 115 else if (values[columnIndex] is List<DateTime> && !(element is DateTime)) 116 values[columnIndex].Add(DateTime.MinValue); 117 else if (values[columnIndex] is List<string> && !(element is string)) 118 values[columnIndex].Add(string.Empty); 119 else 120 values[columnIndex].Add(element); 121 columnIndex++; 122 } 123 } 124 } 125 126 private void DetermineFileFormat(string fileName, out NumberFormatInfo numberFormat, out DateTimeFormatInfo dateTimeFormatInfo, out char separator) { 102 127 using (StreamReader reader = new StreamReader(fileName)) { 103 128 // skip first line … … 123 148 if (OccurrencesOf(charCounts, '.') > 10) { 124 149 numberFormat = NumberFormatInfo.InvariantInfo; 150 dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 125 151 separator = POSSIBLE_SEPARATORS 126 152 .Where(c => OccurrencesOf(charCounts, c) > 10) … … 139 165 // English format (only integer values) with ',' as separator 140 166 numberFormat = NumberFormatInfo.InvariantInfo; 167 dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 141 168 separator = ','; 142 169 } else { … … 144 171 // German format (real values) 145 172 numberFormat = NumberFormatInfo.GetInstance(new CultureInfo("de-DE")); 173 dateTimeFormatInfo = DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE")); 146 174 separator = POSSIBLE_SEPARATORS 147 175 .Except(disallowedSeparators) … … 154 182 // no points and no commas => English format 155 183 numberFormat = NumberFormatInfo.InvariantInfo; 184 dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 156 185 separator = POSSIBLE_SEPARATORS 157 186 .Where(c => OccurrencesOf(charCounts, c) > 10) … … 169 198 #region tokenizer 170 199 internal enum TokenTypeEnum { 171 NewLine, Separator, String, Double 200 NewLine, Separator, String, Double, DateTime 172 201 } 173 202 … … 176 205 public string stringValue; 177 206 public double doubleValue; 207 public DateTime dateTimeValue; 178 208 179 209 public Token(TokenTypeEnum type, string value) { 180 210 this.type = type; 181 211 stringValue = value; 212 dateTimeValue = DateTime.MinValue; 182 213 doubleValue = 0.0; 183 214 } … … 193 224 private List<Token> tokens; 194 225 private NumberFormatInfo numberFormatInfo; 226 private DateTimeFormatInfo dateTimeFormatInfo; 195 227 private char separator; 196 228 private const string INTERNAL_SEPARATOR = "#"; … … 218 250 } 219 251 220 public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, char separator) {252 public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, DateTimeFormatInfo dateTimeFormatInfo, char separator) { 221 253 this.reader = reader; 222 254 this.numberFormatInfo = numberFormatInfo; 255 this.dateTimeFormatInfo = dateTimeFormatInfo; 223 256 this.separator = separator; 224 257 separatorToken = new Token(TokenTypeEnum.Separator, INTERNAL_SEPARATOR); … … 264 297 token.type = TokenTypeEnum.Double; 265 298 return token; 266 } 267 268 // couldn't parse the token as an int or float number so return a string token 299 } else if (DateTime.TryParse(strToken, dateTimeFormatInfo, DateTimeStyles.None, out token.dateTimeValue)) { 300 token.type = TokenTypeEnum.DateTime; 301 return token; 302 } 303 304 // couldn't parse the token as an int or float number or datetime value so return a string token 269 305 return token; 270 306 } … … 299 335 private void ParseValues() { 300 336 while (tokenizer.HasNext()) { 301 List<double> row = new List<double>(); 302 row.Add(NextValue(tokenizer)); 303 while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) { 304 Expect(tokenizer.SeparatorToken); 305 row.Add(NextValue(tokenizer)); 306 } 307 Expect(tokenizer.NewlineToken); 308 // all rows have to have the same number of values 309 // the first row defines how many samples are needed 310 if (rowValues.Count > 0 && rowValues[0].Count != row.Count) { 311 Error("The first row of the dataset has " + rowValues[0].Count + " columns." + 312 "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", tokenizer.CurrentLineNumber); 313 } 314 // add the current row to the collection of rows and start a new row 315 rowValues.Add(row); 316 row = new List<double>(); 317 } 318 } 319 320 private double NextValue(Tokenizer tokenizer) { 321 if (tokenizer.Peek() == tokenizer.SeparatorToken || tokenizer.Peek() == tokenizer.NewlineToken) return double.NaN; 337 if (tokenizer.Peek() == tokenizer.NewlineToken) { 338 tokenizer.Next(); 339 } else { 340 List<object> row = new List<object>(); 341 object value = NextValue(tokenizer); 342 row.Add(value); 343 while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) { 344 Expect(tokenizer.SeparatorToken); 345 row.Add(NextValue(tokenizer)); 346 } 347 Expect(tokenizer.NewlineToken); 348 // all rows have to have the same number of values 349 // the first row defines how many samples are needed 350 if (rowValues.Count > 0 && rowValues[0].Count != row.Count) { 351 Error("The first row of the dataset has " + rowValues[0].Count + " columns." + 352 "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", 353 tokenizer.CurrentLineNumber); 354 } 355 rowValues.Add(row); 356 } 357 } 358 } 359 360 private object NextValue(Tokenizer tokenizer) { 361 if (tokenizer.Peek() == tokenizer.SeparatorToken || tokenizer.Peek() == tokenizer.NewlineToken) return string.Empty; 322 362 Token current = tokenizer.Next(); 323 if (current.type == TokenTypeEnum.Separator || current.type == TokenTypeEnum.String) {363 if (current.type == TokenTypeEnum.Separator) { 324 364 return double.NaN; 365 } else if (current.type == TokenTypeEnum.String) { 366 return current.stringValue; 325 367 } else if (current.type == TokenTypeEnum.Double) { 326 // just take the value327 368 return current.doubleValue; 369 } else if (current.type == TokenTypeEnum.DateTime) { 370 return current.dateTimeValue; 328 371 } 329 372 // found an unexpected token => throw error … … 334 377 335 378 private void ParseVariableNames() { 336 // if the first line doesn't start with a double value then we assume that the 337 // first line contains variable names 338 if (tokenizer.HasNext() && tokenizer.Peek().type != TokenTypeEnum.Double) { 339 340 List<Token> tokens = new List<Token>(); 341 Token valueToken; 379 //if first token is double no variables names are given 380 if (tokenizer.Peek().type == TokenTypeEnum.Double) return; 381 382 // the first line must contain variable names 383 List<Token> tokens = new List<Token>(); 384 Token valueToken; 385 valueToken = tokenizer.Next(); 386 tokens.Add(valueToken); 387 while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) { 388 Expect(tokenizer.SeparatorToken); 342 389 valueToken = tokenizer.Next(); 343 tokens.Add(valueToken);344 while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {345 Expect(tokenizer.SeparatorToken);346 valueToken = tokenizer.Next();347 if (valueToken != tokenizer.NewlineToken) {348 tokens.Add(valueToken);349 }350 }351 390 if (valueToken != tokenizer.NewlineToken) { 352 Expect(tokenizer.NewlineToken); 353 } 354 variableNames = tokens.Select(x => x.stringValue.Trim()).ToList(); 355 } 391 tokens.Add(valueToken); 392 } 393 } 394 if (valueToken != tokenizer.NewlineToken) { 395 Expect(tokenizer.NewlineToken); 396 } 397 variableNames = tokens.Select(x => x.stringValue.Trim()).ToList(); 356 398 } 357 399
Note: See TracChangeset
for help on using the changeset viewer.