- Timestamp:
- 09/25/12 16:16:23 (12 years ago)
- Location:
- branches/DataAnalysisCSVImport
- Files:
-
- 2 added
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataAnalysisCSVImport
-
Property svn:ignore set to *.suo
-
Property
svn:ignore
set to
-
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
r8693 r8701 92 92 } 93 93 94 public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) { 95 TableFileParser csvFileParser = new TableFileParser(); 96 csvFileParser.Parse(path); 97 94 protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) { 98 95 int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100; 99 96 List<IList> values = csvFileParser.Values; … … 126 123 } 127 124 125 protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, int trainingPartEnd) { 126 target = 5; 127 IList targetValues = values[target]; 128 var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList(); 129 Dictionary<double, double> taken = new Dictionary<double, double>(); 130 foreach (var classCount in group) { 131 taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100; 132 } 133 134 List<IList> training = GetListOfIListCopy(values); 135 List<IList> test = GetListOfIListCopy(values); 136 137 for (int i = 0; i < targetValues.Count; i++) { 138 if (taken[(double)targetValues[i]] > 0) { 139 AddRow(training, values, i); 140 taken[(double)targetValues[i]]--; 141 } else { 142 AddRow(test, values, i); 143 } 144 } 145 146 training = Shuffle(training); 147 test = Shuffle(test); 148 for (int i = 0; i < training.Count; i++) { 149 for (int j = 0; j < test[i].Count; j++) { 150 training[i].Add(test[i][j]); 151 } 152 } 153 154 return training; 155 } 156 157 private void AddRow(List<IList> destination, List<IList> source, int index) { 158 for (int i = 0; i < source.Count; i++) { 159 destination[i].Add(source[i][index]); 160 } 161 } 162 163 private List<IList> GetListOfIListCopy(List<IList> values) { 164 List<IList> newList = new List<IList>(values.Count); 165 for (int col = 0; col < values.Count; col++) { 166 167 if (values[col] is List<double>) 168 newList.Add(new List<double>()); 169 
else if (values[col] is List<DateTime>) 170 newList.Add(new List<DateTime>()); 171 else if (values[col] is List<string>) 172 newList.Add(new List<string>()); 173 else 174 throw new InvalidOperationException(); 175 } 176 return newList; 177 } 178 179 private List<IList> NormalizeClasses(List<IList> values) { 180 int column = GetLastDoubleColumn(values); 181 Dictionary<object, int> count = new Dictionary<object, int>(); 182 foreach (var item in values[column]) { 183 if (count.Keys.Contains(item)) { 184 count[item]++; 185 } else { 186 count.Add(item, 1); 187 } 188 } 189 int min = count.Values.Min(); 190 Dictionary<object, int> taken = new Dictionary<object, int>(); 191 foreach (var key in count.Keys) { 192 taken[key] = 0; 193 } 194 List<IList> normalizedValues = new List<IList>(values.Count); 195 for (int col = 0; col < values.Count; col++) { 196 197 if (values[col] is List<double>) 198 normalizedValues.Add(new List<double>()); 199 else if (values[col] is List<DateTime>) 200 normalizedValues.Add(new List<DateTime>()); 201 else if (values[col] is List<string>) 202 normalizedValues.Add(new List<string>()); 203 else 204 throw new InvalidOperationException(); 205 } 206 for (int i = 0; i < values.First().Count; i++) { 207 if (taken[values[column][i]] < min) { 208 taken[values[column][i]]++; 209 for (int col = 0; col < values.Count; col++) { 210 normalizedValues[col].Add(values[col][i]); 211 } 212 } 213 } 214 return normalizedValues; 215 } 216 217 private int GetLastDoubleColumn(List<IList> values) { 218 for (int i = values.Count - 1; i >= 0; i--) { 219 if (values[i] is List<double>) { 220 return i; 221 } 222 } 223 throw new ArgumentException("No possible Target Variable could be found!"); 224 } 225 128 226 public override bool CanExportData { 129 227 get { return true; } -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs
r8598 r8701 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData > {25 public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData, ClassificationImportType> { 26 26 } 27 27 } -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
r8685 r8701 91 91 } 92 92 93 public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) { 94 TableFileParser csvFileParser = new TableFileParser(); 95 csvFileParser.Parse(path); 96 93 protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) { 97 94 List<IList> values = csvFileParser.Values; 98 95 if (type.Shuffle) { -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/ClusteringInstanceProvider.cs
r8598 r8701 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public abstract class ClusteringInstanceProvider : DataAnalysisInstanceProvider<IClusteringProblemData > {25 public abstract class ClusteringInstanceProvider : DataAnalysisInstanceProvider<IClusteringProblemData, DataAnalysisImportType> { 26 26 } 27 27 } -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs
r8598 r8701 27 27 28 28 namespace HeuristicLab.Problems.Instances.DataAnalysis { 29 public abstract class DataAnalysisInstanceProvider<TData> : ProblemInstanceProvider<TData> 30 where TData : class, IDataAnalysisProblemData { 29 public abstract class DataAnalysisInstanceProvider<TData, ImportType> : ProblemInstanceProvider<TData> 30 where TData : class, IDataAnalysisProblemData 31 where ImportType : DataAnalysisImportType { 31 32 32 33 // has to be implemented, if CanImportData is true 33 public virtual TData ImportData(string path, DataAnalysisImportType type) { 34 public TData ImportData(string path, ImportType type) { 35 TableFileParser csvFileParser = new TableFileParser(); 36 csvFileParser.Parse(path); 37 return ImportData(path, type, csvFileParser); 38 } 39 public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat) { 40 TableFileParser csvFileParser = new TableFileParser(); 41 csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator); 42 return ImportData(path, type, csvFileParser); 43 } 44 protected virtual TData ImportData(string path, ImportType type, TableFileParser csvFileParser) { 34 45 throw new NotSupportedException(); 35 46 } -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj
r8693 r8701 18 18 <DebugType>full</DebugType> 19 19 <Optimize>false</Optimize> 20 <OutputPath>..\..\ bin\</OutputPath>20 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 21 21 <DefineConstants>DEBUG;TRACE</DefineConstants> 22 22 <ErrorReport>prompt</ErrorReport> … … 26 26 <DebugType>pdbonly</DebugType> 27 27 <Optimize>true</Optimize> 28 <OutputPath>..\..\ bin\</OutputPath>28 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 29 29 <DefineConstants>TRACE</DefineConstants> 30 30 <ErrorReport>prompt</ErrorReport> … … 39 39 <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'"> 40 40 <DebugSymbols>true</DebugSymbols> 41 <OutputPath>..\..\ bin\</OutputPath>41 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 42 42 <DefineConstants>DEBUG;TRACE</DefineConstants> 43 43 <DebugType>full</DebugType> … … 52 52 </PropertyGroup> 53 53 <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'"> 54 <OutputPath>..\..\ bin\</OutputPath>54 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 55 55 <DefineConstants>TRACE</DefineConstants> 56 56 <Optimize>true</Optimize> … … 69 69 <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'"> 70 70 <DebugSymbols>true</DebugSymbols> 71 <OutputPath>..\..\ bin\</OutputPath>71 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 72 72 <DefineConstants>DEBUG;TRACE</DefineConstants> 73 73 <DebugType>full</DebugType> … … 82 82 </PropertyGroup> 83 83 <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'"> 84 <OutputPath>..\..\ bin\</OutputPath>84 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 85 85 <DefineConstants>TRACE</DefineConstants> 86 86 <Optimize>true</Optimize> … … 98 98 </PropertyGroup> 99 99 <ItemGroup> 100 <Reference Include="HeuristicLab.Collections-3.3"> 101 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Collections-3.3.dll</HintPath> 102 <Private>False</Private> 103 </Reference> 104 <Reference 
Include="HeuristicLab.Common-3.3"> 105 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath> 106 <Private>False</Private> 107 </Reference> 108 <Reference Include="HeuristicLab.Core-3.3"> 109 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Core-3.3.dll</HintPath> 110 <Private>False</Private> 111 </Reference> 112 <Reference Include="HeuristicLab.Data-3.3"> 113 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Data-3.3.dll</HintPath> 114 <Private>False</Private> 115 </Reference> 116 <Reference Include="HeuristicLab.PluginInfrastructure-3.3"> 117 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath> 118 <Private>False</Private> 119 </Reference> 120 <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4"> 121 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath> 122 <Private>False</Private> 123 </Reference> 124 <Reference Include="HeuristicLab.Problems.Instances-3.3"> 125 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath> 126 <Private>False</Private> 127 </Reference> 128 <Reference Include="HeuristicLab.Random-3.3"> 129 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Random-3.3.dll</HintPath> 130 <Private>False</Private> 131 </Reference> 100 132 <Reference Include="ICSharpCode.SharpZipLib"> 101 <HintPath>..\..\ HeuristicLab.PluginInfrastructure\3.3\ICSharpCode.SharpZipLib.dll</HintPath>133 <HintPath>..\..\..\..\trunk\sources\bin\ICSharpCode.SharpZipLib.dll</HintPath> 102 134 <Private>False</Private> 103 135 </Reference> … … 117 149 <Compile Include="Classification\ClassificationInstanceProvider.cs" /> 118 150 <Compile Include="Classification\CSV\ClassifiactionCSVInstanceProvider.cs" /> 151 <Compile Include="Classification\ClassificationImportType.cs" /> 119 152 <Compile Include="Classification\ResourceClassificationDataDescriptor.cs" /> 120 153 <Compile 
Include="Classification\ResourceClassificationInstanceProvider.cs" /> … … 124 157 <Compile Include="Clustering\ClusteringInstanceProvider.cs" /> 125 158 <Compile Include="Clustering\CSV\ClusteringCSVInstanceProvider.cs" /> 159 <Compile Include="DataAnalysisCSVFormat.cs" /> 126 160 <Compile Include="DataAnalysisImportType.cs" /> 127 161 <Compile Include="DataAnalysisInstanceProvider.cs" /> … … 205 239 <EmbeddedResource Include="Regression\Data\RegressionRealWorld.zip" /> 206 240 </ItemGroup> 207 <ItemGroup>208 <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj">209 <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>210 <Name>HeuristicLab.Collections-3.3</Name>211 <Private>False</Private>212 </ProjectReference>213 <ProjectReference Include="..\..\HeuristicLab.Common\3.3\HeuristicLab.Common-3.3.csproj">214 <Project>{A9AD58B9-3EF9-4CC1-97E5-8D909039FF5C}</Project>215 <Name>HeuristicLab.Common-3.3</Name>216 <Private>False</Private>217 </ProjectReference>218 <ProjectReference Include="..\..\HeuristicLab.Core\3.3\HeuristicLab.Core-3.3.csproj">219 <Project>{C36BD924-A541-4A00-AFA8-41701378DDC5}</Project>220 <Name>HeuristicLab.Core-3.3</Name>221 <Private>False</Private>222 </ProjectReference>223 <ProjectReference Include="..\..\HeuristicLab.Data\3.3\HeuristicLab.Data-3.3.csproj">224 <Project>{BBAB9DF5-5EF3-4BA8-ADE9-B36E82114937}</Project>225 <Name>HeuristicLab.Data-3.3</Name>226 <Private>False</Private>227 </ProjectReference>228 <ProjectReference Include="..\..\HeuristicLab.PluginInfrastructure\3.3\HeuristicLab.PluginInfrastructure-3.3.csproj">229 <Project>{94186A6A-5176-4402-AE83-886557B53CCA}</Project>230 <Name>HeuristicLab.PluginInfrastructure-3.3</Name>231 <Private>False</Private>232 </ProjectReference>233 <ProjectReference Include="..\..\HeuristicLab.Problems.DataAnalysis\3.4\HeuristicLab.Problems.DataAnalysis-3.4.csproj">234 <Project>{DF87C13E-A889-46FF-8153-66DCAA8C5674}</Project>235 
<Name>HeuristicLab.Problems.DataAnalysis-3.4</Name>236 <Private>False</Private>237 </ProjectReference>238 <ProjectReference Include="..\..\HeuristicLab.Problems.Instances\3.3\HeuristicLab.Problems.Instances-3.3.csproj">239 <Project>{3540E29E-4793-49E7-8EE2-FEA7F61C3994}</Project>240 <Name>HeuristicLab.Problems.Instances-3.3</Name>241 <Private>False</Private>242 </ProjectReference>243 <ProjectReference Include="..\..\HeuristicLab.Random\3.3\HeuristicLab.Random-3.3.csproj">244 <Project>{F4539FB6-4708-40C9-BE64-0A1390AEA197}</Project>245 <Name>HeuristicLab.Random-3.3</Name>246 <Private>False</Private>247 </ProjectReference>248 </ItemGroup>249 241 <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> 250 242 <PropertyGroup> -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs
r8685 r8701 86 86 } 87 87 88 public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) { 89 TableFileParser csvFileParser = new TableFileParser(); 90 csvFileParser.Parse(path); 91 88 protected override IRegressionProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) { 92 89 List<IList> values = csvFileParser.Values; 93 90 if (type.Shuffle) { -
branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs
r8598 r8701 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData > {25 public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData, DataAnalysisImportType> { 26 26 } 27 27 }
Note: See TracChangeset for help on using the changeset viewer.