Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/16/13 13:13:41 (11 years ago)
Author:
spimming
Message:

#1888:

  • Merged revisions from trunk
Location:
branches/OaaS
Files:
2 deleted
7 edited
4 copied

Legend:

Unmodified
Added
Removed
  • branches/OaaS

  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis

    • Property svn:mergeinfo set to (toggle deleted branches)
      /trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis: merged, eligible
      /branches/Algorithms.GradientDescent/HeuristicLab.Problems.Instances.DataAnalysis: 5516-5520
      /branches/Benchmarking/sources/HeuristicLab.Problems.Instances.DataAnalysis: 6917-7005
      /branches/CloningRefactoring/HeuristicLab.Problems.Instances.DataAnalysis: 4656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Problems.Instances.DataAnalysis: 5471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.Instances.DataAnalysis: 5815-6180
      /branches/DataAnalysis/HeuristicLab.Problems.Instances.DataAnalysis: 4458-4459, 4462, 4464
      /branches/DataAnalysisCSVImport/HeuristicLab.Problems.Instances.DataAnalysis: 8695-8875
      /branches/GP.Grammar.Editor/HeuristicLab.Problems.Instances.DataAnalysis: 6284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Problems.Instances.DataAnalysis: 5060
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.Instances.DataAnalysis: 7889-8789
      /branches/NET40/sources/HeuristicLab.Problems.Instances.DataAnalysis: 5138-5162
      /branches/ParallelEngine/HeuristicLab.Problems.Instances.DataAnalysis: 5175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.Instances.DataAnalysis: 7568-7810
      /branches/QAPAlgorithms/HeuristicLab.Problems.Instances.DataAnalysis: 6350-6627
      /branches/Restructure trunk solution/HeuristicLab.Problems.Instances.DataAnalysis: 6828
      /branches/RuntimeOptimizer/HeuristicLab.Problems.Instances.DataAnalysis: 8943-9078
      /branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.Instances.DataAnalysis: 7787-8333
      /branches/SlaveShutdown/HeuristicLab.Problems.Instances.DataAnalysis: 8944-8956
      /branches/SuccessProgressAnalysis/HeuristicLab.Problems.Instances.DataAnalysis: 5370-5682
      /branches/Trunk/HeuristicLab.Problems.Instances.DataAnalysis: 6829-6865
      /branches/UnloadJobs/HeuristicLab.Problems.Instances.DataAnalysis: 9168-9215
      /branches/VNS/HeuristicLab.Problems.Instances.DataAnalysis: 5594-5752
      /branches/histogram/HeuristicLab.Problems.Instances.DataAnalysis: 5959-6341
  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ArtificialClassificationDataDescriptor.cs

    r7849 r9363  
    2929
    3030    protected abstract string TargetVariable { get; }
    31     protected abstract string[] InputVariables { get; }
     31    protected abstract string[] VariableNames { get; }
    3232    protected abstract string[] AllowedInputVariables { get; }
    3333    protected abstract int TrainingPartitionStart { get; }
     
    3737
    3838    public IClassificationProblemData GenerateClassificationData() {
    39       Dataset dataset = new Dataset(InputVariables, this.GenerateValues());
     39      Dataset dataset = new Dataset(VariableNames, this.GenerateValues());
    4040
    4141      ClassificationProblemData claData = new ClassificationProblemData(dataset, AllowedInputVariables, TargetVariable);
  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8211 r9363  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.IO;
    2526using System.Linq;
    26 using System.Text;
     27using HeuristicLab.Common;
    2728using HeuristicLab.Problems.DataAnalysis;
    2829
     
    6162
    6263      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    63       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    64       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     64      string targetVar = dataset.DoubleVariables.Last();
    6565
    66       ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    67 
    68       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    69       claData.TrainingPartition.Start = 0;
    70       claData.TrainingPartition.End = trainingPartEnd;
    71       claData.TestPartition.Start = trainingPartEnd;
    72       claData.TestPartition.End = csvFileParser.Rows;
    73       int pos = path.LastIndexOf('\\');
    74       if (pos < 0)
    75         claData.Name = path;
    76       else {
    77         pos++;
    78         claData.Name = path.Substring(pos, path.Length - pos);
     66      // turn off input variables that are constant in the training partition
     67      var allowedInputVars = new List<string>();
     68      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     69      if (trainingIndizes.Count() >= 2) {
     70        foreach (var variableName in dataset.DoubleVariables) {
     71          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72            variableName != targetVar)
     73            allowedInputVars.Add(variableName);
     74        }
     75      } else {
     76        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
    7977      }
    8078
    81       return claData;
     79      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     80
     81      int trainingPartEnd = trainingIndizes.Last();
     82      classificationData.TrainingPartition.Start = trainingIndizes.First();
     83      classificationData.TrainingPartition.End = trainingPartEnd;
     84      classificationData.TestPartition.Start = trainingPartEnd;
     85      classificationData.TestPartition.End = csvFileParser.Rows;
     86
     87      classificationData.Name = Path.GetFileName(path);
     88
     89      return classificationData;
    8290    }
    8391
    84     public override bool CanExportData {
    85       get { return true; }
    86     }
    87     public override void ExportData(IClassificationProblemData instance, string path) {
    88       StringBuilder strBuilder = new StringBuilder();
    89 
    90       foreach (var variable in instance.InputVariables) {
    91         strBuilder.Append(variable + ";");
    92       }
    93       strBuilder.Remove(strBuilder.Length - 1, 1);
    94       strBuilder.AppendLine();
    95 
    96       Dataset dataset = instance.Dataset;
    97 
    98       for (int i = 0; i < dataset.Rows; i++) {
    99         for (int j = 0; j < dataset.Columns; j++) {
    100           strBuilder.Append(dataset.GetValue(i, j) + ";");
     92    protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
     93      int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
     94      List<IList> values = csvFileParser.Values;
     95      if (type.Shuffle) {
     96        values = Shuffle(values);
     97        if (type.UniformlyDistributeClasses) {
     98          values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
     99                           type.TrainingPercentage, out trainingPartEnd);
    101100        }
    102         strBuilder.Remove(strBuilder.Length - 1, 1);
    103         strBuilder.AppendLine();
    104101      }
    105102
    106       using (StreamWriter writer = new StreamWriter(path)) {
    107         writer.Write(strBuilder);
     103      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     104
     105      // turn off input variables that are constant in the training partition
     106      var allowedInputVars = new List<string>();
     107      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
     108      if (trainingIndizes.Count() >= 2) {
     109        foreach (var variableName in dataset.DoubleVariables) {
     110          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     111            variableName != type.TargetVariable)
     112            allowedInputVars.Add(variableName);
     113        }
     114      } else {
     115        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
    108116      }
     117
     118      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
     119
     120      classificationData.TrainingPartition.Start = 0;
     121      classificationData.TrainingPartition.End = trainingPartEnd;
     122      classificationData.TestPartition.Start = trainingPartEnd;
     123      classificationData.TestPartition.End = csvFileParser.Rows;
     124
     125      classificationData.Name = Path.GetFileName(path);
     126
     127      return classificationData;
     128    }
     129
     130    protected List<IList> Shuffle(List<IList> values, int target, int trainingPercentage, out int trainingPartEnd) {
     131      IList targetValues = values[target];
     132      var group = targetValues.Cast<double>().GroupBy(x => x).Select(g => new { Key = g.Key, Count = g.Count() }).ToList();
     133      Dictionary<double, double> taken = new Dictionary<double, double>();
     134      foreach (var classCount in group) {
     135        taken[classCount.Key] = (classCount.Count * trainingPercentage) / 100.0;
     136      }
     137
     138      List<IList> training = GetListOfIListCopy(values);
     139      List<IList> test = GetListOfIListCopy(values);
     140
     141      for (int i = 0; i < targetValues.Count; i++) {
     142        if (taken[(double)targetValues[i]] > 0) {
     143          AddRow(training, values, i);
     144          taken[(double)targetValues[i]]--;
     145        } else {
     146          AddRow(test, values, i);
     147        }
     148      }
     149
     150      trainingPartEnd = training.First().Count;
     151
     152      for (int i = 0; i < training.Count; i++) {
     153        for (int j = 0; j < test[i].Count; j++) {
     154          training[i].Add(test[i][j]);
     155        }
     156      }
     157
     158      return training;
     159    }
     160
     161    private void AddRow(List<IList> destination, List<IList> source, int index) {
     162      for (int i = 0; i < source.Count; i++) {
     163        destination[i].Add(source[i][index]);
     164      }
     165    }
     166
     167    private List<IList> GetListOfIListCopy(List<IList> values) {
     168      List<IList> newList = new List<IList>(values.Count);
     169      foreach (IList t in values) {
     170        if (t is List<double>)
     171          newList.Add(new List<double>());
     172        else if (t is List<DateTime>)
     173          newList.Add(new List<DateTime>());
     174        else if (t is List<string>)
     175          newList.Add(new List<string>());
     176        else
     177          throw new InvalidOperationException();
     178      }
     179      return newList;
    109180    }
    110181  }
  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs

    r8192 r9363  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class ClassificationInstanceProvider : ProblemInstanceProvider<IClassificationProblemData> {
     25  public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData, ClassificationImportType> {
    2626  }
    2727}
  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationDataDescriptor.cs

    r7849 r9363  
    2020#endregion
    2121
     22using System.Collections.Generic;
     23using System.Linq;
    2224
    2325namespace HeuristicLab.Problems.Instances.DataAnalysis {
    24   internal class ResourceClassificationDataDescriptor : IDataDescriptor {
    25     public string Name { get; internal set; }
    26     public string Description { get; internal set; }
     26  public abstract class ResourceClassificationDataDescriptor : ClassificationDataDescriptor {
     27    internal string ResourceName { get; set; }
    2728
    28     internal string ResourceName { get; set; }
    29     internal ResourceClassificationDataDescriptor(string name, string description, string resourceName) {
    30       Name = name;
    31       Description = description;
    32       ResourceName = resourceName;
     29    public bool CheckVariableNames(IEnumerable<string> VariableNames) {
     30      return this.VariableNames.All(x => VariableNames.Contains(x));
    3331    }
    3432  }
  • branches/OaaS/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs

    r7965 r9363  
    2121
    2222using System;
    23 using System.Collections.Generic;
    2423using System.Globalization;
    2524using System.IO;
     
    3433
    3534    protected abstract string FileName { get; }
    36 
    37     public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
    38       var solutionsArchiveName = GetResourceName(FileName + @"\.zip");
    39       if (!String.IsNullOrEmpty(solutionsArchiveName)) {
    40         using (var solutionsZipFile = new ZipInputStream(GetType().Assembly.GetManifestResourceStream(solutionsArchiveName))) {
    41           IList<string> entries = new List<string>();
    42           ZipEntry curEntry;
    43           while ((curEntry = solutionsZipFile.GetNextEntry()) != null) {
    44             entries.Add(curEntry.Name);
    45           }
    46           foreach (var entry in entries.OrderBy(x => x)) {
    47             yield return new ResourceClassificationDataDescriptor(Path.GetFileNameWithoutExtension(entry), Description, entry);
    48           }
    49         }
    50       }
    51     }
    5235
    5336    public override IClassificationProblemData LoadData(IDataDescriptor id) {
     
    7053
    7154        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    72         string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    73         IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     55        if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) {
     56          throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
     57        }
    7458
    75         ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    76 
    77         int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    78         claData.TrainingPartition.Start = 0;
    79         claData.TrainingPartition.End = trainingPartEnd;
    80         claData.TestPartition.Start = trainingPartEnd;
    81         claData.TestPartition.End = csvFileParser.Rows;
    82 
    83         claData.Name = descriptor.Name;
    84         claData.Description = descriptor.Description;
    85         return claData;
     59        return descriptor.GenerateClassificationData(dataset);
    8660      }
    8761    }
Note: See TracChangeset for help on using the changeset viewer.