Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/08/12 16:43:08 (11 years ago)
Author:
sforsten
Message:

#1942:

  • implemented changes suggested by mkommend in comment:15:ticket:1942 except the first remark
  • TimeSeriesPrognosisInstanceProvider has been adapted to work similarly to the other DataAnalysisInstanceProviders; views have also been created for it
Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8877 r8885  
    9494      List<IList> values = csvFileParser.Values;
    9595      if (type.Shuffle) {
    96         values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
    97                          type.Training, out trainingPartEnd);
     96        values = Shuffle(values);
     97        if (type.UniformlyDistributeClasses) {
     98          values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
     99                           type.Training, out trainingPartEnd);
     100        }
    98101      }
    99102
     
    147150      trainingPartEnd = training.First().Count;
    148151
    149       training = Shuffle(training);
    150       test = Shuffle(test);
    151152      for (int i = 0; i < training.Count; i++) {
    152153        for (int j = 0; j < test[i].Count; j++) {
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationImportType.cs

    r8877 r8885  
    2323  public class ClassificationImportType : DataAnalysisImportType {
    2424    public string TargetVariable { get; set; }
     25    public bool UniformlyDistributeClasses { get; set; }
    2526  }
    2627}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r8877 r8885  
    203203    <Compile Include="TableFileParser.cs" />
    204204    <Compile Include="TimeSeries\CSV\TimeSeriesPrognosisCSVInstanceProvider.cs" />
     205    <Compile Include="TimeSeries\TimeSeriesPrognosisImportType.cs" />
    205206    <Compile Include="TimeSeries\TimeSeriesPrognosisInstanceProvider.cs" />
    206207  </ItemGroup>
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r8564 r8885  
    8383      DateTimeFormatInfo dateTimeFormatInfo;
    8484      char separator;
    85       DetermineFileFormat(new FileStream(fileName, FileMode.Open), out numberFormat, out dateTimeFormatInfo, out separator);
    86       Parse(new FileStream(fileName, FileMode.Open), numberFormat, dateTimeFormatInfo, separator);
     85      DetermineFileFormat(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), out numberFormat, out dateTimeFormatInfo, out separator);
     86      Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator);
    8787    }
    8888
     
    9595    /// <param name="separator">defines the separator</param>
    9696    public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    97       Parse(new FileStream(fileName, FileMode.Open), numberFormat, dateTimeFormatInfo, separator);
     97      Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator);
    9898    }
    9999
     
    163163
    164164    public static void DetermineFileFormat(string path, out NumberFormatInfo numberFormat, out DateTimeFormatInfo dateTimeFormatInfo, out char separator) {
    165       DetermineFileFormat(new FileStream(path, FileMode.Open), out numberFormat, out dateTimeFormatInfo, out separator);
     165      DetermineFileFormat(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), out numberFormat, out dateTimeFormatInfo, out separator);
    166166    }
    167167
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TimeSeries/CSV/TimeSeriesPrognosisCSVInstanceProvider.cs

    r7890 r8885  
    2020#endregion
    2121
     22using System;
     23using System.Collections;
     24using System.Collections.Generic;
     25using System.IO;
     26using System.Linq;
     27using HeuristicLab.Common;
     28using HeuristicLab.Problems.DataAnalysis;
    2229
    23 using System;
    24 using System.Collections.Generic;
    25 using HeuristicLab.Problems.DataAnalysis;
    2630namespace HeuristicLab.Problems.Instances.DataAnalysis {
    2731  public class TimeSeriesPrognosisCSVInstanceProvider : TimeSeriesPrognosisInstanceProvider {
     
    4852      throw new NotImplementedException();
    4953    }
     54
     55    public override bool CanImportData { get { return true; } }
     56
     57    public override ITimeSeriesPrognosisProblemData ImportData(string path) {
     58      TableFileParser csvFileParser = new TableFileParser();
     59      csvFileParser.Parse(path);
     60
     61      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     62      string targetVar = csvFileParser.VariableNames.Last();
     63
     64      IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65
     66      ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);
     67
     68      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
     69      timeSeriesPrognosisData.TrainingPartition.Start = 0;
     70      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
     71      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
     72      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;
     73
     74      int pos = path.LastIndexOf('\\');
     75      if (pos < 0)
     76        timeSeriesPrognosisData.Name = path;
     77      else {
     78        pos++;
     79        timeSeriesPrognosisData.Name = path.Substring(pos, path.Length - pos);
     80      }
     81      return timeSeriesPrognosisData;
     82    }
     83
     84    protected override ITimeSeriesPrognosisProblemData ImportData(string path, TimeSeriesPrognosisImportType type, TableFileParser csvFileParser) {
     85      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     86
     87      // turn off input variables that are constant in the training partition
     88      var allowedInputVars = new List<string>();
     89      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
     90      trainingPartEnd = trainingPartEnd > 0 ? trainingPartEnd : 1;
     91      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
     92      if (trainingIndizes.Count() >= 2) {
     93        foreach (var variableName in dataset.DoubleVariables) {
     94          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     95            variableName != type.TargetVariable)
     96            allowedInputVars.Add(variableName);
     97        }
     98      } else {
     99        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
     100      }
     101
     102      TimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, type.TargetVariable);
     103
     104      timeSeriesPrognosisData.TrainingPartition.Start = 0;
     105      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
     106      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
     107      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;
     108
     109      timeSeriesPrognosisData.Name = Path.GetFileName(path);
     110
     111      return timeSeriesPrognosisData;
     112    }
    50113  }
    51114}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TimeSeries/TimeSeriesPrognosisInstanceProvider.cs

    r8430 r8885  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.IO;
    25 using System.Linq;
    26 using System.Text;
    2722using HeuristicLab.Problems.DataAnalysis;
    2823
    2924namespace HeuristicLab.Problems.Instances.DataAnalysis {
    30   public abstract class TimeSeriesPrognosisInstanceProvider : IProblemInstanceProvider<ITimeSeriesPrognosisProblemData> {
    31     public bool CanImportData { get { return true; } }
    32     public bool CanExportData { get { return true; } }
    33 
    34 
    35     public ITimeSeriesPrognosisProblemData ImportData(string path) {
    36       TableFileParser csvFileParser = new TableFileParser();
    37       csvFileParser.Parse(path);
    38 
    39       Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    40       string targetVar = csvFileParser.VariableNames.Last();
    41 
    42       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
    43 
    44       ITimeSeriesPrognosisProblemData regData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);
    45 
    46       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    47       regData.TrainingPartition.Start = 0;
    48       regData.TrainingPartition.End = trainingPartEnd;
    49       regData.TestPartition.Start = trainingPartEnd;
    50       regData.TestPartition.End = csvFileParser.Rows;
    51 
    52       int pos = path.LastIndexOf('\\');
    53       if (pos < 0)
    54         regData.Name = path;
    55       else {
    56         pos++;
    57         regData.Name = path.Substring(pos, path.Length - pos);
    58       }
    59       return regData;
    60     }
    61 
    62     public void ExportData(ITimeSeriesPrognosisProblemData instance, string path) {
    63       StringBuilder strBuilder = new StringBuilder();
    64 
    65       foreach (var variable in instance.InputVariables) {
    66         strBuilder.Append(variable + ";");
    67       }
    68       strBuilder.Remove(strBuilder.Length - 1, 1);
    69       strBuilder.AppendLine();
    70 
    71       Dataset dataset = instance.Dataset;
    72 
    73       for (int i = 0; i < dataset.Rows; i++) {
    74         for (int j = 0; j < dataset.Columns; j++) {
    75           strBuilder.Append(dataset.GetValue(i, j) + ";");
    76         }
    77         strBuilder.Remove(strBuilder.Length - 1, 1);
    78         strBuilder.AppendLine();
    79       }
    80 
    81       using (StreamWriter writer = new StreamWriter(path)) {
    82         writer.Write(strBuilder);
    83       }
    84     }
    85 
    86     public abstract IEnumerable<IDataDescriptor> GetDataDescriptors();
    87     public abstract ITimeSeriesPrognosisProblemData LoadData(IDataDescriptor descriptor);
    88 
    89     public abstract string Name { get; }
    90     public abstract string Description { get; }
    91     public abstract Uri WebLink { get; }
    92     public abstract string ReferencePublication { get; }
     25  public abstract class TimeSeriesPrognosisInstanceProvider : DataAnalysisInstanceProvider<ITimeSeriesPrognosisProblemData, TimeSeriesPrognosisImportType> {
    9326  }
    9427}
Note: See TracChangeset for help on using the changeset viewer.