Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/05/11 21:55:55 (13 years ago)
Author:
abeham
Message:

#1614

  • updated branch from trunk
Location:
branches/GeneralizedQAP
Files:
3 deleted
13 edited
15 copied

Legend:

Unmodified
Added
Removed
  • branches/GeneralizedQAP

  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis

  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4

    • Property svn:ignore
      •  

        old new  
        44obj
        55*.vs10x
         6Plugin.cs
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r5847 r6878  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
     25using System.Collections.ObjectModel;
    2426using System.Linq;
    2527using HeuristicLab.Common;
     
    3638    private Dataset(Dataset original, Cloner cloner)
    3739      : base(original, cloner) {
    38       variableNameToVariableIndexMapping = original.variableNameToVariableIndexMapping;
    39       data = original.data;
    40     }
    41     public override IDeepCloneable Clone(Cloner cloner) {
    42       return new Dataset(this, cloner);
    43     }
     40      variableValues = new Dictionary<string, IList>(original.variableValues);
     41      variableNames = new List<string>(original.variableNames);
     42      rows = original.rows;
     43    }
     44    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
    4445
    4546    public Dataset()
     
    4748      Name = "-";
    4849      VariableNames = Enumerable.Empty<string>();
    49       data = new double[0, 0];
    50     }
    51 
    52     public Dataset(IEnumerable<string> variableNames, double[,] data)
     50      variableValues = new Dictionary<string, IList>();
     51      rows = 0;
     52    }
     53
     54    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
    5355      : base() {
    5456      Name = "-";
    55       if (variableNames.Count() != data.GetLength(1)) {
    56         throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
    57       }
    58       this.data = (double[,])data.Clone();
    59       VariableNames = variableNames;
    60     }
    61 
    62 
    63     private Dictionary<string, int> variableNameToVariableIndexMapping;
    64     private Dictionary<int, string> variableIndexToVariableNameMapping;
     57      if (!variableNames.Any()) {
     58        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
     59      } else if (variableNames.Count() != variableValues.Count()) {
     60        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     61      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
     62        throw new ArgumentException("The number of values must be equal for every variable");
     63      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
     64        var duplicateVariableNames =
     65          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     66        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     67        foreach (var duplicateVariableName in duplicateVariableNames)
     68          message += duplicateVariableName + Environment.NewLine;
     69        throw new ArgumentException(message);
     70      }
     71
     72      rows = variableValues.First().Count;
     73      this.variableNames = new List<string>(variableNames);
     74      this.variableValues = new Dictionary<string, IList>();
     75      for (int i = 0; i < this.variableNames.Count; i++) {
     76        var values = variableValues.ElementAt(i);
     77        IList clonedValues = null;
     78        if (values is List<double>)
     79          clonedValues = new List<double>(values.Cast<double>());
     80        else if (values is List<string>)
     81          clonedValues = new List<string>(values.Cast<string>());
     82        else if (values is List<DateTime>)
     83          clonedValues = new List<DateTime>(values.Cast<DateTime>());
     84        else {
     85          this.variableNames = new List<string>();
     86          this.variableValues = new Dictionary<string, IList>();
     87          throw new ArgumentException("The variable values must be of type List<double>, List<string> or List<DateTime>");
     88        }
     89        this.variableValues.Add(this.variableNames[i], clonedValues);
     90      }
     91    }
     92
     93    public Dataset(IEnumerable<string> variableNames, double[,] variableValues) {
     94      Name = "-";
     95      if (variableNames.Count() != variableValues.GetLength(1)) {
     96        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     97      }
     98      if (variableNames.Distinct().Count() != variableNames.Count()) {
     99        var duplicateVariableNames = variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     100        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     101        foreach (var duplicateVariableName in duplicateVariableNames)
     102          message += duplicateVariableName + Environment.NewLine;
     103        throw new ArgumentException(message);
     104      }
     105
     106      rows = variableValues.GetLength(0);
     107      this.variableNames = new List<string>(variableNames);
     108
     109      this.variableValues = new Dictionary<string, IList>();
     110      for (int col = 0; col < variableValues.GetLength(1); col++) {
     111        string columName = this.variableNames[col];
     112        var values = new List<double>();
     113        for (int row = 0; row < variableValues.GetLength(0); row++) {
     114          values.Add(variableValues[row, col]);
     115        }
     116        this.variableValues.Add(columName, values);
     117      }
     118    }
     119
     120    #region Backwards compatible code, remove with 3.5
     121    private double[,] storableData;
     122    //name alias used to support backwards compatibility
     123    [Storable(Name = "data", AllowOneWay = true)]
     124    private double[,] StorableData { set { storableData = value; } }
     125
     126    [StorableHook(HookType.AfterDeserialization)]
     127    private void AfterDeserialization() {
     128      if (variableValues == null) {
     129        rows = storableData.GetLength(0);
     130        variableValues = new Dictionary<string, IList>();
     131        for (int col = 0; col < storableData.GetLength(1); col++) {
     132          string columName = variableNames[col];
     133          var values = new List<double>();
     134          for (int row = 0; row < storableData.GetLength(0); row++) {
     135            values.Add(storableData[row, col]);
     136          }
     137          variableValues.Add(columName, values);
     138        }
     139        storableData = null;
     140      }
     141    }
     142    #endregion
     143
     144    [Storable(Name = "VariableValues")]
     145    private Dictionary<string, IList> variableValues;
     146
     147    private List<string> variableNames;
    65148    [Storable]
    66149    public IEnumerable<string> VariableNames {
    67       get {
    68         // convert KeyCollection to an array first for persistence
    69         return variableNameToVariableIndexMapping.Keys.ToArray();
    70       }
     150      get { return variableNames; }
    71151      private set {
    72         if (variableNameToVariableIndexMapping != null) throw new InvalidOperationException("VariableNames can only be set once.");
    73         this.variableNameToVariableIndexMapping = new Dictionary<string, int>();
    74         this.variableIndexToVariableNameMapping = new Dictionary<int, string>();
    75         int i = 0;
    76         foreach (string variableName in value) {
    77           this.variableNameToVariableIndexMapping.Add(variableName, i);
    78           this.variableIndexToVariableNameMapping.Add(i, variableName);
    79           i++;
    80         }
    81       }
    82     }
    83 
     152        if (variableNames != null) throw new InvalidOperationException();
     153        variableNames = new List<string>(value);
     154      }
     155    }
     156
     157    public IEnumerable<string> DoubleVariables {
     158      get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); }
     159    }
     160
     161    public IEnumerable<double> GetDoubleValues(string variableName) {
     162      IList list;
     163      if (!variableValues.TryGetValue(variableName, out list))
     164        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     165      List<double> values = list as List<double>;
     166      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable.");
     167
     168      //mkommend yield return used to enable lazy evaluation
     169      foreach (double value in values)
     170        yield return value;
     171    }
     172    public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) {
     173      IList list;
     174      if (!variableValues.TryGetValue(variableName, out list))
     175        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     176      List<double> values = list as List<double>;
     177      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable.");
     178      return values.AsReadOnly();
     179    }
     180    public double GetDoubleValue(string variableName, int row) {
     181      IList list;
     182      if (!variableValues.TryGetValue(variableName, out list))
     183        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     184      List<double> values = list as List<double>;
     185      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable.");
     186      return values[row];
     187    }
     188    public IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows) {
     189      IList list;
     190      if (!variableValues.TryGetValue(variableName, out list))
     191        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     192      List<double> values = list as List<double>;
     193      if (values == null) throw new ArgumentException("The varialbe " + variableName + " is not a double variable.");
     194
     195      foreach (int index in rows)
     196        yield return values[index];
     197    }
     198
     199    #region IStringConvertibleMatrix Members
    84200    [Storable]
    85     private double[,] data;
    86     private double[,] Data {
    87       get { return data; }
    88     }
    89 
    90     // elementwise access
    91     public double this[int rowIndex, int columnIndex] {
    92       get { return data[rowIndex, columnIndex]; }
    93     }
    94     public double this[string variableName, int rowIndex] {
    95       get {
    96         int columnIndex = GetVariableIndex(variableName);
    97         return data[rowIndex, columnIndex];
    98       }
    99     }
    100 
    101     public double[] GetVariableValues(int variableIndex) {
    102       return GetVariableValues(variableIndex, 0, Rows);
    103     }
    104     public double[] GetVariableValues(string variableName) {
    105       return GetVariableValues(GetVariableIndex(variableName), 0, Rows);
    106     }
    107     public double[] GetVariableValues(int variableIndex, int start, int end) {
    108       return GetEnumeratedVariableValues(variableIndex, start, end).ToArray();
    109     }
    110     public double[] GetVariableValues(string variableName, int start, int end) {
    111       return GetVariableValues(GetVariableIndex(variableName), start, end);
    112     }
    113 
    114     public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex) {
    115       return GetEnumeratedVariableValues(variableIndex, 0, Rows);
    116     }
    117     public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, int start, int end) {
    118       if (start < 0 || !(start <= end))
    119         throw new ArgumentException("Start must be between 0 and end (" + end + ").");
    120       if (end > Rows || end < start)
    121         throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");
    122 
    123       for (int i = start; i < end; i++)
    124         yield return data[i, variableIndex];
    125     }
    126     public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, IEnumerable<int> rows) {
    127       foreach (int row in rows)
    128         yield return data[row, variableIndex];
    129     }
    130 
    131     public IEnumerable<double> GetEnumeratedVariableValues(string variableName) {
    132       return GetEnumeratedVariableValues(GetVariableIndex(variableName), 0, Rows);
    133     }
    134     public IEnumerable<double> GetEnumeratedVariableValues(string variableName, int start, int end) {
    135       return GetEnumeratedVariableValues(GetVariableIndex(variableName), start, end);
    136     }
    137     public IEnumerable<double> GetEnumeratedVariableValues(string variableName, IEnumerable<int> rows) {
    138       return GetEnumeratedVariableValues(GetVariableIndex(variableName), rows);
    139     }
    140 
    141     public string GetVariableName(int variableIndex) {
    142       try {
    143         return variableIndexToVariableNameMapping[variableIndex];
    144       }
    145       catch (KeyNotFoundException ex) {
    146         throw new ArgumentException("The variable index " + variableIndex + " was not found.", ex);
    147       }
    148     }
    149     public int GetVariableIndex(string variableName) {
    150       try {
    151         return variableNameToVariableIndexMapping[variableName];
    152       }
    153       catch (KeyNotFoundException ex) {
    154         throw new ArgumentException("The variable name " + variableName + " was not found.", ex);
    155       }
    156     }
    157 
    158     #region IStringConvertibleMatrix Members
     201    private int rows;
    159202    public int Rows {
    160       get { return data.GetLength(0); }
     203      get { return rows; }
    161204      set { throw new NotSupportedException(); }
    162205    }
    163206    public int Columns {
    164       get { return data.GetLength(1); }
     207      get { return variableNames.Count; }
    165208      set { throw new NotSupportedException(); }
    166209    }
     
    184227
    185228    public string GetValue(int rowIndex, int columnIndex) {
    186       return data[rowIndex, columnIndex].ToString();
     229      return variableValues[variableNames[columnIndex]][rowIndex].ToString();
    187230    }
    188231    public bool SetValue(string value, int rowIndex, int columnIndex) {
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r6643 r6878  
    4141    <DebugType>full</DebugType>
    4242    <Optimize>false</Optimize>
    43     <OutputPath>bin\Debug\</OutputPath>
     43    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    4444    <DefineConstants>DEBUG;TRACE</DefineConstants>
    4545    <ErrorReport>prompt</ErrorReport>
     
    5050    <DebugType>pdbonly</DebugType>
    5151    <Optimize>true</Optimize>
    52     <OutputPath>bin\Release\</OutputPath>
     52    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    5353    <DefineConstants>TRACE</DefineConstants>
    5454    <ErrorReport>prompt</ErrorReport>
     
    5858  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
    5959    <DebugSymbols>true</DebugSymbols>
    60     <OutputPath>bin\x64\Debug\</OutputPath>
     60    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    6161    <DefineConstants>DEBUG;TRACE</DefineConstants>
    6262    <DebugType>full</DebugType>
     
    6666  </PropertyGroup>
    6767  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
    68     <OutputPath>bin\x64\Release\</OutputPath>
     68    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    6969    <DefineConstants>TRACE</DefineConstants>
    7070    <Optimize>true</Optimize>
     
    7676  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
    7777    <DebugSymbols>true</DebugSymbols>
    78     <OutputPath>bin\x86\Debug\</OutputPath>
     78    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    7979    <DefineConstants>DEBUG;TRACE</DefineConstants>
    8080    <DebugType>full</DebugType>
     
    8484  </PropertyGroup>
    8585  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
    86     <OutputPath>bin\x86\Release\</OutputPath>
     86    <OutputPath>$(SolutionDir)\bin\</OutputPath>
    8787    <DefineConstants>TRACE</DefineConstants>
    8888    <Optimize>true</Optimize>
     
    127127    </Compile>
    128128    <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" />
     129    <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisProblem.cs" />
     130    <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisProblemData.cs" />
     131    <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisSolution.cs" />
     132    <Compile Include="Implementation\TimeSeriesPrognosis\TimeSeriesPrognosisSolutionBase.cs" />
    129133    <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs">
    130134      <SubType>Code</SubType>
     
    139143    <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" />
    140144    <Compile Include="Implementation\Regression\RegressionSolutionBase.cs" />
     145    <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisModel.cs" />
     146    <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisProblem.cs" />
     147    <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisProblemData.cs" />
     148    <Compile Include="Interfaces\TimeSeriesPrognosis\ITimeSeriesPrognosisSolution.cs" />
     149    <Compile Include="OnlineCalculators\OnlineDirectionalSymmetryCalculator.cs" />
    141150    <Compile Include="OnlineCalculators\OnlineMeanAbsoluteErrorCalculator.cs" />
    142151    <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" />
     
    174183    <Compile Include="OnlineCalculators\OnlinePearsonsRSquaredCalculator.cs" />
    175184    <Compile Include="Implementation\Regression\RegressionSolution.cs" />
     185    <Compile Include="OnlineCalculators\OnlineTheilsUStatisticCalculator.cs" />
     186    <Compile Include="OnlineCalculators\OnlineWeightedDirectionalSymmetryCalculator.cs" />
     187    <Compile Include="Plugin.cs" />
    176188    <Compile Include="TableFileParser.cs" />
    177189    <Compile Include="Implementation\Classification\ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" />
     
    179191    <Compile Include="Implementation\Classification\ThresholdCalculators\ThresholdCalculator.cs" />
    180192    <None Include="HeuristicLab.snk" />
    181     <None Include="HeuristicLabProblemsDataAnalysisPlugin.cs.frame" />
    182     <None Include="Properties\AssemblyInfo.frame" />
    183     <Compile Include="HeuristicLabProblemsDataAnalysisPlugin.cs" />
     193    <None Include="Plugin.cs.frame" />
     194    <None Include="Properties\AssemblyInfo.cs.frame" />
    184195    <Compile Include="Interfaces\IDataAnalysisModel.cs" />
    185196    <Compile Include="Interfaces\IDataAnalysisProblem.cs" />
     
    190201      <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>
    191202      <Name>HeuristicLab.Collections-3.3</Name>
     203      <Private>False</Private>
    192204    </ProjectReference>
    193205    <ProjectReference Include="..\..\HeuristicLab.Common.Resources\3.3\HeuristicLab.Common.Resources-3.3.csproj">
    194206      <Project>{0E27A536-1C4A-4624-A65E-DC4F4F23E3E1}</Project>
    195207      <Name>HeuristicLab.Common.Resources-3.3</Name>
     208      <Private>False</Private>
    196209    </ProjectReference>
    197210    <ProjectReference Include="..\..\HeuristicLab.Common\3.3\HeuristicLab.Common-3.3.csproj">
    198211      <Project>{A9AD58B9-3EF9-4CC1-97E5-8D909039FF5C}</Project>
    199212      <Name>HeuristicLab.Common-3.3</Name>
     213      <Private>False</Private>
    200214    </ProjectReference>
    201215    <ProjectReference Include="..\..\HeuristicLab.Core\3.3\HeuristicLab.Core-3.3.csproj">
    202216      <Project>{C36BD924-A541-4A00-AFA8-41701378DDC5}</Project>
    203217      <Name>HeuristicLab.Core-3.3</Name>
     218      <Private>False</Private>
    204219    </ProjectReference>
    205220    <ProjectReference Include="..\..\HeuristicLab.Data\3.3\HeuristicLab.Data-3.3.csproj">
    206221      <Project>{BBAB9DF5-5EF3-4BA8-ADE9-B36E82114937}</Project>
    207222      <Name>HeuristicLab.Data-3.3</Name>
     223      <Private>False</Private>
    208224    </ProjectReference>
    209225    <ProjectReference Include="..\..\HeuristicLab.Optimization\3.3\HeuristicLab.Optimization-3.3.csproj">
    210226      <Project>{14AB8D24-25BC-400C-A846-4627AA945192}</Project>
    211227      <Name>HeuristicLab.Optimization-3.3</Name>
     228      <Private>False</Private>
    212229    </ProjectReference>
    213230    <ProjectReference Include="..\..\HeuristicLab.Parameters\3.3\HeuristicLab.Parameters-3.3.csproj">
    214231      <Project>{56F9106A-079F-4C61-92F6-86A84C2D84B7}</Project>
    215232      <Name>HeuristicLab.Parameters-3.3</Name>
     233      <Private>False</Private>
    216234    </ProjectReference>
    217235    <ProjectReference Include="..\..\HeuristicLab.Persistence\3.3\HeuristicLab.Persistence-3.3.csproj">
    218236      <Project>{102BC7D3-0EF9-439C-8F6D-96FF0FDB8E1B}</Project>
    219237      <Name>HeuristicLab.Persistence-3.3</Name>
     238      <Private>False</Private>
    220239    </ProjectReference>
    221240    <ProjectReference Include="..\..\HeuristicLab.PluginInfrastructure\3.3\HeuristicLab.PluginInfrastructure-3.3.csproj">
    222241      <Project>{94186A6A-5176-4402-AE83-886557B53CCA}</Project>
    223242      <Name>HeuristicLab.PluginInfrastructure-3.3</Name>
     243      <Private>False</Private>
    224244    </ProjectReference>
    225245  </ItemGroup>
     
    257277
    258278call PreBuildEvent.cmd
    259 SubWCRev "%25ProjectDir%25\" "%25ProjectDir%25\HeuristicLabProblemsDataAnalysisPlugin.cs.frame" "%25ProjectDir%25\HeuristicLabProblemsDataAnalysisPlugin.cs"</PreBuildEvent>
     279</PreBuildEvent>
    260280  </PropertyGroup>
    261281</Project>
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r6685 r6878  
    226226      get {
    227227        if (classValues == null) {
    228           classValues = Dataset.GetEnumeratedVariableValues(TargetVariableParameter.Value.Value).Distinct().ToList();
     228          classValues = Dataset.GetDoubleValues(TargetVariableParameter.Value.Value).Distinct().ToList();
    229229          classValues.Sort();
    230230        }
     
    291291    private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) {
    292292      int maxSamples = Math.Min(InspectedRowsToDetermineTargets, dataset.Rows);
    293       var validTargetVariables = (from v in dataset.VariableNames
    294                                   let distinctValues = dataset.GetEnumeratedVariableValues(v)
     293      var validTargetVariables = (from v in dataset.DoubleVariables
     294                                  let distinctValues = dataset.GetDoubleValues(v)
    295295                                    .Take(maxSamples)
    296296                                    .Distinct()
     
    410410      dataset.Name = Path.GetFileName(fileName);
    411411
    412       ClassificationProblemData problemData = new ClassificationProblemData(dataset, dataset.VariableNames.Skip(1), dataset.VariableNames.First());
     412      ClassificationProblemData problemData = new ClassificationProblemData(dataset, dataset.DoubleVariables.Skip(1), dataset.DoubleVariables.First());
    413413      problemData.Name = "Data imported from " + Path.GetFileName(fileName);
    414414      return problemData;
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs

    r6685 r6878  
    6767    protected void CalculateResults() {
    6868      double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values
    69       double[] originalTrainingClassValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     69      double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
    7070      double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values
    71       double[] originalTestClassValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     71      double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
    7272
    7373      OnlineCalculatorError errorState;
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolutionBase.cs

    r6606 r6878  
    103103    protected void CalculateRegressionResults() {
    104104      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    105       double[] originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     105      double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
    106106      double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values
    107       double[] originalTestValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     107      double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
    108108
    109109      OnlineCalculatorError errorState;
     
    132132      double[] classValues;
    133133      double[] thresholds;
    134       var targetClassValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     134      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
    135135      AccuracyMaximizationThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    136136
     
    141141      double[] classValues;
    142142      double[] thresholds;
    143       var targetClassValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     143      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
    144144      NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    145145
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringProblemData.cs

    r5809 r6878  
    2020#endregion
    2121
    22 using System;
    2322using System.Collections.Generic;
    2423using System.IO;
    25 using System.Linq;
    2624using HeuristicLab.Common;
    2725using HeuristicLab.Core;
    28 using HeuristicLab.Data;
    29 using HeuristicLab.Parameters;
    3026using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3127
     
    10399      dataset.Name = Path.GetFileName(fileName);
    104100
    105       ClusteringProblemData problemData = new ClusteringProblemData(dataset, dataset.VariableNames);
     101      ClusteringProblemData problemData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
    106102      problemData.Name = "Data imported from " + Path.GetFileName(fileName);
    107103      return problemData;
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r6685 r6878  
    116116      if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null.");
    117117
    118       if (allowedInputVariables.Except(dataset.VariableNames).Any())
    119         throw new ArgumentException("All allowed input variables must be present in the dataset.");
     118      if (allowedInputVariables.Except(dataset.DoubleVariables).Any())
     119        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double.");
    120120
    121       var inputVariables = new CheckedItemList<StringValue>(dataset.VariableNames.Select(x => new StringValue(x)));
     121      var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
    122122      foreach (StringValue x in inputVariables)
    123123        inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value));
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r6685 r6878  
    144144      dataset.Name = Path.GetFileName(fileName);
    145145
    146       RegressionProblemData problemData = new RegressionProblemData(dataset, dataset.VariableNames.Skip(1), dataset.VariableNames.First());
     146      RegressionProblemData problemData = new RegressionProblemData(dataset, dataset.DoubleVariables.Skip(1), dataset.DoubleVariables.First());
    147147      problemData.Name = "Data imported from " + Path.GetFileName(fileName);
    148148      return problemData;
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r6685 r6878  
    127127        OnlineCalculatorError errorState;
    128128        Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue()));
    129         double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     129        double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
    130130        TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN;
    131131      }
     
    134134        OnlineCalculatorError errorState;
    135135        Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue()));
    136         double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     136        double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
    137137        TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN;
    138138      }
     
    142142    protected void CalculateResults() {
    143143      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    144       double[] originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     144      double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
    145145      double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values
    146       double[] originalTestValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     146      double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
    147147
    148148      OnlineCalculatorError errorState;
  • branches/GeneralizedQAP/HeuristicLab.Problems.DataAnalysis/3.4/TableFileParser.cs

    r5809 r6878  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    3334    private readonly char[] POSSIBLE_SEPARATORS = new char[] { ',', ';', '\t' };
    3435    private Tokenizer tokenizer;
    35     private List<List<double>> rowValues;
     36    private List<List<object>> rowValues;
    3637
    3738    private int rows;
     
    4748    }
    4849
    49     private double[,] values;
    50     public double[,] Values {
     50    private List<IList> values;
     51    public List<IList> Values {
    5152      get {
    5253        return values;
     
    6970
    7071    public TableFileParser() {
    71       rowValues = new List<List<double>>();
     72      rowValues = new List<List<object>>();
    7273      variableNames = new List<string>();
    7374    }
     
    7576    public void Parse(string fileName) {
    7677      NumberFormatInfo numberFormat;
     78      DateTimeFormatInfo dateTimeFormatInfo;
    7779      char separator;
    78       DetermineFileFormat(fileName, out numberFormat, out separator);
     80      DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator);
    7981      using (StreamReader reader = new StreamReader(fileName)) {
    80         tokenizer = new Tokenizer(reader, numberFormat, separator);
     82        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    8183        // parse the file
    8284        Parse();
     
    8688      rows = rowValues.Count;
    8789      columns = rowValues[0].Count;
    88       values = new double[rows, columns];
    89 
    90       int rowIndex = 0;
    91       int columnIndex = 0;
    92       foreach (List<double> row in rowValues) {
    93         columnIndex = 0;
    94         foreach (double element in row) {
    95           values[rowIndex, columnIndex++] = element;
    96         }
    97         rowIndex++;
    98       }
    99     }
    100 
    101     private void DetermineFileFormat(string fileName, out NumberFormatInfo numberFormat, out char separator) {
     90      values = new List<IList>();
     91
     92      //create columns
     93      for (int col = 0; col < columns; col++) {
     94        var types = rowValues.Select(r => r[col]).Where(v => v != null && v as string != string.Empty).Take(10).Select(v => v.GetType());
     95        if (!types.Any()) {
     96          values.Add(new List<string>());
     97          continue;
     98        }
     99
     100        var columnType = types.GroupBy(v => v).OrderBy(v => v.Count()).Last().Key;
     101        if (columnType == typeof(double)) values.Add(new List<double>());
     102        else if (columnType == typeof(DateTime)) values.Add(new List<DateTime>());
     103        else if (columnType == typeof(string)) values.Add(new List<string>());
     104        else throw new InvalidOperationException();
     105      }
     106
     107
     108
     109      //fill with values
     110      foreach (List<object> row in rowValues) {
     111        int columnIndex = 0;
     112        foreach (object element in row) {
     113          if (values[columnIndex] is List<double> && !(element is double))
     114            values[columnIndex].Add(double.NaN);
     115          else if (values[columnIndex] is List<DateTime> && !(element is DateTime))
     116            values[columnIndex].Add(DateTime.MinValue);
     117          else if (values[columnIndex] is List<string> && !(element is string))
     118            values[columnIndex].Add(string.Empty);
     119          else
     120            values[columnIndex].Add(element);
     121          columnIndex++;
     122        }
     123      }
     124    }
     125
     126    private void DetermineFileFormat(string fileName, out NumberFormatInfo numberFormat, out DateTimeFormatInfo dateTimeFormatInfo, out char separator) {
    102127      using (StreamReader reader = new StreamReader(fileName)) {
    103128        // skip first line
     
    123148        if (OccurrencesOf(charCounts, '.') > 10) {
    124149          numberFormat = NumberFormatInfo.InvariantInfo;
     150          dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
    125151          separator = POSSIBLE_SEPARATORS
    126152            .Where(c => OccurrencesOf(charCounts, c) > 10)
     
    139165            // English format (only integer values) with ',' as separator
    140166            numberFormat = NumberFormatInfo.InvariantInfo;
     167            dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
    141168            separator = ',';
    142169          } else {
     
    144171            // German format (real values)
    145172            numberFormat = NumberFormatInfo.GetInstance(new CultureInfo("de-DE"));
     173            dateTimeFormatInfo = DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE"));
    146174            separator = POSSIBLE_SEPARATORS
    147175              .Except(disallowedSeparators)
     
    154182          // no points and no commas => English format
    155183          numberFormat = NumberFormatInfo.InvariantInfo;
     184          dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
    156185          separator = POSSIBLE_SEPARATORS
    157186            .Where(c => OccurrencesOf(charCounts, c) > 10)
     
    169198    #region tokenizer
    170199    internal enum TokenTypeEnum {
    171       NewLine, Separator, String, Double
     200      NewLine, Separator, String, Double, DateTime
    172201    }
    173202
     
    176205      public string stringValue;
    177206      public double doubleValue;
     207      public DateTime dateTimeValue;
    178208
    179209      public Token(TokenTypeEnum type, string value) {
    180210        this.type = type;
    181211        stringValue = value;
     212        dateTimeValue = DateTime.MinValue;
    182213        doubleValue = 0.0;
    183214      }
     
    193224      private List<Token> tokens;
    194225      private NumberFormatInfo numberFormatInfo;
     226      private DateTimeFormatInfo dateTimeFormatInfo;
    195227      private char separator;
    196228      private const string INTERNAL_SEPARATOR = "#";
     
    218250      }
    219251
    220       public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, char separator) {
     252      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    221253        this.reader = reader;
    222254        this.numberFormatInfo = numberFormatInfo;
     255        this.dateTimeFormatInfo = dateTimeFormatInfo;
    223256        this.separator = separator;
    224257        separatorToken = new Token(TokenTypeEnum.Separator, INTERNAL_SEPARATOR);
     
    264297          token.type = TokenTypeEnum.Double;
    265298          return token;
    266         }
    267 
    268         // couldn't parse the token as an int or float number so return a string token
     299        } else if (DateTime.TryParse(strToken, dateTimeFormatInfo, DateTimeStyles.None, out token.dateTimeValue)) {
     300          token.type = TokenTypeEnum.DateTime;
     301          return token;
     302        }
     303
     304        // couldn't parse the token as an int, float, or DateTime value, so return a string token
    269305        return token;
    270306      }
     
    299335    private void ParseValues() {
    300336      while (tokenizer.HasNext()) {
    301         List<double> row = new List<double>();
    302         row.Add(NextValue(tokenizer));
    303         while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
    304           Expect(tokenizer.SeparatorToken);
    305           row.Add(NextValue(tokenizer));
    306         }
    307         Expect(tokenizer.NewlineToken);
    308         // all rows have to have the same number of values           
    309         // the first row defines how many samples are needed
    310         if (rowValues.Count > 0 && rowValues[0].Count != row.Count) {
    311           Error("The first row of the dataset has " + rowValues[0].Count + " columns." +
    312             "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", tokenizer.CurrentLineNumber);
    313         }
    314         // add the current row to the collection of rows and start a new row
    315         rowValues.Add(row);
    316         row = new List<double>();
    317       }
    318     }
    319 
    320     private double NextValue(Tokenizer tokenizer) {
    321       if (tokenizer.Peek() == tokenizer.SeparatorToken || tokenizer.Peek() == tokenizer.NewlineToken) return double.NaN;
     337        if (tokenizer.Peek() == tokenizer.NewlineToken) {
     338          tokenizer.Next();
     339        } else {
     340          List<object> row = new List<object>();
     341          object value = NextValue(tokenizer);
     342          row.Add(value);
     343          while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
     344            Expect(tokenizer.SeparatorToken);
     345            row.Add(NextValue(tokenizer));
     346          }
     347          Expect(tokenizer.NewlineToken);
     348          // all rows have to have the same number of values           
     349          // the first row defines how many samples are needed
     350          if (rowValues.Count > 0 && rowValues[0].Count != row.Count) {
     351            Error("The first row of the dataset has " + rowValues[0].Count + " columns." +
     352                  "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "",
     353                  tokenizer.CurrentLineNumber);
     354          }
     355          rowValues.Add(row);
     356        }
     357      }
     358    }
     359
     360    private object NextValue(Tokenizer tokenizer) {
     361      if (tokenizer.Peek() == tokenizer.SeparatorToken || tokenizer.Peek() == tokenizer.NewlineToken) return string.Empty;
    322362      Token current = tokenizer.Next();
    323       if (current.type == TokenTypeEnum.Separator || current.type == TokenTypeEnum.String) {
     363      if (current.type == TokenTypeEnum.Separator) {
    324364        return double.NaN;
     365      } else if (current.type == TokenTypeEnum.String) {
     366        return current.stringValue;
    325367      } else if (current.type == TokenTypeEnum.Double) {
    326         // just take the value
    327368        return current.doubleValue;
     369      } else if (current.type == TokenTypeEnum.DateTime) {
     370        return current.dateTimeValue;
    328371      }
    329372      // found an unexpected token => throw error
     
    334377
    335378    private void ParseVariableNames() {
    336       // if the first line doesn't start with a double value then we assume that the
    337       // first line contains variable names
    338       if (tokenizer.HasNext() && tokenizer.Peek().type != TokenTypeEnum.Double) {
    339 
    340         List<Token> tokens = new List<Token>();
    341         Token valueToken;
     379      // if the first token is a double, no variable names are given
     380      if (tokenizer.Peek().type == TokenTypeEnum.Double) return;
     381
     382      // the first line must contain variable names
     383      List<Token> tokens = new List<Token>();
     384      Token valueToken;
     385      valueToken = tokenizer.Next();
     386      tokens.Add(valueToken);
     387      while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
     388        Expect(tokenizer.SeparatorToken);
    342389        valueToken = tokenizer.Next();
    343         tokens.Add(valueToken);
    344         while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
    345           Expect(tokenizer.SeparatorToken);
    346           valueToken = tokenizer.Next();
    347           if (valueToken != tokenizer.NewlineToken) {
    348             tokens.Add(valueToken);
    349           }
    350         }
    351390        if (valueToken != tokenizer.NewlineToken) {
    352           Expect(tokenizer.NewlineToken);
    353         }
    354         variableNames = tokens.Select(x => x.stringValue.Trim()).ToList();
    355       }
     391          tokens.Add(valueToken);
     392        }
     393      }
     394      if (valueToken != tokenizer.NewlineToken) {
     395        Expect(tokenizer.NewlineToken);
     396      }
     397      variableNames = tokens.Select(x => x.stringValue.Trim()).ToList();
    356398    }
    357399
Note: See TracChangeset for help on using the changeset viewer.