Changeset 14843


Ignore:
Timestamp: 04/11/17 15:55:44 (6 months ago)
Author: gkronber
Message: #2697: applied r14390, r14391, r14393, r14394, r14396 again (resolving conflicts)

Location: trunk/sources
Files: 8 added, 5 deleted, 38 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r14400 r14843  
    165165      try {
    166166        CalculateModel(ds, rows, scaleInputs);
    167       }
    168       catch (alglib.alglibexception ae) {
     167      } catch (alglib.alglibexception ae) {
    169168        // wrap exception so that calling code doesn't have to know about alglib implementation
    170169        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    260259    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
    261260      if (scaling != null) {
    262         return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
     261        // TODO: completely remove Scaling class
     262        List<ITransformation<double>> transformations = new List<ITransformation<double>>();
     263
     264        foreach (var varName in allowedInputs) {
     265          double min;
     266          double max;
     267          scaling.GetScalingParameters(varName, out min, out max);
     268          var add = -min / (max - min);
     269          var mult = 1.0 / (max - min);
     270          transformations.Add(new LinearTransformation(allowedInputs) { Addend = add, Multiplier = mult });
     271        }
     272        return ds.ToArray(allowedInputs, transformations, rows);
    263273      } else {
    264         return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
     274        return ds.ToArray(allowedInputs, rows);
    265275      }
    266276    }
     
    334344        return Enumerable.Range(0, newN)
    335345          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
    336       }
    337       catch (alglib.alglibexception ae) {
     346      } catch (alglib.alglibexception ae) {
    338347        // wrap exception so that calling code doesn't have to know about alglib implementation
    339348        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    381390        }
    382391        return kss;
    383       }
    384       catch (alglib.alglibexception ae) {
     392      } catch (alglib.alglibexception ae) {
    385393        // wrap exception so that calling code doesn't have to know about alglib implementation
    386394        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14826 r14843  
    246246      <SubType>Code</SubType>
    247247    </Compile>
    248     <Compile Include="Linear\AlglibUtil.cs" />
    249     <Compile Include="Linear\Scaling.cs" />
    250248    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    251249    <Compile Include="Linear\LinearRegression.cs">
     
    255253    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
    256254    <Compile Include="Linear\MultinomialLogitModel.cs" />
     255    <Compile Include="Linear\Scaling.cs" />
    257256    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    258257    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14826 r14843  
    7373      var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
    7474      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray();
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     75      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
    7676
    77       var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
    78       double[,] factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     77      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     78      var factorMatrix = dataset.ToArray(factorVariables, rows);
    7979
    8080      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     
    9494      if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
    9595
    96       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    97       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    98       tree.Root.AddSubtree(startNode);
    99       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    100       startNode.AddSubtree(addition);
    101 
    102       int col = 0;
    103       foreach (var kvp in factorVariables) {
    104         var varName = kvp.Key;
    105         foreach (var cat in kvp.Value) {
    106           BinaryFactorVariableTreeNode vNode =
    107             (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
    108           vNode.VariableName = varName;
    109           vNode.VariableValue = cat;
    110           vNode.Weight = w[col];
    111           addition.AddSubtree(vNode);
    112           col++;
    113         }
    114       }
    115       foreach (string column in doubleVariableNames) {
    116         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    117         vNode.VariableName = column;
    118         vNode.Weight = w[col];
    119         addition.AddSubtree(vNode);
    120         col++;
    121       }
     96      var nFactorCoeff = factorMatrix.GetLength(1);
     97      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(),
     98        doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray());
    12299
    123100      var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14826 r14843  
    7676      var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
    7777      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
    78       var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
    79       double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
    80       double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows);
     78      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     79      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
     80      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    8181      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    8282
     
    9898      alglib.lrunpack(lm, out coefficients, out nFeatures);
    9999
    100       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    101       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    102       tree.Root.AddSubtree(startNode);
    103       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    104       startNode.AddSubtree(addition);
    105 
    106       int col = 0;
    107       foreach (var kvp in factorVariables) {
    108         var varName = kvp.Key;
    109         foreach (var cat in kvp.Value) {
    110           BinaryFactorVariableTreeNode vNode =
    111             (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
    112           vNode.VariableName = varName;
    113           vNode.VariableValue = cat;
    114           vNode.Weight = coefficients[col];
    115           addition.AddSubtree(vNode);
    116           col++;
    117         }
    118       }
    119       foreach (string column in doubleVariables) {
    120         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    121         vNode.VariableName = column;
    122         vNode.Weight = coefficients[col];
    123         addition.AddSubtree(vNode);
    124         col++;
    125       }
    126 
    127       ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    128       cNode.Value = coefficients[coefficients.Length - 1];
    129       addition.AddSubtree(cNode);
    130 
     100      int nFactorCoeff = binaryMatrix.GetLength(1);
     101      int nVarCoeff = doubleVariables.Count();
     102      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     103        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
     104        @const: coefficients[nFeatures]);
     105     
    131106      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    132107      solution.Model.Name = "Linear Regression Model";
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14826 r14843  
    7272      var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>);
    7373      IEnumerable<int> rows = problemData.TrainingIndices;
    74       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     74      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
    7575
    76       var factorVariableValues = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
    77       var factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariableValues, rows);
     76      var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows);
     77      var factorMatrix = dataset.ToArray(factorVariableValues, rows);
    7878      inputMatrix = factorMatrix.HorzCat(inputMatrix);
    7979
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14826 r14843  
    9797    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    9898
    99       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    100       double[,] factorData = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     99      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     100      double[,] factorData = dataset.ToArray(factorVariables, rows);
    101101
    102102      inputData = factorData.HorzCat(inputData);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r14400 r14843  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
     31  [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    3132  [StorableClass]
    3233  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs

    r14400 r14843  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
    47                                                 data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
    48                                                 data.TrainingIndices);
     46      var ldaDs = data.Dataset.ToArray(
     47                                       data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
     48                                       data.TrainingIndices);
    4949
    5050      // map class values to sequential natural numbers (required by alglib)
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs

    r14400 r14843  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
     46      var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);
    4747
    4848      int info;
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs

    r14400 r14843  
    9999      }
    100100
    101       var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
    102                                                problemData.TrainingIndices);
     101      var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
     102                                             problemData.TrainingIndices);
    103103      var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
    104104
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r14400 r14843  
    8686
    8787    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
    88       var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     88      var data = dataset.ToArray(allowedInputVariables, rows);
    8989
    9090      var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14826 r14843  
    119119      if (IsCompatibilityLoaded) {
    120120        // no scaling
    121         inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
     121        inputMatrix = dataset.ToArray(
    122122          this.allowedInputVariables.Concat(new string[] { targetVariable }),
    123123          rows);
     
    167167
    168168    private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
    169       var x = new double[rows.Count(), variables.Count()];
    170       var colIdx = 0;
    171       foreach (var variableName in variables) {
    172         var rowIdx = 0;
    173         foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
    174           x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
    175           rowIdx++;
    176         }
    177         colIdx++;
    178       }
    179       return x;
     169      var transforms =
     170        variables.Select(
     171          (_, colIdx) =>
     172            new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
     173      return dataset.ToArray(variables, transforms, rows);
    180174    }
    181175
     
    187181      double[,] inputData;
    188182      if (IsCompatibilityLoaded) {
    189         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     183        inputData = dataset.ToArray(allowedInputVariables, rows);
    190184      } else {
    191185        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
     
    223217      double[,] inputData;
    224218      if (IsCompatibilityLoaded) {
    225         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     219        inputData = dataset.ToArray(allowedInputVariables, rows);
    226220      } else {
    227221        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r14523 r14843  
    184184      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    185185      IEnumerable<int> rows = problemData.TrainingIndices;
    186       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     186      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    187187      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    188188        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r14523 r14843  
    125125    public NeuralNetworkEnsembleClassification()
    126126      : base() {
    127       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    128         (IntValue)new IntValue(0).AsReadOnly(), 
    129         (IntValue)new IntValue(1).AsReadOnly(), 
     127      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     128        (IntValue)new IntValue(0).AsReadOnly(),
     129        (IntValue)new IntValue(1).AsReadOnly(),
    130130        (IntValue)new IntValue(2).AsReadOnly() });
    131131      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    170170      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    171171      IEnumerable<int> rows = problemData.TrainingIndices;
    172       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     172      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    173173      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    174174        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r14400 r14843  
    9191
    9292    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    93       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     93      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9494
    9595      int n = inputData.GetLength(0);
     
    108108
    109109    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    110       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     110      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    111111
    112112      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r14523 r14843  
    169169      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    170170      IEnumerable<int> rows = problemData.TrainingIndices;
    171       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     171      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    172172      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    173173        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r14400 r14843  
    9595
    9696    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    97       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     97      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9898
    9999      int n = inputData.GetLength(0);
     
    112112
    113113    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     114      double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
    115115
    116116      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r14523 r14843  
    185185      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    186186      IEnumerable<int> rows = problemData.TrainingIndices;
    187       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     187      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    188188      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    189189        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r14400 r14843  
    139139
    140140    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    141       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     141      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    142142      AssertInputMatrix(inputData);
    143143
     
    157157
    158158    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    159       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     159      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    160160      AssertInputMatrix(inputData);
    161161
     
    175175
    176176    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    177       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     177      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    178178      AssertInputMatrix(inputData);
    179179
     
    294294      out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
    295295      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    296       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     296      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    297297
    298298      alglib.dfreport rep;
     
    316316
    317317      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    318       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     318      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    319319
    320320      var classValues = problemData.ClassValues.ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r14523 r14843  
    115115      alglib.lrunpack(lm, out coefficients, out nFeatures);
    116116
    117 
    118       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    119       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    120       tree.Root.AddSubtree(startNode);
    121       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    122       startNode.AddSubtree(addition);
    123 
    124       for (int i = 0; i < timeOffset; i++) {
    125         LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
    126         node.VariableName = targetVariable;
    127         node.Weight = coefficients[i];
    128         node.Lag = (i + 1) * -1;
    129         addition.AddSubtree(node);
    130       }
    131 
    132       ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    133       cNode.Value = coefficients[coefficients.Length - 1];
    134       addition.AddSubtree(cNode);
     117      var tree = LinearModelToTreeConverter.CreateTree(
     118        variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
     119        lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
     120        coefficients: coefficients.Take(nFeatures).ToArray(),
     121        @const: coefficients[nFeatures]
     122        );
    135123
    136124      var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r14523 r14843  
    9090      double[,] centers;
    9191      int[] xyc;
    92       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     92      double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
    9393      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    9494        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Common/3.3/EnumerableExtensions.cs

    r14769 r14843  
    8888    }
    8989
     90    public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
     91      int i = 0;
     92      foreach (var x in xs) {
     93        if (i % nth == 0) yield return x;
     94        i++;
     95      }
     96    }
     97
    9098    /// <summary>
    9199    /// Compute the n-ary cartesian product of arbitrarily many sequences: http://blogs.msdn.com/b/ericlippert/archive/2010/06/28/computing-a-cartesian-product-with-linq.aspx
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/PreprocessingTransformator.cs

    r14400 r14843  
    113113      // don't apply when the check fails
    114114      if (success)
    115         return transformation.Apply(data);
     115        return transformation.ConfigureAndApply(data);
    116116      else
    117117        return data;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs

    r14826 r14843  
    4848      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    4949
    50       var usedVariables = Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
    51         .OfType<IVariableTreeNode>()
    52         .Select(node => node.VariableName).ToArray();
     50      var usedVariables = Content.Model.VariablesUsedForPrediction;
    5351
    5452      var usedDoubleVariables = usedVariables
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs

    r14840 r14843  
    164164      // A dictionary is used to find parameters
    165165      double[] initialConstants;
    166       var parameters = new List<TreeToAutoDiffTermTransformator.DataForVariable>();
    167 
    168       TreeToAutoDiffTermTransformator.ParametricFunction func;
    169       TreeToAutoDiffTermTransformator.ParametricFunctionGradient func_grad;
    170       if (!TreeToAutoDiffTermTransformator.TryTransformToAutoDiff(tree, updateVariableWeights, out parameters, out initialConstants, out func, out func_grad))
     166      var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
     167
     168      TreeToAutoDiffTermConverter.ParametricFunction func;
     169      TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
     170      if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, out parameters, out initialConstants, out func, out func_grad))
    171171        throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    172172      if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
     
    175175
    176176      //extract initial constants
    177       double[] c = new double[initialConstants.Length];
     177      double[] c = new double[initialConstants.Length + 2];
    178178      {
    179179        c[0] = 0.0;
     
    256256    }
    257257
    258     private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermTransformator.ParametricFunction func) {
     258    private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {
    259259      return (double[] c, double[] x, ref double fx, object o) => {
    260260        fx = func(c, x);
     
    262262    }
    263263
    264     private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermTransformator.ParametricFunctionGradient func_grad) {
     264    private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {
    265265      return (double[] c, double[] x, ref double fx, double[] grad, object o) => {
    266266        var tupel = func_grad(c, x);
     
    270270    }
    271271    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
    272       return TreeToAutoDiffTermTransformator.IsCompatible(tree);
     272      return TreeToAutoDiffTermConverter.IsCompatible(tree);
    273273    }
    274274  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/InteractiveSymbolicDataAnalysisSolutionSimplifierView.cs

    r14826 r14843  
    266266
    267267    private void btnSimplify_Click(object sender, EventArgs e) {
    268       var simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
     268      var simplifier = new TreeSimplifier();
    269269      var simplifiedExpressionTree = simplifier.Simplify(Content.Model.SymbolicExpressionTree);
    270270      UpdateModel(simplifiedExpressionTree);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/SymbolicDataAnalysisExpressionExcelFormatter.cs

    r14826 r14843  
    5151      while (dividend > 0) {
    5252        int modulo = (dividend - 1) % 26;
    53         columnName = Convert.ToChar(65 + modulo) + columnName;
     53        columnName = System.Convert.ToChar(65 + modulo) + columnName;
    5454        dividend = (int)((dividend - modulo) / 26);
    5555      }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r14840 r14843  
    137137      <SubType>Code</SubType>
    138138    </Compile>
     139    <Compile Include="Converters\Convert.cs" />
     140    <Compile Include="Converters\LinearModelToTreeConverter.cs" />
     141    <Compile Include="Converters\TreeSimplifier.cs" />
     142    <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" />
    139143    <Compile Include="Formatters\InfixExpressionFormatter.cs" />
    140144    <Compile Include="Formatters\SymbolicDataAnalysisExpressionMathematicaFormatter.cs" />
     
    250254    <Compile Include="Symbols\VariableConditionTreeNode.cs" />
    251255    <Compile Include="Symbols\VariableTreeNode.cs" />
    252     <Compile Include="Transformation\SymbolicDataAnalysisExpressionTreeSimplifier.cs" />
    253     <Compile Include="Transformation\SymbolicExpressionTreeBacktransformator.cs" />
    254     <Compile Include="Transformation\TreeToAutoDiffTermTransformator.cs" />
    255     <Compile Include="Transformation\TransformationToSymbolicTreeMapper.cs" />
     256    <Compile Include="Transformations\SymbolicExpressionTreeBacktransformator.cs" />
     257    <Compile Include="Transformations\TransformationToSymbolicTreeMapper.cs" />
    256258    <Compile Include="TreeMatching\SymbolicExpressionTreeBottomUpSimilarityCalculator.cs" />
    257259    <Compile Include="TreeMatching\SymbolicExpressionTreeCanonicalSorter.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimplificationOperator.cs

    r14400 r14843  
    3737    }
    3838
    39     private readonly SymbolicDataAnalysisExpressionTreeSimplifier simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
     39    private readonly TreeSimplifier simplifier = new TreeSimplifier();
    4040
    4141    [StorableConstructor]
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/DatasetExtensions.cs

    r14400 r14843  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
     24using System.Linq;
    2325
    2426namespace HeuristicLab.Problems.DataAnalysis {
    2527  public static class DatasetExtensions {
    26     public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
    27       int i = 0;
    28       foreach (var x in xs) {
    29         if (i % nth == 0) yield return x;
    30         i++;
     28    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     29      return ToArray(dataset,
     30        variables,
     31        transformations: variables.Select(_ => (ITransformation<double>)null), // no transform
     32        rows: rows);
     33    }
     34    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables,
     35      IEnumerable<ITransformation<double>> transformations, IEnumerable<int> rows) {
     36      string[] variablesArr = variables.ToArray();
     37      int[] rowsArr = rows.ToArray();
     38      ITransformation<double>[] transformArr = transformations.ToArray();
     39      if (transformArr.Length != variablesArr.Length)
     40        throw new ArgumentException("Number of variables and number of transformations must match.");
     41
     42      double[,] matrix = new double[rowsArr.Length, variablesArr.Length];
     43
     44      for (int i = 0; i < variablesArr.Length; i++) {
     45        var origValues = dataset.GetDoubleValues(variablesArr[i], rowsArr);
     46        var values = transformArr[i] != null ? transformArr[i].Apply(origValues) : origValues;
     47        int row = 0;
     48        foreach (var value in values) {
     49          matrix[row, i] = value;
     50          row++;
     51        }
    3152      }
     53
     54      return matrix;
     55    }
     56
     57    /// <summary>
     58    /// Prepares a binary data matrix from a number of factors and specified factor values
     59    /// </summary>
     60    /// <param name="dataset">A dataset that contains the variable values</param>
     61    /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
     62    /// <param name="rows">An enumerable of row indices for the dataset</param>
     63    /// <returns></returns>
     64    /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
     65    public static double[,] ToArray(
     66      this IDataset dataset,
     67      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
     68      IEnumerable<int> rows) {
     69      // check input variables. Only string variables are allowed.
     70      var invalidInputs =
     71        factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name));
     72      if (invalidInputs.Any())
     73        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     74
     75      int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count());
     76
     77      List<int> rowsList = rows.ToList();
     78      double[,] matrix = new double[rowsList.Count, numBinaryColumns];
     79
     80      int col = 0;
     81      foreach (var kvp in factorVariables) {
     82        var varName = kvp.Key;
     83        var cats = kvp.Value;
     84        if (!cats.Any()) continue;
     85        foreach (var cat in cats) {
     86          var values = dataset.GetStringValues(varName, rows);
     87          int row = 0;
     88          foreach (var value in values) {
     89            matrix[row, col] = value == cat ? 1 : 0;
     90            row++;
     91          }
     92          col++;
     93        }
     94      }
     95      return matrix;
     96    }
     97
     98    public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(
     99      this IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) {
     100      return from factor in factorVariables
     101             let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray()
     102             // 1 distinct value => skip (constant)
     103             // 2 distinct values => only take one of the two values
     104             // >=3 distinct values => create a binary value for each value
     105             let reducedValues = distinctValues.Length <= 2
     106               ? distinctValues.Take(distinctValues.Length - 1)
     107               : distinctValues
     108             select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
    32109    }
    33110  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r14826 r14843  
    9494    }
    9595
     96    public double[,] AllowedInputsTrainingValues {
     97      get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); }
     98    }
     99
     100    public double[,] AllowedInputsTestValues { get { return Dataset.ToArray(AllowedInputVariables, TestIndices); } }
    96101    public IntRange TrainingPartition {
    97102      get { return TrainingPartitionParameter.Value; }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs

    r14400 r14843  
    5252    public double Multiplier {
    5353      get { return MultiplierParameter.Value.Value; }
    54       protected set {
     54      set {
    5555        MultiplierParameter.Value.Value = value;
    5656      }
     
    5959    public double Addend {
    6060      get { return AddendParameter.Value.Value; }
    61       protected set {
     61      set {
    6262        AddendParameter.Value.Value = value;
    6363      }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftStandardDistributionTransformation.cs

    r14400 r14843  
    7171
    7272    public override IEnumerable<double> Apply(IEnumerable<double> data) {
    73       ConfigureParameters(data);
    7473      if (OriginalStandardDeviation.IsAlmost(0.0)) {
    7574        return data;
     
    9493    }
    9594
    96     protected void ConfigureParameters(IEnumerable<double> data) {
     95    public override void ConfigureParameters(IEnumerable<double> data) {
    9796      OriginalStandardDeviation = data.StandardDeviation();
    9897      OriginalMean = data.Average();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftToRangeTransformation.cs

    r14400 r14843  
    4444    }
    4545
    46     public override IEnumerable<double> Apply(IEnumerable<double> data) {
    47       ConfigureParameters(data);
    48       return base.Apply(data);
    49     }
    50 
    5146    public override bool Check(IEnumerable<double> data, out string errorMsg) {
    5247      ConfigureParameters(data);
     
    5449    }
    5550
    56     protected void ConfigureParameters(IEnumerable<double> data) {
     51    public override void ConfigureParameters(IEnumerable<double> data) {
    5752      double originalRangeStart = data.Min();
    5853      double originalRangeEnd = data.Max();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/Transformation.cs

    r14400 r14843  
    6666    protected Transformation(IEnumerable<string> allowedColumns) : base(allowedColumns) { }
    6767
     68    public virtual void ConfigureParameters(IEnumerable<T> data) {
     69      // override in transformations with parameters
     70    }
     71
    6872    public abstract IEnumerable<T> Apply(IEnumerable<T> data);
     73    public IEnumerable<T> ConfigureAndApply(IEnumerable<T> data) {
     74      ConfigureParameters(data);
     75      return Apply(data);
     76    }
    6977
    7078    public abstract bool Check(IEnumerable<T> data, out string errorMsg);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r14400 r14843  
    3333    IEnumerable<string> AllowedInputVariables { get; }
    3434
     35    double[,] AllowedInputsTrainingValues { get; }
     36    double[,] AllowedInputsTestValues { get; }
     37
    3538    IntRange TrainingPartition { get; }
    3639    IntRange TestPartition { get; }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs

    r14400 r14843  
    3030
    /// <summary>
    /// A transformation that operates on sequences of <typeparamref name="T"/> values.
    /// </summary>
    public interface ITransformation<T> : ITransformation {
      /// <summary>Configures the parameters of the transformation from the given data.</summary>
      void ConfigureParameters(IEnumerable<T> data);

      /// <summary>Applies the transformation to the given data and returns the resulting values.</summary>
      IEnumerable<T> Apply(IEnumerable<T> data);

      /// <summary>
      /// Configures the transformation from the given data and applies it to the same data
      /// (the Transformation base class implements this as ConfigureParameters followed by Apply).
      /// </summary>
      IEnumerable<T> ConfigureAndApply(IEnumerable<T> data);
    }
  • trunk/sources/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4/SymbolicDataAnalysisExpressionTreeSimplifierTest.cs

    r14826 r14843  
    3434    public void SimplifierAxiomsTest() {
    3535      SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
    36       SymbolicDataAnalysisExpressionTreeSimplifier simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
     36      TreeSimplifier simplifier = new TreeSimplifier();
    3737      SymbolicExpressionTreeStringFormatter formatter = new SymbolicExpressionTreeStringFormatter();
    3838      #region single argument arithmetics
Note: See TracChangeset for help on using the changeset viewer.