Changeset 14843
- Timestamp: 04/11/17 15:55:44 (8 years ago)
- Location: trunk/sources
- Files: 8 added, 5 deleted, 38 edited
Legend:
- Unchanged lines are shown without a prefix
- Added lines are prefixed with +
- Removed lines are prefixed with -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r14400 → r14843:
      try {
        CalculateModel(ds, rows, scaleInputs);
-     }
-     catch (alglib.alglibexception ae) {
+     } catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  …
    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
      if (scaling != null) {
-       return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
+       // TODO: completely remove Scaling class
+       List<ITransformation<double>> transformations = new List<ITransformation<double>>();
+
+       foreach (var varName in allowedInputs) {
+         double min;
+         double max;
+         scaling.GetScalingParameters(varName, out min, out max);
+         var add = -min / (max - min);
+         var mult = 1.0 / (max - min);
+         transformations.Add(new LinearTransformation(allowedInputs) { Addend = add, Multiplier = mult });
+       }
+       return ds.ToArray(allowedInputs, transformations, rows);
      } else {
-       return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
+       return ds.ToArray(allowedInputs, rows);
      }
    }
  …
      return Enumerable.Range(0, newN)
        .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
-     }
-     catch (alglib.alglibexception ae) {
+     } catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  …
      return kss;
-     }
-     catch (alglib.alglibexception ae) {
+     } catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
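The Addend and Multiplier computed in the new code reproduce the old min/max scaling exactly. The following standalone sketch (plain C#, not part of the changeset) checks the equivalence numerically; it assumes, as this hunk and the NearestNeighbourModel.cs hunk below imply, that LinearTransformation maps a value x to x * Multiplier + Addend.

    using System;

    // Standalone check: min/max scaling (x - min) / (max - min) equals
    // x * Multiplier + Addend with Multiplier = 1 / (max - min) and Addend = -min / (max - min).
    class ScalingEquivalenceSketch {
      static void Main() {
        double min = 2.0, max = 10.0, x = 7.0;    // arbitrary example values
        double scaled = (x - min) / (max - min);  // behaviour of the removed Scaling code path
        double mult = 1.0 / (max - min);          // Multiplier as computed in the hunk above
        double add = -min / (max - min);          // Addend as computed in the hunk above
        Console.WriteLine(scaled);                // 0.625
        Console.WriteLine(x * mult + add);        // 0.625
      }
    }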
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r14826 → r14843:
      <SubType>Code</SubType>
    </Compile>
-   <Compile Include="Linear\AlglibUtil.cs" />
-   <Compile Include="Linear\Scaling.cs" />
    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    <Compile Include="Linear\LinearRegression.cs">
  …
    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
    <Compile Include="Linear\MultinomialLogitModel.cs" />
+   <Compile Include="Linear\Scaling.cs" />
    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r14826 → r14843:
    var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
    var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray();
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);

-   var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
-   double[,] factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
+   var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
+   var factorMatrix = dataset.ToArray(factorVariables, rows);

    inputMatrix = factorMatrix.HorzCat(inputMatrix);
  …
    if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution");

-   ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
-   ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
-   tree.Root.AddSubtree(startNode);
-   ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
-   startNode.AddSubtree(addition);
-
-   int col = 0;
-   foreach (var kvp in factorVariables) {
-     var varName = kvp.Key;
-     foreach (var cat in kvp.Value) {
-       BinaryFactorVariableTreeNode vNode =
-         (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
-       vNode.VariableName = varName;
-       vNode.VariableValue = cat;
-       vNode.Weight = w[col];
-       addition.AddSubtree(vNode);
-       col++;
-     }
-   }
-   foreach (string column in doubleVariableNames) {
-     VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
-     vNode.VariableName = column;
-     vNode.Weight = w[col];
-     addition.AddSubtree(vNode);
-     col++;
-   }
+   var nFactorCoeff = factorMatrix.GetLength(1);
+   var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(),
+     doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray());

    var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r14826 → r14843:
    var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
    var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
-   var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
-   double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
-   double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows);
+   var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
+   double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
+   double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
  …
    alglib.lrunpack(lm, out coefficients, out nFeatures);

-   ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
-   ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
-   tree.Root.AddSubtree(startNode);
-   ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
-   startNode.AddSubtree(addition);
-
-   int col = 0;
-   foreach (var kvp in factorVariables) {
-     var varName = kvp.Key;
-     foreach (var cat in kvp.Value) {
-       BinaryFactorVariableTreeNode vNode =
-         (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
-       vNode.VariableName = varName;
-       vNode.VariableValue = cat;
-       vNode.Weight = coefficients[col];
-       addition.AddSubtree(vNode);
-       col++;
-     }
-   }
-   foreach (string column in doubleVariables) {
-     VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
-     vNode.VariableName = column;
-     vNode.Weight = coefficients[col];
-     addition.AddSubtree(vNode);
-     col++;
-   }
-
-   ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
-   cNode.Value = coefficients[coefficients.Length - 1];
-   addition.AddSubtree(cNode);
-
+   int nFactorCoeff = binaryMatrix.GetLength(1);
+   int nVarCoeff = doubleVariables.Count();
+   var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
+     doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
+     @const: coefficients[nFeatures]);

    SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    solution.Model.Name = "Linear Regression Model";
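As the Take/Skip slicing above implies, the unpacked alglib coefficient vector is laid out as factor coefficients first, then coefficients of the double variables, then the constant. A standalone sketch of that partitioning (hypothetical values, plain C#, not HeuristicLab code):

    using System;
    using System.Linq;

    // Partition a coefficient vector the same way the new LinearRegression.cs code does:
    // [factor coefficients | double-variable coefficients | constant].
    class CoefficientPartitionSketch {
      static void Main() {
        double[] coefficients = { 0.5, -0.3, 1.2, 2.0, 0.7 };    // hypothetical: 2 factor + 2 variable + constant
        int nFactorCoeff = 2;                                    // would be binaryMatrix.GetLength(1)
        int nVarCoeff = 2;                                       // would be doubleVariables.Count()
        var factorCoeff = coefficients.Take(nFactorCoeff).ToArray();
        var varCoeff = coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray();
        double constant = coefficients[coefficients.Length - 1]; // corresponds to coefficients[nFeatures]
        Console.WriteLine(string.Join(", ", factorCoeff));       // 0.5, -0.3
        Console.WriteLine(string.Join(", ", varCoeff));          // 1.2, 2
        Console.WriteLine(constant);                             // 0.7
      }
    }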
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r14826 → r14843:
    var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>);
    IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);

-   var factorVariableValues = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
-   var factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariableValues, rows);
+   var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows);
+   var factorMatrix = dataset.ToArray(factorVariableValues, rows);
    inputMatrix = factorMatrix.HorzCat(inputMatrix);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r14826 → r14843:
    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {

-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
-     double[,] factorData = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
+     double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+     double[,] factorData = dataset.ToArray(factorVariables, rows);

      inputData = factorData.HorzCat(inputData);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
r14400 → r14843:
  namespace HeuristicLab.Algorithms.DataAnalysis {
+   [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    [StorableClass]
    [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs
r14400 → r14843:
    var attributes = data.AllowedInputVariables.Count();

-   var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
+   var ldaDs = data.Dataset.ToArray(
      data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
      data.TrainingIndices);

    // map class values to sequential natural numbers (required by alglib)
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs
r14400 → r14843:
    var attributes = data.AllowedInputVariables.Count();

-   var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
+   var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);

    int info;
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs
r14400 → r14843:
    }

-   var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
+   var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
      problemData.TrainingIndices);
    var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r14400 → r14843:
    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
-     var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     var data = dataset.ToArray(allowedInputVariables, rows);

      var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r14826 → r14843:
    if (IsCompatibilityLoaded) {
      // no scaling
-     inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
+     inputMatrix = dataset.ToArray(
        this.allowedInputVariables.Concat(new string[] { targetVariable }),
        rows);
  …
    private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
-     var x = new double[rows.Count(), variables.Count()];
-     var colIdx = 0;
-     foreach (var variableName in variables) {
-       var rowIdx = 0;
-       foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
-         x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
-         rowIdx++;
-       }
-       colIdx++;
-     }
-     return x;
+     var transforms =
+       variables.Select(
+         (_, colIdx) =>
+           new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
+     return dataset.ToArray(variables, transforms, rows);
    }
  …
    double[,] inputData;
    if (IsCompatibilityLoaded) {
-     inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     inputData = dataset.ToArray(allowedInputVariables, rows);
    } else {
      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
  …
    double[,] inputData;
    if (IsCompatibilityLoaded) {
-     inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     inputData = dataset.ToArray(allowedInputVariables, rows);
    } else {
      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
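The Addend in CreateScaledData is offsets[colIdx] * factors[colIdx] rather than offsets[colIdx] because the removed loop scaled values as (val + offset) * factor, which expands to val * factor + offset * factor. A standalone sketch of that identity (plain C#, not HeuristicLab code):

    using System;

    // Check that the removed per-column scaling and the new LinearTransformation parameters agree:
    // (val + offset) * factor == val * factor + offset * factor.
    class NearestNeighbourScalingSketch {
      static void Main() {
        double val = 3.5, offset = -1.5, factor = 0.5;      // arbitrary example values
        double oldScaled = (val + offset) * factor;         // behaviour of the removed loop
        double newScaled = val * factor + offset * factor;  // Multiplier = factor, Addend = offset * factor
        Console.WriteLine(oldScaled);                       // 1
        Console.WriteLine(newScaled);                       // 1
      }
    }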
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r14523 → r14843:
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
r14523 → r14843:
    public NeuralNetworkEnsembleClassification()
      : base() {
-     var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
-       (IntValue)new IntValue(0).AsReadOnly(),
-       (IntValue)new IntValue(1).AsReadOnly(),
+     var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
+       (IntValue)new IntValue(0).AsReadOnly(),
+       (IntValue)new IntValue(1).AsReadOnly(),
      (whitespace-only change: trailing spaces removed from the three lines above)
        (IntValue)new IntValue(2).AsReadOnly() });
      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
  …
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
r14400 → r14843:
    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(allowedInputVariables, rows);

      int n = inputData.GetLength(0);
  …
    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(allowedInputVariables, rows);

      int n = inputData.GetLength(0);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
r14523 → r14843:
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r14400 → r14843:
    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(allowedInputVariables, rows);

      int n = inputData.GetLength(0);
  …
    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(allowedInputVariables, rows);

      int n = inputData.GetLength(0);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r14523 → r14843:
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+   double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r14400 → r14843:
    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
      AssertInputMatrix(inputData);
  …
    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
      AssertInputMatrix(inputData);
  …
    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
+     double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
      AssertInputMatrix(inputData);
  …
      out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
-     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
+     double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);

      alglib.dfreport rep;
  …
      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
-     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
+     double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);

      var classValues = problemData.ClassValues.ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
r14523 → r14843:
    alglib.lrunpack(lm, out coefficients, out nFeatures);

-   ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
-   ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
-   tree.Root.AddSubtree(startNode);
-   ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
-   startNode.AddSubtree(addition);
-
-   for (int i = 0; i < timeOffset; i++) {
-     LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
-     node.VariableName = targetVariable;
-     node.Weight = coefficients[i];
-     node.Lag = (i + 1) * -1;
-     addition.AddSubtree(node);
-   }
-
-   ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
-   cNode.Value = coefficients[coefficients.Length - 1];
-   addition.AddSubtree(cNode);
+   var tree = LinearModelToTreeConverter.CreateTree(
+     variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
+     lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
+     coefficients: coefficients.Take(nFeatures).ToArray(),
+     @const: coefficients[nFeatures]
+   );

    var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
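The lags argument passed to LinearModelToTreeConverter.CreateTree is the vector -1, -2, …, -timeOffset, matching the removed loop that set node.Lag = (i + 1) * -1. A standalone sketch (plain C#, not HeuristicLab code):

    using System;
    using System.Linq;

    // Build the lag vector for an autoregressive model of order timeOffset,
    // exactly as the new AutoregressiveModeling code does above.
    class LagVectorSketch {
      static void Main() {
        int timeOffset = 4;  // hypothetical AR order
        var lags = Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray();
        Console.WriteLine(string.Join(", ", lags));  // -1, -2, -3, -4
      }
    }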
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
r14523 → r14843:
    double[,] centers;
    int[] xyc;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+   double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Common/3.3/EnumerableExtensions.cs
r14769 → r14843:
    }

+   public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
+     int i = 0;
+     foreach (var x in xs) {
+       if (i % nth == 0) yield return x;
+       i++;
+     }
+   }
+
    /// <summary>
    /// Compute the n-ary cartesian product of arbitrarily many sequences: http://blogs.msdn.com/b/ericlippert/archive/2010/06/28/computing-a-cartesian-product-with-linq.aspx
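TakeEvery is moved here from DatasetExtensions (see the removal further down). A short standalone usage sketch; the method body is repeated only so the example compiles on its own:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class TakeEverySketch {
      // Same implementation as the added extension method above.
      public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
        int i = 0;
        foreach (var x in xs) {
          if (i % nth == 0) yield return x;
          i++;
        }
      }

      static void Main() {
        var everyThird = Enumerable.Range(0, 10).TakeEvery(3);
        Console.WriteLine(string.Join(", ", everyThird));  // 0, 3, 6, 9
      }
    }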
trunk/sources/HeuristicLab.DataPreprocessing/3.4/PreprocessingTransformator.cs
r14400 → r14843:
    // don't apply when the check fails
    if (success)
-     return transformation.Apply(data);
+     return transformation.ConfigureAndApply(data);
    else
      return data;
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs
r14826 → r14843:
    if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)

-   var usedVariables = Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
-     .OfType<IVariableTreeNode>()
-     .Select(node => node.VariableName).ToArray();
+   var usedVariables = Content.Model.VariablesUsedForPrediction;

    var usedDoubleVariables = usedVariables
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r14840 → r14843:
    // A dictionary is used to find parameters
    double[] initialConstants;
-   var parameters = new List<TreeToAutoDiffTermTransformator.DataForVariable>();
-
-   TreeToAutoDiffTermTransformator.ParametricFunction func;
-   TreeToAutoDiffTermTransformator.ParametricFunctionGradient func_grad;
-   if (!TreeToAutoDiffTermTransformator.TryTransformToAutoDiff(tree, updateVariableWeights, out parameters, out initialConstants, out func, out func_grad))
+   var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
+
+   TreeToAutoDiffTermConverter.ParametricFunction func;
+   TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
+   if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, out parameters, out initialConstants, out func, out func_grad))
      throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
  …
    //extract inital constants
-   double[] c = new double[initialConstants.Length];
+   double[] c = new double[initialConstants.Length + 2];
    {
      c[0] = 0.0;
  …
    }

-   private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermTransformator.ParametricFunction func) {
+   private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {
      return (double[] c, double[] x, ref double fx, object o) => {
        fx = func(c, x);
  …
    }

-   private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermTransformator.ParametricFunctionGradient func_grad) {
+   private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {
      return (double[] c, double[] x, ref double fx, double[] grad, object o) => {
        var tupel = func_grad(c, x);
  …
    }
    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
-     return TreeToAutoDiffTermTransformator.IsCompatible(tree);
+     return TreeToAutoDiffTermConverter.IsCompatible(tree);
    }
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/InteractiveSymbolicDataAnalysisSolutionSimplifierView.cs
r14826 → r14843:
    private void btnSimplify_Click(object sender, EventArgs e) {
-     var simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
+     var simplifier = new TreeSimplifier();
      var simplifiedExpressionTree = simplifier.Simplify(Content.Model.SymbolicExpressionTree);
      UpdateModel(simplifiedExpressionTree);
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/SymbolicDataAnalysisExpressionExcelFormatter.cs
r14826 → r14843:
    while (dividend > 0) {
      int modulo = (dividend - 1) % 26;
-     columnName = Convert.ToChar(65 + modulo) + columnName;
+     columnName = System.Convert.ToChar(65 + modulo) + columnName;
      dividend = (int)((dividend - modulo) / 26);
    }
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r14840 → r14843:
      <SubType>Code</SubType>
    </Compile>
+   <Compile Include="Converters\Convert.cs" />
+   <Compile Include="Converters\LinearModelToTreeConverter.cs" />
+   <Compile Include="Converters\TreeSimplifier.cs" />
+   <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" />
    <Compile Include="Formatters\InfixExpressionFormatter.cs" />
    <Compile Include="Formatters\SymbolicDataAnalysisExpressionMathematicaFormatter.cs" />
  …
    <Compile Include="Symbols\VariableConditionTreeNode.cs" />
    <Compile Include="Symbols\VariableTreeNode.cs" />
-   <Compile Include="Transformation\SymbolicDataAnalysisExpressionTreeSimplifier.cs" />
-   <Compile Include="Transformation\SymbolicExpressionTreeBacktransformator.cs" />
-   <Compile Include="Transformation\TreeToAutoDiffTermTransformator.cs" />
-   <Compile Include="Transformation\TransformationToSymbolicTreeMapper.cs" />
+   <Compile Include="Transformations\SymbolicExpressionTreeBacktransformator.cs" />
+   <Compile Include="Transformations\TransformationToSymbolicTreeMapper.cs" />
    <Compile Include="TreeMatching\SymbolicExpressionTreeBottomUpSimilarityCalculator.cs" />
    <Compile Include="TreeMatching\SymbolicExpressionTreeCanonicalSorter.cs" />
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimplificationOperator.cs
r14400 → r14843:
    }

-   private readonly SymbolicDataAnalysisExpressionTreeSimplifier simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
+   private readonly TreeSimplifier simplifier = new TreeSimplifier();

    [StorableConstructor]
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/DatasetExtensions.cs
r14400 → r14843:
    #endregion

+   using System;
    using System.Collections.Generic;
+   using System.Linq;

    namespace HeuristicLab.Problems.DataAnalysis {
      public static class DatasetExtensions {
-       public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
-         int i = 0;
-         foreach (var x in xs) {
-           if (i % nth == 0) yield return x;
-           i++;
-         }
-       }
+       public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
+         return ToArray(dataset,
+           variables,
+           transformations: variables.Select(_ => (ITransformation<double>)null), // no transform
+           rows: rows);
+       }
+       public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables,
+         IEnumerable<ITransformation<double>> transformations, IEnumerable<int> rows) {
+         string[] variablesArr = variables.ToArray();
+         int[] rowsArr = rows.ToArray();
+         ITransformation<double>[] transformArr = transformations.ToArray();
+         if (transformArr.Length != variablesArr.Length)
+           throw new ArgumentException("Number of variables and number of transformations must match.");
+
+         double[,] matrix = new double[rowsArr.Length, variablesArr.Length];
+
+         for (int i = 0; i < variablesArr.Length; i++) {
+           var origValues = dataset.GetDoubleValues(variablesArr[i], rowsArr);
+           var values = transformArr[i] != null ? transformArr[i].Apply(origValues) : origValues;
+           int row = 0;
+           foreach (var value in values) {
+             matrix[row, i] = value;
+             row++;
+           }
+         }
+
+         return matrix;
+       }
+
+       /// <summary>
+       /// Prepares a binary data matrix from a number of factors and specified factor values
+       /// </summary>
+       /// <param name="dataset">A dataset that contains the variable values</param>
+       /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
+       /// <param name="rows">An enumerable of row indices for the dataset</param>
+       /// <returns></returns>
+       /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
+       public static double[,] ToArray(
+         this IDataset dataset,
+         IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
+         IEnumerable<int> rows) {
+         // check input variables. Only string variables are allowed.
+         var invalidInputs =
+           factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name));
+         if (invalidInputs.Any())
+           throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
+
+         int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count());
+
+         List<int> rowsList = rows.ToList();
+         double[,] matrix = new double[rowsList.Count, numBinaryColumns];
+
+         int col = 0;
+         foreach (var kvp in factorVariables) {
+           var varName = kvp.Key;
+           var cats = kvp.Value;
+           if (!cats.Any()) continue;
+           foreach (var cat in cats) {
+             var values = dataset.GetStringValues(varName, rows);
+             int row = 0;
+             foreach (var value in values) {
+               matrix[row, col] = value == cat ? 1 : 0;
+               row++;
+             }
+             col++;
+           }
+         }
+         return matrix;
+       }
+
+       public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(
+         this IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) {
+         return from factor in factorVariables
+                let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray()
+                // 1 distinct value => skip (constant)
+                // 2 distinct values => only take one of the two values
+                // >=3 distinct values => create a binary value for each value
+                let reducedValues = distinctValues.Length <= 2
+                  ? distinctValues.Take(distinctValues.Length - 1)
+                  : distinctValues
+                select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
+       }
      }
    }
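The value-reduction rule in GetFactorVariableValues drops constant factors, keeps a single indicator column for binary factors, and keeps one column per level otherwise. A standalone illustration of just that rule (plain C#, not HeuristicLab code):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Reproduces the distinct-value reduction used by GetFactorVariableValues above:
    // 1 distinct value => no columns, 2 => one indicator column, >=3 => one column per value.
    class FactorReductionSketch {
      static IEnumerable<string> ReducedValues(IEnumerable<string> column) {
        var distinct = column.Distinct().ToArray();
        return distinct.Length <= 2 ? distinct.Take(distinct.Length - 1) : distinct;
      }

      static void Main() {
        Console.WriteLine(string.Join(",", ReducedValues(new[] { "a", "a", "a" })));           // (empty: constant factor)
        Console.WriteLine(string.Join(",", ReducedValues(new[] { "yes", "no", "yes" })));      // yes
        Console.WriteLine(string.Join(",", ReducedValues(new[] { "red", "green", "blue" })));  // red,green,blue
      }
    }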
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r14826 → r14843:
    }

+   public double[,] AllowedInputsTrainingValues {
+     get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); }
+   }
+
+   public double[,] AllowedInputsTestValues { get { return Dataset.ToArray(AllowedInputVariables, TestIndices); } }
    public IntRange TrainingPartition {
      get { return TrainingPartitionParameter.Value; }
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs
r14400 → r14843:
    public double Multiplier {
      get { return MultiplierParameter.Value.Value; }
-     protected set {
+     set {
        MultiplierParameter.Value.Value = value;
      }
  …
    public double Addend {
      get { return AddendParameter.Value.Value; }
-     protected set {
+     set {
        AddendParameter.Value.Value = value;
      }
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftStandardDistributionTransformation.cs
r14400 → r14843:
    public override IEnumerable<double> Apply(IEnumerable<double> data) {
-     ConfigureParameters(data);
      if (OriginalStandardDeviation.IsAlmost(0.0)) {
        return data;
  …
    }

-   protected void ConfigureParameters(IEnumerable<double> data) {
+   public override void ConfigureParameters(IEnumerable<double> data) {
      OriginalStandardDeviation = data.StandardDeviation();
      OriginalMean = data.Average();
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftToRangeTransformation.cs
r14400 → r14843:
    }

-   public override IEnumerable<double> Apply(IEnumerable<double> data) {
-     ConfigureParameters(data);
-     return base.Apply(data);
-   }
-
    public override bool Check(IEnumerable<double> data, out string errorMsg) {
      ConfigureParameters(data);
  …
    }

-   protected void ConfigureParameters(IEnumerable<double> data) {
+   public override void ConfigureParameters(IEnumerable<double> data) {
      double originalRangeStart = data.Min();
      double originalRangeEnd = data.Max();
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/Transformation.cs
r14400 → r14843:
    protected Transformation(IEnumerable<string> allowedColumns) : base(allowedColumns) { }

+   public virtual void ConfigureParameters(IEnumerable<T> data) {
+     // override in transformations with parameters
+   }
+
    public abstract IEnumerable<T> Apply(IEnumerable<T> data);
+   public IEnumerable<T> ConfigureAndApply(IEnumerable<T> data) {
+     ConfigureParameters(data);
+     return Apply(data);
+   }

    public abstract bool Check(IEnumerable<T> data, out string errorMsg);
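ConfigureAndApply is a small template method: ConfigureParameters inspects the data once (a no-op by default), then Apply uses the configured parameters. A simplified standalone sketch of the pattern (not the actual HeuristicLab classes):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Minimal version of the ConfigureParameters / Apply / ConfigureAndApply split introduced above.
    abstract class TransformationSketch {
      public virtual void ConfigureParameters(IEnumerable<double> data) { } // override when parameters depend on the data
      public abstract IEnumerable<double> Apply(IEnumerable<double> data);
      public IEnumerable<double> ConfigureAndApply(IEnumerable<double> data) {
        ConfigureParameters(data);
        return Apply(data);
      }
    }

    class ShiftToUnitRangeSketch : TransformationSketch {
      private double min, max;
      public override void ConfigureParameters(IEnumerable<double> data) {
        min = data.Min();
        max = data.Max();
      }
      public override IEnumerable<double> Apply(IEnumerable<double> data) {
        return data.Select(x => (x - min) / (max - min));
      }
    }

    class Program {
      static void Main() {
        var data = new[] { 2.0, 4.0, 10.0 };
        var t = new ShiftToUnitRangeSketch();
        Console.WriteLine(string.Join(", ", t.ConfigureAndApply(data))); // 0, 0.25, 1
      }
    }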
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r14400 → r14843:
    IEnumerable<string> AllowedInputVariables { get; }

+   double[,] AllowedInputsTrainingValues { get; }
+   double[,] AllowedInputsTestValues { get; }
+
    IntRange TrainingPartition { get; }
    IntRange TestPartition { get; }
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs
r14400 → r14843:
    public interface ITransformation<T> : ITransformation {
+     void ConfigureParameters(IEnumerable<T> data);
+     IEnumerable<T> ConfigureAndApply(IEnumerable<T> data);
      IEnumerable<T> Apply(IEnumerable<T> data);
    }
trunk/sources/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4/SymbolicDataAnalysisExpressionTreeSimplifierTest.cs
r14826 → r14843:
    public void SimplifierAxiomsTest() {
      SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
-     SymbolicDataAnalysisExpressionTreeSimplifier simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
+     TreeSimplifier simplifier = new TreeSimplifier();
      SymbolicExpressionTreeStringFormatter formatter = new SymbolicExpressionTreeStringFormatter();
      #region single argument arithmetics