Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/31/20 12:06:20 (4 years ago)
Author:
lleko
Message:

#3022 add normalization

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/3022-FastFunctionExtraction/FFX/BFUtils.cs

    r17737 r17740  
    11using HeuristicLab.Algorithms.DataAnalysis.Glmnet;
     2using HeuristicLab.Common;
    23using HeuristicLab.Data;
    34using HeuristicLab.Problems.DataAnalysis;
     
    1819            if (approach.AllowHinge) {
    1920                // only allow hinge functions for features with exponent = 1 (deemed too complex otherwise)
    20                 var linearSimpleBasisFuncs = simpleBasisFuncs.Where(simpleBf => simpleBf.Exponent == 1);
     21                var linearSimpleBasisFuncs = simpleBasisFuncs.Where(simpleBf => simpleBf.Exponent == 1 && simpleBf.Operator.Equals(NonlinearOperator.None));
    2122                simpleBasisFuncs = simpleBasisFuncs.Concat(CreateHingeBases(data, linearSimpleBasisFuncs, approach.MinHingeThr, approach.MaxHingeThr, approach.NumHingeThrs));
    2223            }
     
    8889        // the importance of a basis function is measured by the absolute value of its coefficient when optimized on the data
    8990        public static IEnumerable<ISimpleBasisFunction> OrderBasisFuncsByImportance(IRegressionProblemData data, IList<ISimpleBasisFunction> candidateFunctions) {
    90             var elnetData = PrepareData(Normalize(data), candidateFunctions);
     91            var elnetData = PrepareData(Normalize(data, out _, out _, out _, out _), candidateFunctions);
    9192            var coeff = ElasticNetLinearRegression.CalculateModelCoefficients(elnetData, 0, 0, out var trainNMSE, out var testNMSE); // LS-fit
    9293            var intercept = coeff.Last();
     
    177178        }
    178179
    179         private static IRegressionProblemData Normalize(IRegressionProblemData data)
    180             => new RegressionProblemData(Normalize(data.Dataset), data.AllowedInputVariables, data.TargetVariable);
     180        public static IRegressionProblemData Normalize(IRegressionProblemData data, out double[] X_avgs, out double[] X_stds, out double y_avg, out double y_std) {
     181            X_avgs = data.AllowedInputVariables
     182                .Select(varname => data.Dataset.GetDoubleValues(varname)
     183                    .Average())
     184                .ToArray();
     185            X_stds = data.AllowedInputVariables
     186                .Select(varname => data.Dataset.GetDoubleValues(varname)
     187                    .StandardDeviationPop())
     188                .ToArray();
     189            for (int i = 0; i < X_stds.Length; i++) {
     190                if (X_stds[i] == 0) X_stds[i] = 1;
     191            }
     192            y_avg = data.TargetVariableValues.Average();
     193            y_std = data.TargetVariableValues.StandardDeviationPop();
     194            if (y_std == 0) y_std = 1;
     195            var temp = Normalize(data.Dataset);
     196            var ans = new RegressionProblemData(Normalize(data.Dataset), data.AllowedInputVariables, data.TargetVariable);
     197            return ans;
     198        }
    181199
    182200        // return a normalized version of IDataset ds
Note: See TracChangeset for help on using the changeset viewer.