Changeset 2542 for trunk/sources
- Timestamp: 12/03/09 18:05:27 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegressionOperator.cs
r2538 r2542 57 57 int minTimeOffset = minTimeOffsetData == null ? 0 : minTimeOffsetData.Data; 58 58 59 List<int> allowedColumns = CalculateAllowedColumns(dataset, targetVariableIndex, start, end); 59 IFunctionTree tree = CreateModel(dataset, targetVariable, dataset.VariableNames, start, end, minTimeOffset, maxTimeOffset); 60 scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("LinearRegressionModel"), new GeneticProgrammingModel(tree))); 61 return null; 62 } 63 64 public static IFunctionTree CreateModel(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, int start, int end) { 65 return CreateModel(dataset, targetVariable, inputVariables, start, end, 0, 0); 66 } 67 68 public static IFunctionTree CreateModel(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, 69 int start, int end, 70 int minTimeOffset, int maxTimeOffset) { 71 int targetVariableIndex = dataset.GetVariableIndex(targetVariable); 72 List<int> allowedColumns = CalculateAllowedColumns(dataset, targetVariableIndex, inputVariables.Select(x => dataset.GetVariableIndex(x)), start, end); 60 73 List<int> allowedRows = CalculateAllowedRows(dataset, targetVariableIndex, allowedColumns, start, end, minTimeOffset, maxTimeOffset); 61 74 … … 63 76 double[] targetVector = PrepareTargetVector(dataset, targetVariableIndex, allowedRows); 64 77 double[] coefficients = CalculateCoefficients(inputMatrix, targetVector); 65 IFunctionTree tree = CreateModel(coefficients, allowedColumns.Select(i => dataset.GetVariableName(i)).ToList(), minTimeOffset, maxTimeOffset); 66 67 scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("LinearRegressionModel"), new GeneticProgrammingModel(tree))); 68 return null; 78 return CreateModel(coefficients, allowedColumns.Select(i => dataset.GetVariableName(i)).ToList(), minTimeOffset, maxTimeOffset); 69 79 } 70 80 71 private IFunctionTree CreateModel(double[] coefficients, List<string> allowedVariables, int 
minTimeOffset, int maxTimeOffset) {81 private static IFunctionTree CreateModel(double[] coefficients, List<string> allowedVariables, int minTimeOffset, int maxTimeOffset) { 72 82 IFunctionTree root = new Addition().GetTreeNode(); 73 83 … … 90 100 } 91 101 92 private double[] CalculateCoefficients(double[,] inputMatrix, double[] targetVector) {102 private static double[] CalculateCoefficients(double[,] inputMatrix, double[] targetVector) { 93 103 int retVal = 0; 94 104 alglib.linreg.linearmodel lm = new alglib.linreg.linearmodel(); … … 115 125 116 126 //returns list of valid row indexes (rows without NaN values) 117 private List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, IList<int> allowedColumns, int start, int end, int minTimeOffset, int maxTimeOffset) {127 private static List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, IList<int> allowedColumns, int start, int end, int minTimeOffset, int maxTimeOffset) { 118 128 List<int> allowedRows = new List<int>(); 119 129 bool add; … … 140 150 141 151 //returns list of valid column indexes (columns which contain max. 
10% NaN (or infinity) and contain at least two different values) 142 private List<int> CalculateAllowedColumns(Dataset dataset, int targetVariable, int start, int end) {152 private static List<int> CalculateAllowedColumns(Dataset dataset, int targetVariable, IEnumerable<int> inputVariables, int start, int end) { 143 153 List<int> allowedColumns = new List<int>(); 144 154 double n = end - start; 145 for (int i= 0; i < dataset.Columns; i++) {146 double nanRatio = dataset.CountMissingValues(i , start, end) / n;147 if (i != targetVariable && nanRatio < 0.1 && dataset.GetRange(i, start, end) > 0.0) {148 allowedColumns.Add(i );155 foreach (int inputVariable in inputVariables) {// = 0; i < dataset.Columns; i++) { 156 double nanRatio = dataset.CountMissingValues(inputVariable, start, end) / n; 157 if (inputVariable != targetVariable && nanRatio < 0.1 && dataset.GetRange(inputVariable, start, end) > 0.0) { 158 allowedColumns.Add(inputVariable); 149 159 } 150 160 } … … 152 162 } 153 163 154 private double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {164 private static double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) { 155 165 int rowCount = allowedRows.Count; 156 166 int timeOffsetRange = (maxTimeOffset - minTimeOffset + 1); … … 167 177 } 168 178 169 private double[] PrepareTargetVector(Dataset dataset, int targetVariable, List<int> allowedRows) {179 private static double[] PrepareTargetVector(Dataset dataset, int targetVariable, List<int> allowedRows) { 170 180 int rowCount = allowedRows.Count; 171 181 double[] targetVector = new double[rowCount];
Note: See TracChangeset for help on using the changeset viewer.