Changeset 18220 for trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/NMSESingleObjectiveConstraintsEvaluator.cs
- Timestamp:
- 02/24/22 20:33:45 (12 months ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk
- Property svn:mergeinfo changed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression
- Property svn:mergeinfo changed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4
- Property svn:mergeinfo changed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/NMSESingleObjectiveConstraintsEvaluator.cs
// Reconstructed members of NMSESingleObjectiveConstraintsEvaluator as of changeset r18220.
// NOTE(review): the enclosing class declaration and two members whose signatures fall outside
// the visible diff context (the method that reads ApplyLinearScalingParameter and calls
// Evaluate(...) around line 125, and the ExecutionContext wrapper around lines 221-237)
// are not reproduced here and must be taken from the full file.

/// <summary>
/// Computes optimal linear scaling terms (offset <c>alpha</c> and slope <c>beta</c>) for the
/// model encoded in <paramref name="tree"/> and writes them back into the tree's dedicated
/// scaling nodes. Requires a tree built with the LinearScalingGrammar, i.e.
/// Root -&gt; Start -&gt; Addition (offset) whose first subtree is a Multiplication (scaling) node.
/// </summary>
/// <exception cref="ArgumentException">Thrown if the tree lacks the offset/scaling nodes.</exception>
private static void CalcLinearScalingTerms(
  ISymbolicExpressionTree tree,
  IRegressionProblemData problemData,
  IEnumerable<int> rows,
  ISymbolicDataAnalysisExpressionTreeInterpreter interpreter) {
  var offset = tree.Root.GetSubtree(0)   // Start
                        .GetSubtree(0);  // Offset (Addition)
  var scaling = offset.GetSubtree(0);    // Scaling (Multiplication)

  // Check if tree contains the offset and scaling nodes produced by the LinearScalingGrammar.
  if (!(offset.Symbol is Addition) || !(scaling.Symbol is Multiplication))
    throw new ArgumentException("Scaling can only be used with LinearScalingGrammar.");

  // Evaluate only the unscaled model: clone the inner function term into a fresh tree.
  var rootNode = new ProgramRootSymbol().CreateTreeNode();
  var startNode = new StartSymbol().CreateTreeNode();
  var t = (ISymbolicExpressionTreeNode)scaling.GetSubtree(0).Clone();
  rootNode.AddSubtree(startNode);
  startNode.AddSubtree(t);
  var newTree = new SymbolicExpressionTree(rootNode);

  // Calculate alpha and beta for scaling.
  var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(newTree, problemData.Dataset, rows);
  var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
  OnlineLinearScalingParameterCalculator.Calculate(estimatedValues, targetValues, out var alpha, out var beta,
    out var errorState);

  if (errorState == OnlineCalculatorError.None) {
    // Write alpha and beta into the scaling nodes provided by the grammar.
    // Direct casts instead of 'as' so a malformed tree fails with an InvalidCastException
    // instead of a NullReferenceException on the following dereference.
    var offsetParameter = (NumberTreeNode)offset.GetSubtree(1);
    offsetParameter.Value = alpha;
    var scalingParameter = (NumberTreeNode)scaling.GetSubtree(1);
    scalingParameter.Value = beta;
  }
}

/// <summary>
/// Calculates the constraint-aware NMSE of <paramref name="tree"/> on the given rows.
/// Returns 1.0 (worst quality) when the NMSE cannot be computed, when a constraint violation
/// is NaN/infinite, or (hard-constraint mode) when any constraint is violated. In
/// soft-constraint mode a weighted violation penalty is added to the (capped) NMSE.
/// </summary>
/// <exception cref="ArgumentException">Thrown if <paramref name="penaltyFactor"/> is negative.</exception>
public static double Calculate(
  ISymbolicExpressionTree tree,
  IRegressionProblemData problemData, IEnumerable<int> rows,
  ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  double lowerEstimationLimit, double upperEstimationLimit,
  IBoundsEstimator estimator,
  bool useSoftConstraints = false, double penaltyFactor = 1.0) {
  // Validate arguments up front instead of deep inside the soft-constraints branch,
  // so an invalid penaltyFactor fails fast rather than after tree evaluation.
  if (useSoftConstraints && penaltyFactor < 0.0)
    throw new ArgumentException("The parameter has to be >= 0.0.", nameof(penaltyFactor));

  var constraints = Enumerable.Empty<ShapeConstraint>();
  if (problemData is ShapeConstrainedRegressionProblemData scProbData)
    constraints = scProbData.ShapeConstraints.EnabledConstraints;

  var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
  var boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
  var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
  var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
  if (errorState != OnlineCalculatorError.None)
    return 1.0;

  if (!constraints.Any())
    return nmse;

  var intervalCollection = problemData.VariableRanges;
  var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, estimator, intervalCollection, tree);

  // Infinite/NaN constraint violations cannot be penalized meaningfully -> worst quality.
  if (constraintViolations.Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    return 1.0;

  // Hard constraints: any violation yields the worst quality.
  if (!useSoftConstraints) {
    if (constraintViolations.Any(x => x > 0.0))
      return 1.0;
    return nmse;
  }

  // Soft constraints: add the weighted average violation as a penalty to the capped NMSE.
  var weightedViolationsAvg = constraints
    .Zip(constraintViolations, (c, v) => c.Weight * v)
    .Average();

  return Math.Min(nmse, 1.0) + penaltyFactor * weightedViolationsAvg;
}

/// <summary>
/// Evaluates <paramref name="tree"/>: optionally optimizes its numeric parameters or (when
/// parameter optimization is disabled and <paramref name="applyLinearScaling"/> is set)
/// computes the linear scaling terms in-place, then returns the constraint-aware NMSE.
/// </summary>
public override double Evaluate(
  ISymbolicExpressionTree tree,
  IRegressionProblemData problemData,
  IEnumerable<int> rows,
  ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  bool applyLinearScaling = true,
  double lowerEstimationLimit = double.MinValue,
  double upperEstimationLimit = double.MaxValue) {

  if (OptimizeParameters)
    Optimize(
      interpreter, tree,
      problemData, rows,
      ParameterOptimizationIterations,
      updateVariableWeights: true,
      lowerEstimationLimit,
      upperEstimationLimit);
  else if (applyLinearScaling) // extra scaling terms, which are included in tree
    CalcLinearScalingTerms(tree, problemData, rows, interpreter);

  return Calculate(
    tree, problemData,
    rows, interpreter,
    lowerEstimationLimit,
    upperEstimationLimit,
    BoundsEstimator,
    UseSoftConstraints,
    PenalityFactor);
}

/// <summary>
/// Optimizes the numeric parameters of <paramref name="tree"/> with ALGLIB's
/// Levenberg-Marquardt solver (minlm), updating the tree in-place when the optimized
/// parameters improve the mean squared error. Returns the achieved MSE (or the original
/// MSE when optimization fails or degrades quality).
/// </summary>
/// <exception cref="NotSupportedException">Thrown when the tree contains symbols that
/// cannot be converted for automatic differentiation.</exception>
public static double Optimize(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
  int maxIterations, bool updateVariableWeights = true,
  double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
  bool updateParametersInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {

  // Materialize the row sequence once; it is enumerated several times below
  // (counting, data extraction, and two quality evaluations).
  var rowList = rows.ToList();

  // Numeric parameters in the tree become variables for parameter optimization.
  // Variables in the tree become parameters (fixed values) for parameter optimization.
  // For each parameter (variable in the original tree) we store the
  // variable name, variable value (for factor vars) and lag as a DataForVariable object.
  // A dictionary is used to find parameters.
  double[] initialParameters;
  var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();

  TreeToAutoDiffTermConverter.ParametricFunction func;
  TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
  if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, addLinearScalingTerms: false, out parameters, out initialParameters, out func, out func_grad))
    throw new NotSupportedException("Could not optimize parameters of symbolic expression tree due to not supported symbols used in the tree.");
  var parameterEntries = parameters.ToArray(); // order of entries must be the same for x

  // Extract initial parameters (keep a copy so they can be restored on failure).
  double[] c = (double[])initialParameters.Clone();

  double originalQuality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
    tree, problemData, rowList,
    interpreter, applyLinearScaling: false,
    lowerEstimationLimit,
    upperEstimationLimit);

  if (counter == null) counter = new EvaluationsCounter();
  var rowEvaluationsCounter = new EvaluationsCounter();

  alglib.minlmreport rep;

  IDataset ds = problemData.Dataset;
  int n = rowList.Count;
  int k = parameters.Count;

  // Build the n x k input matrix: one row per data row, one column per autodiff variable.
  double[,] x = new double[n, k];
  int row = 0;
  foreach (var r in rowList) {
    int col = 0;
    foreach (var info in parameterEntries) {
      if (ds.VariableHasType<double>(info.variableName)) {
        x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
      } else if (ds.VariableHasType<string>(info.variableName)) {
        // Factor variables are one-hot encoded: 1 when the row's value matches this entry.
        x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
      } else throw new InvalidProgramException("found a variable of unknown type");
      col++;
    }
    row++;
  }
  double[] y = ds.GetDoubleValues(problemData.TargetVariable, rowList).ToArray();

  // The report callback is only registered with the solver when iterationCallback is
  // non-null (see minlmsetxrep below), so the lambda can never be invoked on null.
  alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);

  try {
    alglib.minlmcreatevj(y.Length, c, out var lmstate);
    alglib.minlmsetcond(lmstate, 0.0, maxIterations);
    alglib.minlmsetxrep(lmstate, iterationCallback != null);
    // alglib.minlmoptguardgradient(lmstate, 1e-5); // for debugging gradient calculation
    alglib.minlmoptimize(lmstate, CreateFunc(func, x, y), CreateJac(func_grad, x, y), xrep, rowEvaluationsCounter);
    alglib.minlmresults(lmstate, out c, out rep);
    // alglib.minlmoptguardresults(lmstate, out var optGuardReport);
  } catch (ArithmeticException) {
    return originalQuality;
  } catch (alglib.alglibexception) {
    return originalQuality;
  }

  // Convert per-row evaluation counts into whole-dataset evaluation counts.
  counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
  counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;

  // TerminationType, completion code:
  //   -8  optimizer detected NAN/INF values either in the function itself, or in its Jacobian
  //   -5  inappropriate solver was used: solver created with minlmcreatefgh() used on problem
  //       with general linear constraints (set with minlmsetlc() call)
  //   -3  constraints are inconsistent
  //    2  relative step is no more than EpsX
  //    5  MaxIts steps was taken
  //    7  stopping conditions are too stringent, further improvement is impossible
  //    8  terminated by user who called MinLMRequestTermination(); X contains point which was
  //       "current accepted" when termination request was submitted
  if (rep.terminationtype > 0) {
    UpdateParameters(tree, c, updateVariableWeights);
  }
  var quality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
    tree, problemData, rowList,
    interpreter, applyLinearScaling: false,
    lowerEstimationLimit, upperEstimationLimit);

  if (!updateParametersInTree) UpdateParameters(tree, initialParameters, updateVariableWeights);

  // Keep the optimized parameters only when they actually improve quality.
  if (originalQuality < quality || double.IsNaN(quality)) {
    UpdateParameters(tree, initialParameters, updateVariableWeights);
    return originalQuality;
  }
  return quality;
}

/// <summary>
/// Writes the flat parameter vector back into the tree's terminal nodes in prefix order,
/// mirroring the ordering produced by TreeToAutoDiffTermConverter.
/// </summary>
private static void UpdateParameters(ISymbolicExpressionTree tree, double[] parameters, bool updateVariableWeights) {
  int i = 0;
  foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
    NumberTreeNode numberTreeNode = node as NumberTreeNode;
    VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
    FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
    if (numberTreeNode != null) {
      if (numberTreeNode.Parent.Symbol is Power
          && numberTreeNode.Parent.GetSubtree(1) == numberTreeNode) continue; // exponents in powers are not optimized (see TreeToAutoDiffTermConverter)
      numberTreeNode.Value = parameters[i++];
    } else if (updateVariableWeights && variableTreeNodeBase != null)
      variableTreeNodeBase.Weight = parameters[i++];
    else if (factorVarTreeNode != null) {
      for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
        factorVarTreeNode.Weights[j] = parameters[i++];
    }
  }
}

/// <summary>
/// Wraps a parametric function as an ALGLIB residual-vector callback.
/// The function must return residuals; alglib minimizes the sum of squared residuals.
/// </summary>
private static alglib.ndimensional_fvec CreateFunc(TreeToAutoDiffTermConverter.ParametricFunction func, double[,] x, double[] y) {
  int d = x.GetLength(1);
  // Reusable row buffer.
  var xi = new double[d];
  return (double[] c, double[] resid, object o) => {
    for (int i = 0; i < y.Length; i++) {
      // Copy row i. Buffer.BlockCopy instead of Array.Copy because x has rank 2.
      Buffer.BlockCopy(x, i * d * sizeof(double), xi, 0, d * sizeof(double));
      resid[i] = func(c, xi) - y[i];
    }
    var counter = (EvaluationsCounter)o;
    counter.FunctionEvaluations += y.Length;
  };
}

/// <summary>
/// Wraps a parametric gradient function as an ALGLIB Jacobian callback: fills both the
/// residual vector and the Jacobian matrix (one gradient row per data row).
/// </summary>
private static alglib.ndimensional_jac CreateJac(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad, double[,] x, double[] y) {
  int numInputColumns = x.GetLength(1);
  // Reusable row buffer.
  var xi = new double[numInputColumns];
  return (double[] c, double[] resid, double[,] jac, object o) => {
    int numOptimizedParams = c.Length;
    for (int i = 0; i < y.Length; i++) {
      // Copy row i of the input matrix.
      Buffer.BlockCopy(x, i * numInputColumns * sizeof(double), xi, 0, numInputColumns * sizeof(double));
      var tuple = func_grad(c, xi);
      resid[i] = tuple.Item2 - y[i];
      // Copy the gradient into row i of jac. BlockCopy because jac has rank 2.
      Buffer.BlockCopy(tuple.Item1, 0, jac, i * numOptimizedParams * sizeof(double), numOptimizedParams * sizeof(double));
    }
    var counter = (EvaluationsCounter)o;
    counter.FunctionEvaluations += y.Length;
    counter.GradientEvaluations += y.Length;
  };
}

/// <summary>Mutable tally of function/gradient evaluations performed during optimization.</summary>
public class EvaluationsCounter {
  public int FunctionEvaluations = 0;
  public int GradientEvaluations = 0;
}
Note: See TracChangeset
for help on using the changeset viewer.