Changeset 17986 for branches/3106_AnalyticContinuedFractionsRegression
Timestamp: 06/13/21 19:36:59 (4 years ago)
Location: branches/3106_AnalyticContinuedFractionsRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/ContinuedFractionRegression
Files: 2 edited
Legend: unchanged lines are shown as plain context; added lines are prefixed with "+", removed lines with "-"; "…" marks folded, unchanged regions.
branches/3106_AnalyticContinuedFractionsRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/ContinuedFractionRegression/Algorithm.cs
r17985 → r17986:

    private const string DeltaParameterName = "Delta";
    private const string ScaleDataParameterName = "ScaleData";
+   private const string LocalSearchMinNumSamplesParameterName = "LocalSearchMinNumSamples";
+   private const string LocalSearchSamplesFractionParameterName = "LocalSearchSamplesFraction";

…
    get { return ScaleDataParameter.Value.Value; }
    set { ScaleDataParameter.Value.Value = value; }
+ }
+ public IFixedValueParameter<IntValue> LocalSearchMinNumSamplesParameter => (IFixedValueParameter<IntValue>)Parameters[LocalSearchMinNumSamplesParameterName];
+ public int LocalSearchMinNumSamples {
+   get { return LocalSearchMinNumSamplesParameter.Value.Value; }
+   set { LocalSearchMinNumSamplesParameter.Value.Value = value; }
+ }
+ public IFixedValueParameter<PercentValue> LocalSearchSamplesFractionParameter => (IFixedValueParameter<PercentValue>)Parameters[LocalSearchSamplesFractionParameterName];
+ public double LocalSearchSamplesFraction {
+   get { return LocalSearchSamplesFractionParameter.Value.Value; }
+   set { LocalSearchSamplesFractionParameter.Value.Value = value; }
  }
  #endregion
…
    Parameters.Add(new FixedValueParameter<IntValue>(LocalSearchIterationsParameterName, "Number of iterations for local search (simplex) (default value 250)", new IntValue(250)));
    Parameters.Add(new FixedValueParameter<IntValue>(LocalSearchRestartsParameterName, "Number of restarts for local search (default value 4)", new IntValue(4)));
-   Parameters.Add(new FixedValueParameter<DoubleValue>(LocalSearchToleranceParameterName, "The tolerance value for local search (simplex) (default value: 1e-3)", new DoubleValue(1e-3)));
+   Parameters.Add(new FixedValueParameter<DoubleValue>(LocalSearchToleranceParameterName, "The tolerance value for local search (simplex) (default value: 1e-3)", new DoubleValue(1e-3))); // page 12 of the preprint
    Parameters.Add(new FixedValueParameter<PercentValue>(DeltaParameterName, "The relative weight for the number of variables term in the fitness function (default value: 10%)", new PercentValue(0.1)));
    Parameters.Add(new FixedValueParameter<BoolValue>(ScaleDataParameterName, "Turns on/off scaling of input variable values to the range [0 .. 1] (default: false)", new BoolValue(false)));
- }
+   Parameters.Add(new FixedValueParameter<IntValue>(LocalSearchMinNumSamplesParameterName, "The minimum number of samples for the local search (default 200)", new IntValue(200)));
+   Parameters.Add(new FixedValueParameter<PercentValue>(LocalSearchSamplesFractionParameterName, "The fraction of samples used for local search. Only used when the number of samples is more than " + LocalSearchMinNumSamplesParameterName + " (default 20%)", new PercentValue(0.2)));
+ }
+
+ [StorableHook(HookType.AfterDeserialization)]
+ private void AfterDeserialization() {
+   // for backwards compatibility
+   if (!Parameters.ContainsKey(LocalSearchMinNumSamplesParameterName))
+     Parameters.Add(new FixedValueParameter<IntValue>(LocalSearchMinNumSamplesParameterName, "The minimum number of samples for the local search (default 200)", new IntValue(200)));
+   if (!Parameters.ContainsKey(LocalSearchSamplesFractionParameterName))
+     Parameters.Add(new FixedValueParameter<PercentValue>(LocalSearchSamplesFractionParameterName, "The fraction of samples used for local search. Only used when the number of samples is more than " + LocalSearchMinNumSamplesParameterName + " (default 20%)", new PercentValue(0.2)));
+ }
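Read together, the descriptions of the two new parameters mean: datasets with at most LocalSearchMinNumSamples rows are used in full during local search, while larger datasets are subsampled down to LocalSearchSamplesFraction of their rows. A minimal sketch of that rule (the helper name is illustrative, not part of the changeset; the committed logic lives in LocalSearchSimplex further down):

    // Illustrative only: subsample size used by the simplex local search.
    static int LocalSearchSampleSize(int numRows, int minNumRows, double samplesFrac) {
      if (numRows > minNumRows)
        return (int)(numRows * samplesFrac); // large datasets: use a fraction (default 20%)
      return numRows;                        // small datasets: use every row
    }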
…
  public override IDeepCloneable Clone(Cloner cloner) {
…
    var problemData = Problem.ProblemData;
    double[,] xy;
+   var transformations = new List<ITransformation<double>>();
    if (ScaleData) {
+     // TODO: scaling via transformations is really ugly.
      // Scale data to range 0 .. 1
      //
      // Scaling was not used for the experiments in the paper. Statement by the authors: "We did not pre-process the data."
-     var transformations = new List<Transformation<double>>();
      foreach (var input in problemData.AllowedInputVariables) {
        var values = problemData.Dataset.GetDoubleValues(input, problemData.TrainingIndices);
…
        linTransformation.Addend = -min / range;
        linTransformation.Multiplier = 1.0 / range;
+       linTransformation.ColumnParameter.ActualValue = linTransformation.ColumnParameter.ValidValues.First(sv => sv.Value == input);
        transformations.Add(linTransformation);
      }
      // do not scale the target
-     transformations.Add(new LinearTransformation(problemData.AllowedInputVariables) { Addend = 0.0, Multiplier = 1.0 });
+     var targetTransformation = new LinearTransformation(new string[] { problemData.TargetVariable }) { Addend = 0.0, Multiplier = 1.0 };
+     targetTransformation.ColumnParameter.ActualValue = targetTransformation.ColumnParameter.ValidValues.First(sv => sv.Value == problemData.TargetVariable);
+     transformations.Add(targetTransformation);
      xy = problemData.Dataset.ToArray(problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable }),
        transformations,
…
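For reference, the LinearTransformation configured above is plain min-max scaling: with the training range $[x_{\min}, x_{\max}]$ of an input and $\text{range} = x_{\max} - x_{\min}$, the settings in the loop give

    x' = \text{Multiplier} \cdot x + \text{Addend}
       = \frac{x - x_{\min}}{x_{\max} - x_{\min}} \in [0, 1],
    \qquad \text{Multiplier} = \frac{1}{x_{\max} - x_{\min}},
    \quad \text{Addend} = \frac{-x_{\min}}{x_{\max} - x_{\min}}.

The target variable gets an identity transformation (Multiplier = 1, Addend = 0) so that only the inputs are rescaled.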
174 // "We executed the MaintainInvariant() in the following steps: 175 // - After generating the initial population 176 // - after resetting the root 177 // - after executing the local-search on the whole population. 178 // We updated the pocket/ current automatically after mutation and recombination operation." 179 180 /* local search optimization of current solutions */ 181 foreach (var agent in pop_r.IterateLevels()) { 182 LocalSearchSimplex(localSearchIterations, localSearchRestarts, localSearchTolerance, evalDelta, agent, trainingData, rand); 183 Debug.Assert(agent.pocketObjValue < agent.currentObjValue); 184 } 185 foreach (var agent in pop_r.IteratePostOrder()) agent.MaintainInvariant(); // post-order to make sure that the root contains the best model 186 foreach (var agent in pop_r.IteratePostOrder()) agent.AssertInvariant(); 187 188 // for detecting stagnation we track the best objective value since the last reset 189 // and reset if this does not change for stagnatingGens 190 if (gen > episodeBestObjGen + stagnatingGens) { 191 Reset(pop_r, nVars, depth, rand, trainingData); 192 episodeBestObj = double.MaxValue; 193 } 194 if (episodeBestObj > pop_r.pocketObjValue) { 195 episodeBestObjGen = gen; // wait at least stagnatingGens until resetting again 196 episodeBestObj = pop_r.pocketObjValue; 197 } 198 199 /* replace old population with evolved population */ 200 pop = pop_r; 201 202 /* keep track of the best solution */ 203 if (bestObj > pop.pocketObjValue) { 204 bestObj = pop.pocketObjValue; 205 Results.AddOrUpdateResult("MSE (best)", new DoubleValue(bestObj)); 206 Results.AddOrUpdateResult("Solution", CreateSymbolicRegressionSolution(pop.pocket, trainingData, Problem.ProblemData.AllowedInputVariables.ToArray(), Problem.ProblemData.TargetVariable)); 207 } 208 174 175 void iterationCallback(Agent pop) { 209 176 #region visualization and debugging 210 177 DataTable qualities; … … 230 197 #endregion 231 198 } 199 void newBestSolutionCallback(ContinuedFraction best, double objVal) { 200 Results.AddOrUpdateResult("MSE (best)", new DoubleValue(objVal)); 201 Results.AddOrUpdateResult("Solution", CreateSymbolicRegressionSolution(best, problemData, transformations)); 202 } 203 204 CFRAlgorithm(nVars, Depth, MutationRate, xy, out var bestObj, rand, NumGenerations, StagnationGenerations, 205 Delta, 206 LocalSearchIterations, LocalSearchRestarts, LocalSearchTolerance, newBestSolutionCallback, iterationCallback, cancellationToken); 207 } 208 209 private void CFRAlgorithm(int nVars, int depth, double mutationRate, double[,] trainingData, 210 out double bestObj, 211 IRandom rand, int numGen, int stagnatingGens, double evalDelta, 212 int localSearchIterations, int localSearchRestarts, double localSearchTolerance, 213 Action<ContinuedFraction, double> newBestSolutionCallback, 214 Action<Agent> iterationCallback, 215 CancellationToken cancellationToken) { 216 /* Algorithm 1 */ 217 /* Generate initial population by a randomized algorithm */ 218 var pop = InitialPopulation(nVars, depth, rand, trainingData); 219 bestObj = pop.pocketObjValue; 220 // the best value since the last reset 221 var episodeBestObj = pop.pocketObjValue; 222 var episodeBestObjGen = 0; 223 for (int gen = 1; gen <= numGen && !cancellationToken.IsCancellationRequested; gen++) { 224 /* mutate each current solution in the population */ 225 var pop_mu = Mutate(pop, mutationRate, rand, trainingData); 226 /* generate new population by recombination mechanism */ 227 var pop_r = RecombinePopulation(pop_mu, rand, nVars, trainingData); 228 
…
  }

- // Reset is not described in detail in the paper.
+ // Our criterion for relevance has been fairly strict: if no
+ // better model has been produced for five(5) straight generations,
+ // then the pocket of the root agent is removed and a new solution is created at random.
+
  // Statement by the authors: "We only replaced the pocket solution of the root with
  // a randomly generated solution. Then we execute the maintain-invariant process.
…
  private void Reset(Agent root, int nVars, int depth, IRandom rand, double[,] trainingData) {
    root.pocket = new ContinuedFraction(nVars, depth, rand);
-   root.current = new ContinuedFraction(nVars, depth, rand);
-
-   root.currentObjValue = Evaluate(root.current, trainingData, Delta);
    root.pocketObjValue = Evaluate(root.pocket, trainingData, Delta);
…

    a.current = ch;
-   LocalSearchSimplex(LocalSearchIterations, LocalSearchRestarts, LocalSearchTolerance, Delta, a, trainingData, rand);
+   LocalSearchSimplex(LocalSearchIterations, LocalSearchRestarts, LocalSearchTolerance, LocalSearchMinNumSamples, LocalSearchSamplesFraction, Delta, a, trainingData, rand);
    return ch;
  }
…
  // non-zero coefficients. We do not apply mutation on pocket solutions because we consider them as a "collective memory"
  // of good models visited in the past. They influence the search process via recombination only.
-   LocalSearchSimplex(LocalSearchIterations, LocalSearchRestarts, LocalSearchTolerance, Delta, agent, trainingData, rand);
+   LocalSearchSimplex(LocalSearchIterations, LocalSearchRestarts, LocalSearchTolerance, LocalSearchMinNumSamples, LocalSearchSamplesFraction, Delta, agent, trainingData, rand);
    }
  }
…

- private static void LocalSearchSimplex(int iterations, int restarts, double tolerance, double delta, Agent a, double[,] trainingData, IRandom rand) {
-   double uniformPeturbation = 1.0;
+ // The authors used the Nelder Mead solver from https://direct.mit.edu/evco/article/25/3/351/1046/Evolving-a-Nelder-Mead-Algorithm-for-Optimization
+ // Using different solvers (e.g. LevMar) is mentioned but not analysed
+
+ private static void LocalSearchSimplex(int iterations, int restarts, double tolerance, int minNumRows, double samplesFrac, double delta, Agent a, double[,] trainingData, IRandom rand) {
    int maxEvals = iterations;
    int numSearches = restarts + 1;
    var numRows = trainingData.GetLength(0);
-   int numSelectedRows = numRows / 5; // 20% of the training samples
+   int numSelectedRows = numRows;
+   if (numRows > minNumRows)
+     numSelectedRows = (int)(numRows * samplesFrac);

    var ch = a.current;
…
    var bestCoeff = origCoeff;

-   var fittingData = SelectRandomRows(trainingData, numSelectedRows, rand);
+   double[,] fittingData = null;

    double objFunc(double[] curCoeff) {
…

    for (int count = 0; count < numSearches; count++) {
+     fittingData = SelectRandomRows(trainingData, numSelectedRows, rand);

      SimplexConstant[] constants = new SimplexConstant[origCoeff.Length];
      for (int i = 0; i < origCoeff.Length; i++) {
-       constants[i] = new SimplexConstant(origCoeff[i], uniformPeturbation);
+       constants[i] = new SimplexConstant(origCoeff[i], initialPerturbation: 1.0);
      }
…
      SetCoeff(ch, optimizedCoeff);

+     // the result with the best guiding function value (on the entire dataset) is chosen.
      var newQuality = Evaluate(ch, trainingData, delta);
…
        bestQuality = newQuality;
      }
-   } // reps
+   }

    SetCoeff(a.current, bestCoeff);
…
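SelectRandomRows is called above but is outside this changeset; a minimal sketch of what such a helper could look like (illustrative; the committed implementation may differ, e.g. by sampling without replacement):

    private static double[,] SelectRandomRows(double[,] data, int numSelectedRows, IRandom rand) {
      int numRows = data.GetLength(0), numCols = data.GetLength(1);
      var selected = new double[numSelectedRows, numCols];
      for (int i = 0; i < numSelectedRows; i++) {
        int row = rand.Next(numRows); // draw with replacement for simplicity
        for (int j = 0; j < numCols; j++)
          selected[i, j] = data[row, j];
      }
      return selected;
    }

Note the design change: moving the SelectRandomRows call inside the restart loop means each restart fits coefficients on a fresh subsample, while the final comparison still evaluates candidates on the entire training data.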
  Symbol varSy = new Problems.DataAnalysis.Symbolic.Variable();

- private ISymbolicRegressionSolution CreateSymbolicRegressionSolution(ContinuedFraction cfrac, double[,] trainingData, string[] variables, string targetVariable) {
+ private ISymbolicRegressionSolution CreateSymbolicRegressionSolution(ContinuedFraction cfrac, IRegressionProblemData problemData, List<ITransformation<double>> transformations) {
+   var variables = problemData.AllowedInputVariables.ToArray();
    ISymbolicExpressionTreeNode res = null;
    for (int i = cfrac.h.Length - 1; i > 1; i -= 2) {
…
    start.AddSubtree(h0Term);

-   var model = new SymbolicRegressionModel(targetVariable, new SymbolicExpressionTree(progRoot), new SymbolicDataAnalysisExpressionTreeBatchInterpreter());
-   var ds = new Dataset(variables.Concat(new[] { targetVariable }), trainingData);
-   var problemData = new RegressionProblemData(ds, variables, targetVariable);
-   var sol = new SymbolicRegressionSolution(model, problemData);
+   ISymbolicRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, new SymbolicExpressionTree(progRoot), new SymbolicDataAnalysisExpressionTreeBatchInterpreter());
+   if (transformations != null && transformations.Any()) {
+     var backscaling = new SymbolicExpressionTreeBacktransformator(new TransformationToSymbolicTreeMapper());
+     model = (ISymbolicRegressionModel)backscaling.Backtransform(model, transformations, problemData.TargetVariable);
+   }
+   var sol = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());
    return sol;
  }
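The loop in CreateSymbolicRegressionSolution assembles the expression tree from the term array h (of length depth * 2 + 1, see ContinuedFraction.cs below), working from the innermost pair of terms outwards. The folded loop body is not part of this diff, but the model it builds is the analytic continued fraction of the paper, of the form

    f(\mathbf{x}) = h_0(\mathbf{x}) + \cfrac{h_1(\mathbf{x})}{h_2(\mathbf{x}) + \cfrac{h_3(\mathbf{x})}{h_4(\mathbf{x}) + \cdots}}

where, per the paper, each h_i(x) is an affine function of the whitelisted input variables.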
branches/3106_AnalyticContinuedFractionsRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/ContinuedFractionRegression/ContinuedFraction.cs
r17984 → r17986:

  public ContinuedFraction(int nVars, int depth, IRandom rand) {
    this.vars = new bool[nVars];
-   for (int i = 0; i < nVars; i++) vars[i] = rand.NextDouble() < 0.5;
+   for (int i = 0; i < nVars; i++) vars[i] = rand.NextDouble() < 0.3; // page 12 of the preprint. Each input variable then has a probability p = 1/3 to be present in the whitelist

    this.h = new Term[depth * 2 + 1];
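Since each of the nVars inputs is whitelisted independently, the expected number of active variables in a freshly created fraction is

    \mathbb{E}\bigl[\#\{i : vars[i]\}\bigr] = p \cdot nVars,

so this change lowers it from 0.5 · nVars to 0.3 · nVars (the comment's p = 1/3 from the preprint is approximated by 0.3 in the code).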