- Timestamp: 08/01/10 18:13:42 (14 years ago)
- Location: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic
- Files:
  - 1 added
  - 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4068 r4127 40 40 [StorableClass] 41 41 public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer { 42 private const string RandomParameterName = "Random"; 42 43 private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; 43 44 private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; … … 46 47 private const string ValidationSamplesEndParameterName = "SamplesEnd"; 47 48 private const string QualityParameterName = "Quality"; 48 private const string ScaledQualityParameterName = "ScaledQuality";49 49 private const string UpperEstimationLimitParameterName = "UpperEstimationLimit"; 50 50 private const string LowerEstimationLimitParameterName = "LowerEstimationLimit"; 51 private const string AlphaParameterName = "Alpha";52 private const string BetaParameterName = "Beta";53 51 private const string BestSolutionParameterName = "Best solution (validation)"; 54 52 private const string BestSolutionQualityParameterName = "Best solution quality (validation)"; … … 59 57 private const string BestKnownQualityParameterName = "BestKnownQuality"; 60 58 private const string GenerationsParameterName = "Generations"; 59 private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples"; 61 60 62 61 private const string TrainingMeanSquaredErrorQualityParameterName = "Mean squared error (training)"; … … 101 100 102 101 #region parameter properties 102 public ILookupParameter<IRandom> RandomParameter { 103 get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; } 104 } 103 105 public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter { 104 106 get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; } 105 107 } 106 public ScopeTreeLookupParameter<DoubleValue> QualityParameter {107 get { return 
(ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }108 }109 public ScopeTreeLookupParameter<DoubleValue> AlphaParameter {110 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }111 }112 public ScopeTreeLookupParameter<DoubleValue> BetaParameter {113 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[BetaParameterName]; }114 }115 108 public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter { 116 109 get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; } … … 125 118 get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; } 126 119 } 120 public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter { 121 get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; } 122 } 123 127 124 public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter { 128 125 get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; } … … 152 149 #endregion 153 150 #region properties 151 public IRandom Random { 152 get { return RandomParameter.ActualValue; } 153 } 154 154 public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree { 155 155 get { return SymbolicExpressionTreeParameter.ActualValue; } 156 156 } 157 public ItemArray<DoubleValue> Quality {158 get { return QualityParameter.ActualValue; }159 }160 public ItemArray<DoubleValue> Alpha {161 get { return AlphaParameter.ActualValue; }162 }163 public ItemArray<DoubleValue> Beta {164 get { return BetaParameter.ActualValue; }165 }166 157 public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter { 167 158 get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; } … … 176 167 get { return ValidationSamplesEndParameter.ActualValue; } 177 168 } 169 public PercentValue 
RelativeNumberOfEvaluatedSamples { 170 get { return RelativeNumberOfEvaluatedSamplesParameter.Value; } 171 } 172 178 173 public DoubleValue UpperEstimationLimit { 179 174 get { return UpperEstimationLimitParameter.ActualValue; } … … 196 191 public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer() 197 192 : base() { 193 Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use.")); 198 194 Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze.")); 199 195 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze.")); 200 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling."));201 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling."));202 196 Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees.")); 203 197 Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution.")); 204 198 Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set.")); 205 199 Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set.")); 200 Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1))); 206 201 Parameters.Add(new 
ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees.")); 207 202 Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees.")); … … 218 213 219 214 public override IOperation Apply() { 220 var alphas = Alpha;221 var betas = Beta;222 215 var trees = SymbolicExpressionTree; 223 216 224 IEnumerable<SymbolicExpressionTree> scaledTrees;225 if (alphas.Length == trees.Length) {226 scaledTrees = from i in Enumerable.Range(0, trees.Length)227 select SymbolicRegressionSolutionLinearScaler.Scale(trees[i], alphas[i].Value, betas[i].Value);228 } else {229 scaledTrees = trees;230 }231 232 217 string targetVariable = ProblemData.TargetVariable.Value; 218 219 // select a random subset of rows in the validation set 233 220 int validationStart = ValidiationSamplesStart.Value; 234 221 int validationEnd = ValidationSamplesEnd.Value; 222 uint seed = (uint)Random.Next(); 223 int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value); 224 if (count == 0) count = 1; 225 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count); 226 235 227 double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity; 236 228 double lowerEstimationLimit = LowerEstimationLimit != null ? 
LowerEstimationLimit.Value : double.NegativeInfinity; 237 229 238 double bestValidation Mse = double.MaxValue;230 double bestValidationRSquared = -1.0; 239 231 SymbolicExpressionTree bestTree = null; 240 232 241 OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator(); 242 foreach (var scaledTree in scaledTrees) { 243 double validationMse = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, scaledTree, 233 foreach (var tree in trees) { 234 double validationRSquared = SymbolicRegressionPearsonsRSquaredEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree, 244 235 lowerEstimationLimit, upperEstimationLimit, 245 236 ProblemData.Dataset, targetVariable, 246 Enumerable.Range(validationStart, validationEnd - validationStart));247 248 if (validation Mse < bestValidationMse) {249 bestValidation Mse = validationMse;250 bestTree = scaledTree;237 rows); 238 239 if (validationRSquared > bestValidationRSquared) { 240 bestValidationRSquared = validationRSquared; 241 bestTree = tree; 251 242 } 252 243 } 253 244 254 if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value > bestValidationMse) { 245 246 // if the best validation tree is better than the current best solution => update 247 if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value < bestValidationRSquared) { 248 // calculate scaling parameters and validation MSE only for the best tree 249 // scale tree for solution 250 double alpha, beta; 251 double validationMSE = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree, 252 lowerEstimationLimit, upperEstimationLimit, 253 ProblemData.Dataset, targetVariable, 254 rows, out beta, out alpha); 255 256 var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta); 255 257 var model = new 
SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), 256 bestTree);258 scaledTree); 257 259 var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit); 258 260 solution.Name = BestSolutionParameterName; … … 260 262 261 263 BestSolutionParameter.ActualValue = solution; 262 BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidation Mse);264 BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationRSquared); 263 265 264 266 BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies); … … 271 273 } 272 274 Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value); 273 Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidation Mse);275 Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationRSquared); 274 276 275 277 DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value; 276 278 AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName); 277 AddValue(validationValues, bestValidation Mse, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);279 AddValue(validationValues, bestValidationRSquared, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName); 278 280 return base.Apply(); 279 281 } 280 282 281 283 [StorableHook(HookType.AfterDeserialization)] 282 private void Initialize() { 283 if (!Parameters.ContainsKey(AlphaParameterName)) { 284 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling.")); 285 } 286 if (!Parameters.ContainsKey(BetaParameterName)) { 287 Parameters.Add(new 
ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling.")); 288 } 289 if (!Parameters.ContainsKey(VariableFrequenciesParameterName)) { 290 Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts")); 291 } 292 if (!Parameters.ContainsKey(GenerationsParameterName)) { 293 Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far.")); 294 } 295 } 284 private void Initialize() { } 296 285 297 286 private static void AddValue(DataTable table, double data, string name, string description) { -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs
r4118 r4127 376 376 fixedBestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd; 377 377 fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name; 378 fixedBestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;379 378 } 380 var bestValidationSolutionAnalyzer = analyzer as FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer;379 var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer; 381 380 if (bestValidationSolutionAnalyzer != null) { 382 381 bestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
Note: See TracChangeset for help on using the changeset viewer.