Changeset 2319 for trunk/sources
- Timestamp:
- 09/01/09 11:09:50 (15 years ago)
- Location:
- trunk/sources
- Files:
-
- 3 added
- 5 deleted
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs
r2311 r2319 31 31 namespace HeuristicLab.DataAnalysis { 32 32 public sealed class Dataset : ItemBase { 33 34 private string name;35 private double[] samples;36 private int rows;37 private int columns;38 33 private Dictionary<int, Dictionary<int, double>>[] cachedMeans; 39 34 private Dictionary<int, Dictionary<int, double>>[] cachedRanges; 40 private double[] scalingFactor;41 private double[] scalingOffset;42 35 private bool cachedValuesInvalidated = true; 43 44 private bool fireChangeEvents = true; 45 public bool FireChangeEvents { 46 get { return fireChangeEvents; } 47 set { fireChangeEvents = value; } 48 } 49 36 37 public Dataset() 38 : this(new double[,] { { 0.0 } }) { 39 } 40 41 public Dataset(double[,] samples) { 42 Name = "-"; 43 Rows = samples.GetLength(0); 44 Columns = samples.GetLength(1); 45 double[] values = new double[Rows * Columns]; 46 int i = 0; 47 for (int row = 0; row < Rows; row++) { 48 for (int column = 0; column < columns; column++) { 49 values[i++] = samples[row, column]; 50 } 51 } 52 Samples = values; 53 fireChangeEvents = true; 54 } 55 56 #region Properties 57 private string name; 50 58 public string Name { 51 59 get { return name; } … … 53 61 } 54 62 63 private int rows; 55 64 public int Rows { 56 65 get { return rows; } … … 58 67 } 59 68 69 private int columns; 60 70 public int Columns { 61 71 get { return columns; } … … 68 78 } 69 79 70 public double[] ScalingFactor { 71 get { return scalingFactor; } 72 set { 73 if (value.Length != scalingFactor.Length) 74 throw new ArgumentException("Length of scaling factor array doesn't match number of variables"); 75 scalingFactor = value; 76 } 77 } 78 public double[] ScalingOffset { 79 get { return scalingOffset; } 80 set { 81 if (value.Length != scalingOffset.Length) 82 throw new ArgumentException("Length of scaling offset array doesn't match number of variables"); 83 scalingOffset = value; 84 } 85 } 86 87 public double GetValue(int i, int j) { 88 return samples[columns * i + j]; 89 } 90 91 public void SetValue(int i, int j, double v) { 92 if (v != samples[columns * i + j]) { 93 samples[columns * i + j] = v; 94 cachedValuesInvalidated = true; 95 if (fireChangeEvents) FireChanged(); 96 } 97 } 98 80 private string[] variableNames; 81 public IEnumerable<string> VariableNames { 82 get { return variableNames; } 83 } 84 85 private double[] samples; 99 86 public double[] Samples { 100 87 get { return samples; } … … 113 100 } 114 101 115 private string[] variableNames; 116 public IEnumerable<string> VariableNames { 117 get { return variableNames; } 118 } 119 120 public Dataset() 121 : this(new double[,] { { 0.0 } }) { 122 } 123 124 public Dataset(double[,] samples) { 125 Name = "-"; 126 Rows = samples.GetLength(0); 127 Columns = samples.GetLength(1); 128 double[] values = new double[Rows * Columns]; 129 int i = 0; 130 for (int row = 0; row < Rows; row++) { 131 for (int column = 0; column < columns; column++) { 132 values[i++] = samples[row, column]; 133 } 134 } 135 Samples = values; 136 fireChangeEvents = true; 137 } 138 139 140 public string GetVariableName(int variableIndex) { 141 return variableNames[variableIndex]; 142 } 143 144 public int GetVariableIndex(string variableName) { 145 for (int i = 0; i < variableNames.Length; i++) { 146 if (variableNames[i].Equals(variableName)) return i; 147 } 148 throw new ArgumentException("The variable name " + variableName + " was not found."); 102 private bool fireChangeEvents = true; 103 public bool FireChangeEvents { 104 get { return fireChangeEvents; } 105 set { fireChangeEvents = value; } 106 } 107 108 private double[] scalingFactor; 109 public double[] ScalingFactor { 110 get { return scalingFactor; } 111 set { 112 if (value.Length != scalingFactor.Length) 113 throw new ArgumentException("Length of scaling factor array doesn't match number of variables"); 114 scalingFactor = value; 115 } 116 } 117 118 private double[] scalingOffset; 119 public double[] ScalingOffset { 120 get { return scalingOffset; } 121 set { 122 if (value.Length != scalingOffset.Length) 123 throw new ArgumentException("Length of scaling offset array doesn't match number of variables"); 124 scalingOffset = value; 125 } 126 } 127 #endregion 128 129 #region Modify and get values 130 public double GetValue(int i, int j) { 131 return samples[columns * i + j]; 149 132 } 150 133 … … 173 156 } 174 157 158 public void SetValue(int i, int j, double v) { 159 if (v != samples[columns * i + j]) { 160 samples[columns * i + j] = v; 161 cachedValuesInvalidated = true; 162 if (fireChangeEvents) FireChanged(); 163 } 164 } 165 166 public IEnumerable<double> ReplaceVariableValues(int variableIndex, IEnumerable<double> newValues, int start, int end) { 167 double[] oldValues = new double[end - start]; 168 for (int i = 0; i < end - start; i++) oldValues[i] = this.GetValue(i + start, variableIndex); 169 if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)"); 170 171 int index = start; 172 this.FireChangeEvents = false; 173 foreach (double v in newValues) { 174 this.SetValue(index++, variableIndex, v); 175 } 176 this.FireChangeEvents = true; 177 this.FireChanged(); 178 return oldValues; 179 } 180 181 public IEnumerable<double> ReplaceVariableValues(string variableName, IEnumerable<double> newValues, int start, int end) { 182 return ReplaceVariableValues(this.GetVariableIndex(variableName), newValues, start, end); 183 } 184 #endregion 185 186 #region Variable name methods 187 public string GetVariableName(int variableIndex) { 188 return variableNames[variableIndex]; 189 } 190 191 public int GetVariableIndex(string variableName) { 192 for (int i = 0; i < variableNames.Length; i++) { 193 if (variableNames[i].Equals(variableName)) return i; 194 } 195 throw new ArgumentException("The variable name " + variableName + " was not found."); 196 } 197 175 198 public void SetVariableName(int variableIndex, string name) { 176 199 variableNames[variableIndex] = name; … … 180 203 return this.variableNames.Contains(variableName); 181 204 } 205 #endregion 182 206 183 207 public override IView CreateView() { 184 208 return new DatasetView(this); 209 } 210 211 212 #region Variable statistics 213 public double GetMean(string variableName) { 214 return GetMean(GetVariableIndex(variableName)); 215 } 216 217 public double GetMean(string variableName, int start, int end) { 218 return GetMean(GetVariableIndex(variableName), start, end); 219 } 220 221 public double GetMean(int column) { 222 return GetMean(column, 0, Rows); 223 } 224 225 public double GetMean(int column, int start, int end) { 226 if (cachedValuesInvalidated) CreateDictionaries(); 227 if (!cachedMeans[column].ContainsKey(start) || !cachedMeans[column][start].ContainsKey(end)) { 228 double[] values = new double[end - start]; 229 for (int sample = start; sample < end; sample++) { 230 values[sample - start] = GetValue(sample, column); 231 } 232 double mean = Statistics.Mean(values); 233 if (!cachedMeans[column].ContainsKey(start)) cachedMeans[column][start] = new Dictionary<int, double>(); 234 cachedMeans[column][start][end] = mean; 235 return mean; 236 } else { 237 return cachedMeans[column][start][end]; 238 } 239 } 240 241 public double GetRange(string variableName) { 242 return GetRange(this.GetVariableIndex(variableName)); 243 } 244 245 public double GetRange(int column) { 246 return GetRange(column, 0, Rows); 247 } 248 249 public double GetRange(string variableName, int start, int end) { 250 return GetRange(this.GetVariableIndex(variableName), start, end); 251 } 252 253 public double GetRange(int column, int start, int end) { 254 if (cachedValuesInvalidated) CreateDictionaries(); 255 if (!cachedRanges[column].ContainsKey(start) || !cachedRanges[column][start].ContainsKey(end)) { 256 double[] values = new double[end - start]; 257 for (int sample = start; sample < end; sample++) { 258 values[sample - start] = GetValue(sample, column); 259 } 260 double range = Statistics.Range(values); 261 if (!cachedRanges[column].ContainsKey(start)) cachedRanges[column][start]= new Dictionary<int, double>(); 262 cachedRanges[column][start][end] = range; 263 return range; 264 } else { 265 return cachedRanges[column][start][end]; 266 } 267 } 268 269 public double GetMaximum(string variableName) { 270 return GetMaximum(this.GetVariableIndex(variableName)); 271 } 272 273 public double GetMaximum(int column) { 274 return GetMaximum(column, 0, Rows); 275 } 276 277 public double GetMaximum(string variableName, int start, int end) { 278 return GetMaximum(this.GetVariableIndex(variableName), start, end); 279 } 280 281 public double GetMaximum(int column, int start, int end) { 282 double max = Double.NegativeInfinity; 283 for (int i = start; i < end; i++) { 284 double val = GetValue(i, column); 285 if (!double.IsNaN(val) && val > max) max = val; 286 } 287 return max; 288 } 289 290 public double GetMinimum(string variableName) { 291 return GetMinimum(GetVariableIndex(variableName)); 292 } 293 294 public double GetMinimum(int column) { 295 return GetMinimum(column, 0, Rows); 296 } 297 298 public double GetMinimum(string variableName, int start, int end) { 299 return GetMinimum(this.GetVariableIndex(variableName), start, end); 300 } 301 302 public double GetMinimum(int column, int start, int end) { 303 double min = Double.PositiveInfinity; 304 for (int i = start; i < end; i++) { 305 double val = GetValue(i, column); 306 if (!double.IsNaN(val) && val < min) min = val; 307 } 308 return min; 309 } 310 #endregion 311 312 internal void ScaleVariable(int column) { 313 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) { 314 double min = GetMinimum(column); 315 double max = GetMaximum(column); 316 double range = max - min; 317 if (range == 0) ScaleVariable(column, 1.0, -min); 318 else ScaleVariable(column, 1.0 / range, -min); 319 } 320 cachedValuesInvalidated = true; 321 if (fireChangeEvents) FireChanged(); 322 } 323 324 internal void ScaleVariable(int column, double factor, double offset) { 325 scalingFactor[column] = factor; 326 scalingOffset[column] = offset; 327 for (int i = 0; i < Rows; i++) { 328 double origValue = samples[i * columns + column]; 329 samples[i * columns + column] = (origValue + offset) * factor; 330 } 331 cachedValuesInvalidated = true; 332 if (fireChangeEvents) FireChanged(); 333 } 334 335 internal void UnscaleVariable(int column) { 336 if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) { 337 for (int i = 0; i < rows; i++) { 338 double scaledValue = samples[i * columns + column]; 339 samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column]; 340 } 341 scalingFactor[column] = 1.0; 342 scalingOffset[column] = 0.0; 343 } 344 cachedValuesInvalidated = true; 345 if (fireChangeEvents) FireChanged(); 346 } 347 348 private void CreateDictionaries() { 349 // keep a means and ranges dictionary for each column (possible target variable) of the dataset. 350 cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns]; 351 cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns]; 352 for (int i = 0; i < columns; i++) { 353 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>(); 354 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>(); 355 } 356 cachedValuesInvalidated = false; 185 357 } 186 358 … … 313 485 } 314 486 #endregion 315 316 public double GetMean(int column) {317 return GetMean(column, 0, Rows);318 }319 320 public double GetMean(int column, int from, int to) {321 if (cachedValuesInvalidated) CreateDictionaries();322 if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {323 double[] values = new double[to - from];324 for (int sample = from; sample < to; sample++) {325 values[sample - from] = GetValue(sample, column);326 }327 double mean = Statistics.Mean(values);328 if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();329 cachedMeans[column][from][to] = mean;330 return mean;331 } else {332 return cachedMeans[column][from][to];333 }334 }335 336 public double GetRange(int column) {337 return GetRange(column, 0, Rows);338 }339 340 public double GetRange(int column, int from, int to) {341 if (cachedValuesInvalidated) CreateDictionaries();342 if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {343 double[] values = new double[to - from];344 for (int sample = from; sample < to; sample++) {345 values[sample - from] = GetValue(sample, column);346 }347 double range = Statistics.Range(values);348 if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();349 cachedRanges[column][from][to] = range;350 return range;351 } else {352 return cachedRanges[column][from][to];353 }354 }355 356 public double GetMaximum(int column) {357 return GetMaximum(column, 0, Rows);358 }359 360 public double GetMaximum(int column, int start, int end) {361 double max = Double.NegativeInfinity;362 for (int i = start; i < end; i++) {363 double val = GetValue(i, column);364 if (!double.IsNaN(val) && val > max) max = val;365 }366 return max;367 }368 369 public double GetMinimum(int column) {370 return GetMinimum(column, 0, Rows);371 }372 373 public double GetMinimum(int column, int start, int end) {374 double min = Double.PositiveInfinity;375 for (int i = start; i < end; i++) {376 double val = GetValue(i, column);377 if (!double.IsNaN(val) && val < min) min = val;378 }379 return min;380 }381 382 internal void ScaleVariable(int column) {383 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {384 double min = GetMinimum(column);385 double max = GetMaximum(column);386 double range = max - min;387 if (range == 0) ScaleVariable(column, 1.0, -min);388 else ScaleVariable(column, 1.0 / range, -min);389 }390 cachedValuesInvalidated = true;391 if (fireChangeEvents) FireChanged();392 }393 394 internal void ScaleVariable(int column, double factor, double offset) {395 scalingFactor[column] = factor;396 scalingOffset[column] = offset;397 for (int i = 0; i < Rows; i++) {398 double origValue = samples[i * columns + column];399 samples[i * columns + column] = (origValue + offset) * factor;400 }401 cachedValuesInvalidated = true;402 if (fireChangeEvents) FireChanged();403 }404 405 internal void UnscaleVariable(int column) {406 if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {407 for (int i = 0; i < rows; i++) {408 double scaledValue = samples[i * columns + column];409 samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];410 }411 scalingFactor[column] = 1.0;412 scalingOffset[column] = 0.0;413 }414 cachedValuesInvalidated = true;415 if (fireChangeEvents) FireChanged();416 }417 418 private void CreateDictionaries() {419 // keep a means and ranges dictionary for each column (possible target variable) of the dataset.420 cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];421 cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];422 for (int i = 0; i < columns; i++) {423 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();424 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();425 }426 cachedValuesInvalidated = false;427 }428 487 } 429 488 } -
trunk/sources/HeuristicLab.GP.StructureIdentification.Classification/3.3/OffspringSelectionGP.cs
r2222 r2319 22 22 using HeuristicLab.Core; 23 23 using HeuristicLab.Modeling; 24 using HeuristicLab.Operators; 24 25 25 26 namespace HeuristicLab.GP.StructureIdentification.Classification { … … 27 28 28 29 protected override IOperator CreateBestSolutionProcessor() { 29 IOperator seq = base.CreateBestSolutionProcessor(); 30 IOperator seq = new SequentialProcessor(); 31 seq.AddSubOperator(base.CreateBestSolutionProcessor()); 30 32 seq.AddSubOperator(StandardGP.BestSolutionProcessor); 31 33 return seq; -
trunk/sources/HeuristicLab.GP.StructureIdentification.Classification/3.3/StandardGP.cs
r2222 r2319 27 27 public class StandardGP : HeuristicLab.GP.StructureIdentification.StandardGP, IClassificationAlgorithm { 28 28 protected override IOperator CreateBestSolutionProcessor() { 29 IOperator seq = base.CreateBestSolutionProcessor();30 seq.AddSubOperator(BestSolutionProcessor);31 return seq;29 IOperator bestSolutionProcessor = BestSolutionProcessor; 30 bestSolutionProcessor.AddSubOperator(base.CreateBestSolutionProcessor()); 31 return bestSolutionProcessor; 32 32 } 33 33 … … 76 76 } 77 77 } 78 -
trunk/sources/HeuristicLab.GP.StructureIdentification.TimeSeries/3.3/OffspringSelectionGP.cs
r2222 r2319 48 48 49 49 protected override IOperator CreateBestSolutionProcessor() { 50 IOperator seq = base.CreateBestSolutionProcessor(); 50 SequentialProcessor seq = new SequentialProcessor(); 51 seq.AddSubOperator(base.CreateBestSolutionProcessor()); 51 52 seq.AddSubOperator(StandardGP.BestSolutionProcessor); 52 53 return seq; -
trunk/sources/HeuristicLab.GP.StructureIdentification.TimeSeries/3.3/StandardGP.cs
r2222 r2319 50 50 51 51 protected override IOperator CreateBestSolutionProcessor() { 52 IOperator seq = base.CreateBestSolutionProcessor(); 52 SequentialProcessor seq = new SequentialProcessor(); 53 seq.AddSubOperator(base.CreateBestSolutionProcessor()); 53 54 seq.AddSubOperator(BestSolutionProcessor); 54 55 return seq; -
trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/BaseClasses/AlgorithmBase.cs
r2290 r2319 311 311 312 312 protected internal virtual IOperator CreateBestSolutionProcessor() { 313 return new EmptyOperator(); 313 SequentialProcessor seq = new SequentialProcessor(); 314 // calculate and set variable impacts 315 VariableNamesExtractor namesExtractor = new VariableNamesExtractor(); 316 namesExtractor.GetVariableInfo("VariableNames").ActualName = "InputVariableNames"; 317 PredictorBuilder predictorBuilder = new PredictorBuilder(); 318 319 VariableEvaluationImpactCalculator evaluationImpactCalculator = new VariableEvaluationImpactCalculator(); 320 evaluationImpactCalculator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 321 evaluationImpactCalculator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 322 VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator(); 323 qualityImpactCalculator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 324 qualityImpactCalculator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 325 326 seq.AddSubOperator(namesExtractor); 327 seq.AddSubOperator(predictorBuilder); 328 seq.AddSubOperator(evaluationImpactCalculator); 329 seq.AddSubOperator(qualityImpactCalculator); 330 return seq; 314 331 } 315 332 … … 410 427 411 428 protected internal virtual IAnalyzerModel CreateGPModel(IScope bestModelScope) { 412 Engine.GlobalScope.AddSubScope(bestModelScope);413 IGeneticProgrammingModel tree = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("FunctionTree", false);414 ITreeEvaluator evaluator = bestModelScope.GetVariableValue<ITreeEvaluator>("TreeEvaluator", true);415 429 IAnalyzerModel model = new AnalyzerModel(); 416 model.Predictor = new Predictor(evaluator, tree);430 model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true); 417 431 Dataset ds = bestModelScope.GetVariableValue<Dataset>("Dataset", true); 418 432 model.Dataset = ds; … … 427 441 model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("Quality", false).Data; 428 442 model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data; 429 // calculate and set variable impacts430 VariableEvaluationImpactCalculator evaluationImpactCalculator = new VariableEvaluationImpactCalculator();431 evaluationImpactCalculator.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";432 evaluationImpactCalculator.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";433 VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator();434 qualityImpactCalculator.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";435 qualityImpactCalculator.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";436 437 438 evaluationImpactCalculator.Apply(bestModelScope);439 qualityImpactCalculator.Apply(bestModelScope);440 443 441 444 ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false); -
trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/HeuristicLab.GP.StructureIdentification-3.3.csproj
r2285 r2319 85 85 <Compile Include="BaseClasses\FunctionTreeBase.cs" /> 86 86 <Compile Include="BaseClasses\TreeEvaluatorBase.cs" /> 87 <Compile Include="PredictorBuilder.cs" /> 87 88 <Compile Include="Predictor.cs" /> 88 89 <Compile Include="Evaluators\SimpleGPEvaluatorBase.cs" /> … … 145 146 <Compile Include="Properties\AssemblyInfo.cs" /> 146 147 <Compile Include="SymbolicExpressionExporter.cs" /> 147 <Compile Include="Evaluators\VariableEvaluationImpactCalculator.cs" /> 148 <Compile Include="Evaluators\VariableQualityImpactCalculator.cs" /> 148 <Compile Include="VariableNamesExtractor.cs" /> 149 149 </ItemGroup> 150 150 <ItemGroup> -
trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/StandardGP.cs
r2285 r2319 203 203 } 204 204 205 protected internal override IOperator CreateBestSolutionProcessor() { 205 protected internal override IOperator CreateBestSolutionProcessor() { 206 206 SequentialProcessor bestSolutionProcessor = new SequentialProcessor(); 207 bestSolutionProcessor.AddSubOperator(base.CreateBestSolutionProcessor()); 208 207 209 #region MSE 208 210 MeanSquaredErrorEvaluator testMseEvaluator = new MeanSquaredErrorEvaluator(); -
trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegression.cs
r2290 r2319 237 237 #endregion 238 238 239 HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator evalImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator(); 240 evalImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart"; 241 evalImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 242 evalImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; 243 HeuristicLab.Modeling.VariableQualityImpactCalculator qualImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableQualityImpactCalculator(); 244 qualImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart"; 245 qualImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 246 qualImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; 239 VariableNamesExtractor namesExtractor = new VariableNamesExtractor(); 240 namesExtractor.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; 241 namesExtractor.GetVariableInfo("VariableNames").ActualName = "InputVariableNames"; 242 PredictorBuilder predictorBuilder = new PredictorBuilder(); 243 predictorBuilder.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; 244 VariableEvaluationImpactCalculator evalImpactCalc = new VariableEvaluationImpactCalculator(); 245 evalImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 246 evalImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 247 VariableQualityImpactCalculator qualImpactCalc = new VariableQualityImpactCalculator(); 248 qualImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 249 qualImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 247 250 seqProc.AddSubOperator(trainingMSE); 248 251 seqProc.AddSubOperator(validationMSE); … … 260 263 seqProc.AddSubOperator(validationVAF); 261 264 seqProc.AddSubOperator(testVAF); 265 seqProc.AddSubOperator(namesExtractor); 266 seqProc.AddSubOperator(predictorBuilder); 262 267 seqProc.AddSubOperator(qualImpactCalc); 263 268 seqProc.AddSubOperator(evalImpactCalc); … … 269 274 270 275 protected internal virtual IAnalyzerModel CreateLRModel(IScope bestModelScope) { 271 IGeneticProgrammingModel tree = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("LinearRegressionModel", false);272 ITreeEvaluator evaluator = bestModelScope.GetVariableValue<ITreeEvaluator>("TreeEvaluator", true);273 276 IAnalyzerModel model = new AnalyzerModel(); 274 model.Predictor = new Predictor(evaluator, tree);277 model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true); 275 278 model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("TrainingQuality", false).Data; 276 279 model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data; -
trunk/sources/HeuristicLab.Modeling/3.2/HeuristicLab.Modeling-3.2.csproj
r2313 r2319 87 87 <Compile Include="IAnalyzerModel.cs" /> 88 88 <Compile Include="MatrixCreator.cs" /> 89 <Compile Include="VariableImpactCalculatorBase.cs" />90 89 <Compile Include="VariableEvaluationImpactCalculator.cs" /> 91 90 <Compile Include="IPredictor.cs" /> -
trunk/sources/HeuristicLab.Modeling/3.2/VariableEvaluationImpactCalculator.cs
r2226 r2319 30 30 31 31 namespace HeuristicLab.Modeling { 32 public abstract class VariableEvaluationImpactCalculator : VariableImpactCalculatorBase<double[]> { 33 public override string OutputVariableName { 34 get { return "VariableEvaluationImpacts"; } 32 public class VariableEvaluationImpactCalculator : OperatorBase { 33 34 public VariableEvaluationImpactCalculator() 35 : base() { 36 AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In)); 37 AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In)); 38 AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In)); 39 AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In)); 40 AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In)); 41 AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In)); 42 AddVariableInfo(new VariableInfo("VariableEvaluationImpacts", "VariableEvaluationImpacts", typeof(ItemList), VariableKind.New)); 35 43 } 36 44 … … 39 47 } 40 48 41 private double[,] CombineOutputs(double[] referenceOutputs, double[] newOutputs) { 42 if (referenceOutputs.Length != newOutputs.Length) throw new InvalidProgramException(); 43 double[,] result = new double[referenceOutputs.Length, 2]; 44 for (int i = 0; i < referenceOutputs.Length; i++) { 45 result[i, 0] = referenceOutputs[i]; 46 result[i, 1] = newOutputs[i]; 49 public override IOperation Apply(IScope scope) { 50 IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true); 51 Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true); 52 int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data; 53 string targetVariableName = dataset.GetVariableName(targetVariable); 54 ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false); 55 int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data; 56 int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data; 57 58 Dictionary<string, double> evaluationImpacts; 59 if (inputVariableNames == null) 60 evaluationImpacts = Calculate(dataset, predictor, targetVariableName, start, end); 61 else 62 evaluationImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end); 63 64 ItemList variableImpacts = new ItemList(); 65 foreach (KeyValuePair<string, double> p in evaluationImpacts) { 66 if (p.Key != targetVariableName) { 67 ItemList row = new ItemList(); 68 row.Add(new StringData(p.Key)); 69 row.Add(new DoubleData(p.Value)); 70 variableImpacts.Add(row); 71 } 47 72 } 48 return result; 73 74 scope.AddVariable(new Variable(scope.TranslateName("VariableEvaluationImpacts"), variableImpacts)); 75 return null; 76 49 77 } 50 78 51 protected override double CalculateImpact(double[] referenceValue, double[] newValue) { 79 public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) { 80 return Calculate(dataset, predictor, targetVariableName, null, start, end); 81 } 82 83 84 public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) { 85 Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>(); 86 Dataset dirtyDataset = (Dataset)dataset.Clone(); 87 double[] referenceValues = predictor.Predict(dataset, start, end); 88 89 double mean; 90 IEnumerable<double> oldValues; 91 double[] newValues; 92 IEnumerable<string> variables; 93 if (inputVariableNames != null) 94 variables = inputVariableNames; 95 else 96 variables = dataset.VariableNames; 97 98 foreach (string variableName in variables) { 99 if (variableName != targetVariableName) { 100 mean = dataset.GetMean(variableName, start, end); 101 oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end); 102 newValues = predictor.Predict(dirtyDataset, start, end); 103 evaluationImpacts[variableName] = CalculateMSE(referenceValues, newValues); 104 dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end); 105 } 106 } 107 108 double impactsSum = evaluationImpacts.Values.Sum(); 109 if (impactsSum.IsAlmost(0.0)) impactsSum = 1.0; 110 foreach (KeyValuePair<string, double> p in evaluationImpacts.ToList()) 111 evaluationImpacts[p.Key] = p.Value / impactsSum; 112 113 return evaluationImpacts; 114 } 115 116 private static double CalculateMSE(double[] referenceValues, double[] newValues) { 52 117 try { 53 return SimpleMSEEvaluator.Calculate( CombineOutputs(referenceValue, newValue));118 return SimpleMSEEvaluator.Calculate(MatrixCreator<double>.CreateMatrix(referenceValues, newValues)); 54 119 } 55 120 catch (ArgumentException) { … … 57 122 } 58 123 } 59 60 protected override double[] CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {61 return GetOutputs(scope, dataset, targetVariable, start, end);62 }63 64 protected override double[] PostProcessImpacts(double[] impacts) {65 double mseSum = impacts.Sum();66 if (mseSum.IsAlmost(0.0)) mseSum = 1.0;67 for (int i = 0; i < impacts.Length; i++) {68 impacts[i] = impacts[i] / mseSum;69 }70 return impacts;71 }72 73 private bool IsAlmost(double x, double y) {74 return Math.Abs(x - y) < 1.0E-12;75 }76 77 protected abstract double[] GetOutputs(IScope scope, Dataset dataset, int targetVariable, int start, int end);78 124 } 79 125 } -
trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs
r2165 r2319 30 30 31 31 namespace HeuristicLab.Modeling { 32 public abstract class VariableQualityImpactCalculator : VariableImpactCalculatorBase<double> { 32 public class VariableQualityImpactCalculator : OperatorBase { 33 34 public VariableQualityImpactCalculator() 35 : base() { 36 AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In)); 37 AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In)); 38 AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In)); 39 AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In)); 40 AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In)); 41 AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In)); 42 AddVariableInfo(new VariableInfo("VariableQualityImpacts", "VariableQualityImpacts", typeof(ItemList), VariableKind.New)); 43 } 44 33 45 public override string Description { 34 46 get { return @"Calculates the impact of all allowed input variables on the quality of the model using evaluator supplied as suboperator."; } 35 47 } 36 48 37 public override string OutputVariableName { 38 get { return "VariableQualityImpacts"; } 49 public override IOperation Apply(IScope scope) { 50 IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true); 51 Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true); 52 int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data; 53 string targetVariableName = dataset.GetVariableName(targetVariable); 54 ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false); 55 int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data; 56 int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data; 57 58 Dictionary<string, double> qualityImpacts; 59 if (inputVariableNames == null) 60 qualityImpacts = Calculate(dataset, predictor, targetVariableName, start, end); 61 else 62 qualityImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end); 63 64 ItemList variableImpacts = new ItemList(); 65 foreach (KeyValuePair<string, double> p in qualityImpacts) { 66 if (p.Key != targetVariableName) { 67 ItemList row = new ItemList(); 68 row.Add(new StringData(p.Key)); 69 row.Add(new DoubleData(p.Value)); 70 variableImpacts.Add(row); 71 } 72 } 73 74 scope.AddVariable(new Variable(scope.TranslateName("VariableQualityImpacts"), variableImpacts)); 75 return null; 39 76 } 40 77 41 protected override double CalculateImpact(double referenceValue, double newValue) { 78 public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) { 79 return Calculate(dataset, predictor, targetVariableName, null, start, end); 80 } 81 82 public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) { 83 Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>(); 84 Dataset dirtyDataset = (Dataset)dataset.Clone(); 85 86 double[] predictedValues = predictor.Predict(dataset, start, end); 87 double[] targetValues = dataset.GetVariableValues(targetVariableName, start, end); 88 89 double oldMSE = CalculateMSE(predictedValues, targetValues); 90 double newMSE; 91 92 double mean; 93 IEnumerable<double> oldValues; 94 IEnumerable<string> variables; 95 if (inputVariableNames != null) 96 variables = inputVariableNames; 97 else 98 variables = dataset.VariableNames; 99 100 foreach (string variableName in variables) { 101 if (variableName != targetVariableName) { 102 mean = dataset.GetMean(variableName, start, end); 103 oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end); 104 predictedValues = predictor.Predict(dirtyDataset, start, end); 105 newMSE = CalculateMSE(predictedValues, targetValues); 106 evaluationImpacts[variableName] = newMSE / oldMSE; 107 dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end); 108 } 109 } 110 111 return evaluationImpacts; 112 } 113 114 private static double CalculateImpact(double referenceValue, double newValue) { 42 115 return newValue / referenceValue; 43 116 } 44 117 45 protected override double CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) { 46 return CalculateQuality(scope, dataset, targetVariable, start, end); 118 private static double CalculateMSE(double[] referenceValues, double[] newValues) { 119 try { 120 return SimpleMSEEvaluator.Calculate(MatrixCreator<double>.CreateMatrix(referenceValues, newValues)); 121 } 122 catch (ArgumentException) { 123 return double.PositiveInfinity; 124 } 47 125 } 48 49 protected abstract double CalculateQuality(IScope scope, Dataset dataset, int targetVariable, int start, int end);50 126 } 51 127 } -
trunk/sources/HeuristicLab.SupportVectorMachines/3.2/HeuristicLab.SupportVectorMachines-3.2.csproj
r2285 r2319 84 84 <ItemGroup> 85 85 <Compile Include="Predictor.cs" /> 86 <Compile Include="PredictorBuilder.cs" /> 86 87 <Compile Include="SupportVectorRegression.cs" /> 87 88 <Compile Include="SVMModel.cs" /> … … 97 98 <DependentUpon>SVMModelView.cs</DependentUpon> 98 99 </Compile> 99 <Compile Include="VariableEvaluationImpactCalculator.cs" />100 <Compile Include="VariableQualityImpactCalculator.cs" />101 100 </ItemGroup> 102 101 <ItemGroup> -
trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SupportVectorRegression.cs
r2290 r2319 403 403 SequentialSubScopesProcessor seqSubScopeProc = new SequentialSubScopesProcessor(); 404 404 SequentialProcessor seqProc = new SequentialProcessor(); 405 PredictorBuilder predictorBuilder = new PredictorBuilder(); 406 predictorBuilder.GetVariableInfo("SVMModel").ActualName = "Model"; 405 407 VariableEvaluationImpactCalculator evalImpactCalc = new VariableEvaluationImpactCalculator(); 406 evalImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart"; 407 evalImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 408 evalImpactCalc.GetVariableInfo("SVMModel").ActualName = "Model"; 408 evalImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 409 evalImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 409 410 VariableQualityImpactCalculator qualImpactCalc = new VariableQualityImpactCalculator(); 410 qualImpactCalc.GetVariableInfo(" TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";411 qualImpactCalc.GetVariableInfo(" TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";412 qualImpactCalc.GetVariableInfo("SVMModel").ActualName = "Model";413 411 qualImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; 412 qualImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; 413 414 seqProc.AddSubOperator(predictorBuilder); 414 415 seqProc.AddSubOperator(evalImpactCalc); 415 416 seqProc.AddSubOperator(qualImpactCalc); … … 448 449 model.TestSamplesStart = bestModelScope.GetVariableValue<IntData>("TestSamplesStart", true).Data; 449 450 model.TestSamplesEnd = bestModelScope.GetVariableValue<IntData>("TestSamplesEnd", true).Data; 450 Dictionary<string, int> variableNames = new Dictionary<string, int>(); 451 for (int i = 0; i < ds.Columns; i++) variableNames[ds.GetVariableName(i)] = i; 452 model.Predictor = new Predictor(bestModelScope.GetVariableValue<SVMModel>("Model", false), model.TargetVariable, variableNames); 453 451 model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true); 454 452 455 453 ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
Note: See TracChangeset
for help on using the changeset viewer.