#region License Information /* HeuristicLab * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Text; using System.Xml; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.DataAnalysis; using System.Linq; namespace HeuristicLab.Modeling { public class ProblemInjector : OperatorBase { public override string Description { get { return @"Injects the necessary variables for a data-based modeling problem."; } } public ProblemInjector() : base() { AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.New)); GetVariableInfo("Dataset").Local = true; AddVariable(new Variable("Dataset", new Dataset())); AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(StringData), VariableKind.New)); GetVariableInfo("TargetVariable").Local = true; AddVariable(new Variable("TargetVariable", new StringData())); AddVariableInfo(new VariableInfo("AllowedFeatures", "Indexes of allowed input variables", typeof(ItemList), VariableKind.In)); GetVariableInfo("AllowedFeatures").Local = true; AddVariable(new Variable("AllowedFeatures", new ItemList())); AddVariableInfo(new VariableInfo("TrainingSamplesStart", "TrainingSamplesStart", typeof(IntData), VariableKind.New)); GetVariableInfo("TrainingSamplesStart").Local = true; AddVariable(new Variable("TrainingSamplesStart", new IntData())); AddVariableInfo(new VariableInfo("TrainingSamplesEnd", "TrainingSamplesEnd", typeof(IntData), VariableKind.New)); GetVariableInfo("TrainingSamplesEnd").Local = true; AddVariable(new Variable("TrainingSamplesEnd", new IntData())); AddVariableInfo(new VariableInfo("ActualTrainingSamplesStart", "ActualTrainingSamplesStart", typeof(IntData), VariableKind.New)); AddVariableInfo(new VariableInfo("ActualTrainingSamplesEnd", "ActualTrainingSamplesEnd", typeof(IntData), VariableKind.New)); AddVariableInfo(new VariableInfo("ValidationSamplesStart", "ValidationSamplesStart", typeof(IntData), VariableKind.New)); GetVariableInfo("ValidationSamplesStart").Local = true; AddVariable(new Variable("ValidationSamplesStart", new IntData())); AddVariableInfo(new VariableInfo("ValidationSamplesEnd", "ValidationSamplesEnd", typeof(IntData), VariableKind.New)); GetVariableInfo("ValidationSamplesEnd").Local = true; AddVariable(new Variable("ValidationSamplesEnd", new IntData())); AddVariableInfo(new VariableInfo("TestSamplesStart", "TestSamplesStart", typeof(IntData), VariableKind.New)); GetVariableInfo("TestSamplesStart").Local = true; AddVariable(new Variable("TestSamplesStart", new IntData())); AddVariableInfo(new VariableInfo("TestSamplesEnd", "TestSamplesEnd", typeof(IntData), VariableKind.New)); GetVariableInfo("TestSamplesEnd").Local = true; AddVariable(new Variable("TestSamplesEnd", new IntData())); AddVariableInfo(new VariableInfo("MaxNumberOfTrainingSamples", "Maximal number of training samples to use (optional)", typeof(IntData), VariableKind.In)); AddVariableInfo(new VariableInfo("NumberOfInputVariables", "The number of available input variables", typeof(IntData), VariableKind.New)); AddVariableInfo(new VariableInfo("InputVariables", "List of input variable names", typeof(ItemList), VariableKind.New)); } public override IView CreateView() { return new ProblemInjectorView(this); } public override IOperation Apply(IScope scope) { AddVariableToScope("TrainingSamplesStart", scope); AddVariableToScope("TrainingSamplesEnd", scope); AddVariableToScope("ValidationSamplesStart", scope); AddVariableToScope("ValidationSamplesEnd", scope); AddVariableToScope("TestSamplesStart", scope); AddVariableToScope("TestSamplesEnd", scope); Dataset operatorDataset = (Dataset)GetVariable("Dataset").Value; string targetVariable = ((StringData)GetVariable("TargetVariable").Value).Data; ItemList operatorAllowedFeatures = (ItemList)GetVariable("AllowedFeatures").Value; Dataset scopeDataset = CreateNewDataset(operatorDataset, targetVariable, operatorAllowedFeatures); ItemList inputVariables = new ItemList(); for (int i = 1; i < scopeDataset.Columns; i++) { inputVariables.Add(new StringData(scopeDataset.GetVariableName(i))); } scope.AddVariable(new Variable(scope.TranslateName("Dataset"), scopeDataset)); scope.AddVariable(new Variable(scope.TranslateName("TargetVariable"), new StringData(targetVariable))); scope.AddVariable(new Variable(scope.TranslateName("NumberOfInputVariables"), new IntData(scopeDataset.Columns - 1))); scope.AddVariable(new Variable(scope.TranslateName("InputVariables"), inputVariables)); int trainingStart = GetVariableValue("TrainingSamplesStart", scope, true).Data; int trainingEnd = GetVariableValue("TrainingSamplesEnd", scope, true).Data; var maxTraining = GetVariableValue("MaxNumberOfTrainingSamples", scope, true, false); int nTrainingSamples; if (maxTraining != null) { nTrainingSamples = Math.Min(maxTraining.Data, trainingEnd - trainingStart); if (nTrainingSamples <= 0) throw new ArgumentException("Maximal number of training samples must be larger than 0", "MaxNumberOfTrainingSamples"); } else { nTrainingSamples = trainingEnd - trainingStart; } scope.AddVariable(new Variable(scope.TranslateName("ActualTrainingSamplesStart"), new IntData(trainingStart))); scope.AddVariable(new Variable(scope.TranslateName("ActualTrainingSamplesEnd"), new IntData(trainingStart + nTrainingSamples))); return null; } private Dataset CreateNewDataset(Dataset operatorDataset, string targetVariable, ItemList operatorAllowedVariables) { int columns = (operatorAllowedVariables.Count() + 1); int rows = operatorDataset.Rows; double[] values = new double[rows * columns]; int targetVariableIndex = operatorDataset.GetVariableIndex(targetVariable); for (int row = 0; row < rows; row++) { int column = 0; values[row*columns + column] = operatorDataset.GetValue(row, targetVariableIndex); // set target variable value to column index 0 column++; // start input variables at column index 1 foreach (var inputVariable in operatorAllowedVariables) { int variableColumnIndex = operatorDataset.GetVariableIndex(inputVariable.Data); values[row * columns + column] = operatorDataset.GetValue(row, variableColumnIndex); column++; } } Dataset ds = new Dataset(); ds.Columns = columns; ds.Rows = operatorDataset.Rows; ds.Name = operatorDataset.Name; ds.Samples = values; double[] scalingFactor = new double[columns]; double[] scalingOffset = new double[columns]; ds.SetVariableName(0, targetVariable); scalingFactor[0] = operatorDataset.ScalingFactor[targetVariableIndex]; scalingOffset[0] = operatorDataset.ScalingOffset[targetVariableIndex]; for (int column = 1; column < columns; column++) { int variableColumnIndex = operatorDataset.GetVariableIndex(operatorAllowedVariables[column - 1].Data); ds.SetVariableName(column, operatorAllowedVariables[column - 1].Data); scalingFactor[column] = operatorDataset.ScalingFactor[variableColumnIndex]; scalingOffset[column] = operatorDataset.ScalingOffset[variableColumnIndex]; } ds.ScalingOffset = scalingOffset; ds.ScalingFactor = scalingFactor; return ds; } private void AddVariableToScope(string variableName, IScope scope) { scope.AddVariable(new Variable(scope.TranslateName(variableName), (IItem)GetVariable(variableName).Value.Clone())); } } }