#region License Information
/* HeuristicLab
* Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.DataAnalysis;
namespace HeuristicLab.GP.StructureIdentification.Classification {
/// <summary>
/// Operator that prepares an n-fold cross-validation.
/// For every fold it creates a sub-scope containing a rotated copy of the original
/// dataset together with new training, validation and test sample ranges.
/// The ratio between training and validation samples of the original partitioning
/// is kept; the last <c>Rows / n-Fold</c> rows of each rotated dataset form the test partition.
/// </summary>
public class CrossValidation : OperatorBase {
  private const string DATASET = "Dataset";
  private const string NFOLD = "n-Fold";
  private const string TRAININGSAMPLESSTART = "TrainingSamplesStart";
  private const string TRAININGSAMPLESEND = "TrainingSamplesEnd";
  private const string VALIDATIONSAMPLESSTART = "ValidationSamplesStart";
  private const string VALIDATIONSAMPLESEND = "ValidationSamplesEnd";
  private const string TESTSAMPLESSTART = "TestSamplesStart";
  private const string TESTSAMPLESEND = "TestSamplesEnd";

  /// <summary>
  /// Gets the description of the operator.
  /// </summary>
  public override string Description {
    get { return @"TASK"; }
  }

  /// <summary>
  /// Initializes a new instance of <see cref="CrossValidation"/> and registers its variable infos.
  /// </summary>
  public CrossValidation()
    : base() {
    // The dataset is read from the scope and a new (rotated) dataset is created in every
    // sub-scope, hence In | New like the training/validation range variables below.
    AddVariableInfo(new VariableInfo(DATASET, "The original dataset and the new datasets in the newly created subscopes", typeof(Dataset), VariableKind.In | VariableKind.New));
    AddVariableInfo(new VariableInfo(NFOLD, "Number of folds for the cross-validation", typeof(IntData), VariableKind.In));
    AddVariableInfo(new VariableInfo(TRAININGSAMPLESSTART, "The start of training samples in the original dataset and starts of training samples in the new datasets", typeof(IntData), VariableKind.In | VariableKind.New));
    AddVariableInfo(new VariableInfo(TRAININGSAMPLESEND, "The end of training samples in the original dataset and ends of training samples in the new datasets", typeof(IntData), VariableKind.In | VariableKind.New));
    AddVariableInfo(new VariableInfo(VALIDATIONSAMPLESSTART, "The start of validation samples in the original dataset and starts of validation samples in the new datasets", typeof(IntData), VariableKind.In | VariableKind.New));
    AddVariableInfo(new VariableInfo(VALIDATIONSAMPLESEND, "The end of validation samples in the original dataset and ends of validation samples in the new datasets", typeof(IntData), VariableKind.In | VariableKind.New));
    AddVariableInfo(new VariableInfo(TESTSAMPLESSTART, "The start of the test samples in the new datasets", typeof(IntData), VariableKind.New));
    AddVariableInfo(new VariableInfo(TESTSAMPLESEND, "The end of the test samples in the new datasets", typeof(IntData), VariableKind.New));
  }

  /// <summary>
  /// Creates one sub-scope per fold. Each sub-scope receives a copy of the dataset whose
  /// sample array is rotated left by <c>i * (Rows / nFolds) * Columns</c> elements
  /// (i = fold index) and the new training/validation/test sample ranges.
  /// </summary>
  /// <param name="scope">The scope that provides the input variables and receives the sub-scopes.</param>
  /// <returns>Always <c>null</c>.</returns>
  /// <exception cref="ArgumentException">
  /// Thrown when the number of folds is smaller than 2, when the original training or validation
  /// range has a negative length or both are empty, or when the dataset has fewer rows than folds.
  /// </exception>
  public override IOperation Apply(IScope scope) {
    Dataset origDataset = GetVariableValue<Dataset>(DATASET, scope, true);
    int nFolds = GetVariableValue<IntData>(NFOLD, scope, true).Data;
    if (nFolds < 2) throw new ArgumentException("The number of folds (nFolds) has to be >=2 for cross validation");

    int origTrainingSamplesStart = GetVariableValue<IntData>(TRAININGSAMPLESSTART, scope, true).Data;
    int origTrainingSamplesEnd = GetVariableValue<IntData>(TRAININGSAMPLESEND, scope, true).Data;
    int origValidationSamplesStart = GetVariableValue<IntData>(VALIDATIONSAMPLESSTART, scope, true).Data;
    int origValidationSamplesEnd = GetVariableValue<IntData>(VALIDATIONSAMPLESEND, scope, true).Data;

    int n = origDataset.Rows;
    int origTrainingSamples = origTrainingSamplesEnd - origTrainingSamplesStart;
    int origValidationSamples = origValidationSamplesEnd - origValidationSamplesStart;

    // Without this check the ratio below would be NaN (0 / 0) or meaningless (negative lengths).
    if (origTrainingSamples < 0 || origValidationSamples < 0 || origTrainingSamples + origValidationSamples == 0)
      throw new ArgumentException("The original training and validation sample ranges must not be negative and must not both be empty");

    // Share of the training partition within the original training + validation samples.
    double percentTrainingSamples = origTrainingSamples / (double)(origValidationSamples + origTrainingSamples);

    int nTestSamples = n / nFolds;
    // With fewer rows than folds every fold would be identical and have an empty test partition.
    if (nTestSamples == 0)
      throw new ArgumentException("The dataset must contain at least as many rows as folds (nFolds) for cross validation");

    // The new partitioning is identical for all folds; only the dataset is rotated per fold.
    int newTrainingSamplesStart = 0;
    int newTrainingSamplesEnd = (int)((n - nTestSamples) * percentTrainingSamples);
    int newValidationSamplesStart = newTrainingSamplesEnd;
    int newValidationSamplesEnd = n - nTestSamples;
    int newTestSamplesStart = n - nTestSamples;
    int newTestSamplesEnd = n;

    double[] origSamples = origDataset.Samples;
    int columns = origDataset.Columns;

    for (int i = 0; i < nFolds; i++) {
      Scope childScope = new Scope(i.ToString());

      // Work on a copy so that the original dataset stays untouched.
      double[] samples = new double[origSamples.Length];
      Array.Copy(origSamples, samples, samples.Length);
      // NOTE(review): the offset is a multiple of Columns, which presumably shifts by whole rows
      // (assumes the sample array stores Columns consecutive values per row) - confirm against Dataset.
      RotateArray(samples, i * nTestSamples * columns);

      Dataset rotatedSet = new Dataset();
      rotatedSet.Rows = origDataset.Rows;
      rotatedSet.Columns = columns;
      rotatedSet.Samples = samples;

      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(DATASET), rotatedSet));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TRAININGSAMPLESSTART), new IntData(newTrainingSamplesStart)));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TRAININGSAMPLESEND), new IntData(newTrainingSamplesEnd)));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(VALIDATIONSAMPLESSTART), new IntData(newValidationSamplesStart)));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(VALIDATIONSAMPLESEND), new IntData(newValidationSamplesEnd)));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TESTSAMPLESSTART), new IntData(newTestSamplesStart)));
      childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TESTSAMPLESEND), new IntData(newTestSamplesEnd)));
      scope.AddSubScope(childScope);
    }
    return null;
  }

  /// <summary>
  /// Rotates <paramref name="samples"/> in place to the left by <paramref name="p"/> positions:
  /// the first <paramref name="p"/> elements are moved to the end of the array.
  /// </summary>
  /// <param name="samples">The array to rotate.</param>
  /// <param name="p">Number of positions to rotate by; must lie in [0, samples.Length].</param>
  private static void RotateArray(double[] samples, int p) {
    // Rotation by three reversals: reverse both parts, then reverse the whole array.
    Array.Reverse(samples, 0, p);
    Array.Reverse(samples, p, samples.Length - p);
    Array.Reverse(samples);
  }
}
}