Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp: 05/14/11 16:45:46 (13 years ago)
Author: abeham
Message:

#1465

  • updated branch with latest version of trunk
Location: branches/histogram
Files: 12 edited, 4 copied


Legend: Unmodified, Added, Removed
  • branches/histogram

  • branches/histogram/HeuristicLab.Problems.DataAnalysis

  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLabProblemsDataAnalysisPlugin.cs.frame

    r5446 r6195  
    2626
    2727namespace HeuristicLab.Problems.DataAnalysis {
    28   [Plugin("HeuristicLab.Problems.DataAnalysis", "3.3.3.$WCREV$")]
     28  [Plugin("HeuristicLab.Problems.DataAnalysis", "3.3.4.$WCREV$")]
    2929  [PluginFile("HeuristicLab.Problems.DataAnalysis-3.3.dll", PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.ALGLIB", "3.1")]
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.3/Properties/AssemblyInfo.frame

    r5446 r6195  
    5353// by using the '*' as shown below:
    5454[assembly: AssemblyVersion("3.3.0.0")]
    55 [assembly: AssemblyFileVersion("3.3.3.$WCREV$")]
     55[assembly: AssemblyFileVersion("3.3.4.$WCREV$")]
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r5943 r6195  
    109109  <ItemGroup>
    110110    <Compile Include="DoubleLimit.cs" />
     111    <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs">
     112      <SubType>Code</SubType>
     113    </Compile>
    111114    <Compile Include="Implementation\Classification\ClassificationProblemData.cs" />
    112115    <Compile Include="Implementation\Classification\ClassificationProblem.cs" />
     
    115118    <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" />
    116119    <Compile Include="Implementation\Clustering\ClusteringSolution.cs" />
    117     <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs" />
     120    <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs">
     121      <SubType>Code</SubType>
     122    </Compile>
     123    <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" />
     124    <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs">
     125      <SubType>Code</SubType>
     126    </Compile>
     127    <Compile Include="Interfaces\Classification\IClassificationEnsembleSolution.cs" />
    118128    <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" />
    119     <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" />
    120     <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" />
     129    <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs">
     130      <SubType>Code</SubType>
     131    </Compile>
     132    <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" />
    121133    <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" />
    122     <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs" />
    123134    <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationModel.cs" />
    124135    <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationSolution.cs" />
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r5847 r6195  
    3737    private const string ClassNamesParameterName = "ClassNames";
    3838    private const string ClassificationPenaltiesParameterName = "ClassificationPenalties";
     39    private const int MaximumNumberOfClass = 100;
     40    private const int InspectedRowsToDetermineTargets = 500;
    3941
    4042    #region default data
     
    252254    public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable)
    253255      : base(dataset, allowedInputVariables) {
    254       var variables = InputVariables.Select(x => x.AsReadOnly()).ToList();
    255       Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First()));
     256      var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList();
     257      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
     258
     259      Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(validTargetVariableValues), target));
    256260      Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, ""));
    257261      Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, ""));
     
    260264      RegisterParameterEvents();
    261265    }
     266
     267    private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) {
     268      var validTargetVariables = from v in dataset.VariableNames
     269                                 let DistinctValues = dataset.Rows > InspectedRowsToDetermineTargets ? dataset.GetVariableValues(v, 0, InspectedRowsToDetermineTargets).Distinct().Count()
     270                                                                        : dataset.GetVariableValues(v).Distinct().Count()
     271                                 where DistinctValues < MaximumNumberOfClass
     272                                 select v;
     273
     274      if (!validTargetVariables.Any())
     275        throw new ArgumentException("Import of classification problem data was not successfull, because no target variable was found." +
     276          " A target variable must have at most " + MaximumNumberOfClass + " distinct values to be applicable to classification.");
     277      return validTargetVariables;
     278    }
     279
    262280
    263281    private void ResetTargetVariableDependentMembers() {
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs

    r5942 r6195  
    3030namespace HeuristicLab.Problems.DataAnalysis {
    3131  /// <summary>
    32   /// Abstract base class for classification data analysis solutions
     32  /// Represents a classification data analysis solution
    3333  /// </summary>
    3434  [StorableClass]
    35   public abstract class ClassificationSolution : DataAnalysisSolution, IClassificationSolution {
     35  public class ClassificationSolution : DataAnalysisSolution, IClassificationSolution {
    3636    private const string TrainingAccuracyResultName = "Accuracy (training)";
    3737    private const string TestAccuracyResultName = "Accuracy (test)";
     
    6767      Add(new Result(TestAccuracyResultName, "Accuracy of the model on the test partition (percentage of correctly classified instances).", new PercentValue()));
    6868      RecalculateResults();
     69    }
     70
     71    public override IDeepCloneable Clone(Cloner cloner) {
     72      return new ClassificationSolution(this, cloner);
    6973    }
    7074
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs

    r5809 r6195  
    2727namespace HeuristicLab.Problems.DataAnalysis {
    2828  /// <summary>
    29   /// Abstract base class for clustering data analysis solutions
     29  /// Represents a clustering data analysis solution
    3030  /// </summary>
    3131  [StorableClass]
    32   public abstract class ClusteringSolution : DataAnalysisSolution, IClusteringSolution {
     32  public class ClusteringSolution : DataAnalysisSolution, IClusteringSolution {
    3333
    3434    [StorableConstructor]
     
    3939    public ClusteringSolution(IClusteringModel model, IClusteringProblemData problemData)
    4040      : base(model, problemData) {
     41    }
     42
     43    public override IDeepCloneable Clone(Cloner cloner) {
     44      return new ClusteringSolution(this, cloner);
    4145    }
    4246
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs

    r6011 r6195  
    3030namespace HeuristicLab.Problems.DataAnalysis {
    3131  /// <summary>
    32   /// Abstract base class for regression data analysis solutions
     32  /// Represents a regression data analysis solution
    3333  /// </summary>
    3434  [StorableClass]
    35   public abstract class RegressionSolution : DataAnalysisSolution, IRegressionSolution {
     35  public class RegressionSolution : DataAnalysisSolution, IRegressionSolution {
    3636    private const string TrainingMeanSquaredErrorResultName = "Mean squared error (training)";
    3737    private const string TestMeanSquaredErrorResultName = "Mean squared error (test)";
     
    5555    public double TrainingMeanSquaredError {
    5656      get { return ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value; }
    57       private set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; }
     57      protected set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; }
    5858    }
    5959
    6060    public double TestMeanSquaredError {
    6161      get { return ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value; }
    62       private set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; }
     62      protected set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; }
    6363    }
    6464
    6565    public double TrainingRSquared {
    6666      get { return ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value; }
    67       private set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; }
     67      protected set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; }
    6868    }
    6969
    7070    public double TestRSquared {
    7171      get { return ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value; }
    72       private set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; }
     72      protected set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; }
    7373    }
    7474
    7575    public double TrainingRelativeError {
    7676      get { return ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value; }
    77       private set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; }
     77      protected set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; }
    7878    }
    7979
    8080    public double TestRelativeError {
    8181      get { return ((DoubleValue)this[TestRelativeErrorResultName].Value).Value; }
    82       private set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; }
     82      protected set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; }
    8383    }
    8484
    8585    public double TrainingNormalizedMeanSquaredError {
    8686      get { return ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value; }
    87       private set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; }
     87      protected set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; }
    8888    }
    8989
    9090    public double TestNormalizedMeanSquaredError {
    9191      get { return ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value; }
    92       private set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; }
     92      protected set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; }
    9393    }
    9494
     
    113113    }
    114114
     115    public override IDeepCloneable Clone(Cloner cloner) {
     116      return new RegressionSolution(this, cloner);
     117    }
     118
    115119    protected override void OnProblemDataChanged(EventArgs e) {
    116120      base.OnProblemDataChanged(e);
     
    122126    }
    123127
    124     protected void RecalculateResults() {
     128    private void RecalculateResults() {
    125129      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    126130      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanAndVarianceCalculator.cs

    r5945 r6195  
    7373
    7474    public void Add(double x) {
    75       if (double.IsNaN(x) || double.IsInfinity(x) || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) {
     75      if (double.IsNaN(x) || double.IsInfinity(x) || x > 1E13 || x < -1E13 || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) {
    7676        errorState = errorState | OnlineCalculatorError.InvalidValueAdded;
    7777        varianceErrorState = errorState | OnlineCalculatorError.InvalidValueAdded;
     
    8484          errorState = errorState & (~OnlineCalculatorError.InsufficientElementsAdded);        // n >= 1
    8585        } else {
     86
    8687          varianceErrorState = varianceErrorState & (~OnlineCalculatorError.InsufficientElementsAdded);        // n >= 2
    8788          m_newM = m_oldM + (x - m_oldM) / n;
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Tests

    • Property svn:ignore

        old new
          2   2  obj
          3   3  *.vs10x
              4  *.user
  • branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Tests/StatisticCalculatorsTest.cs

    r5944 r6195  
    7878          double variance = calculator.Variance;
    7979
    80           Assert.AreEqual(mean_alglib, mean, 1E-6 * scale);
    81           Assert.AreEqual(variance_alglib, variance, 1E-6 * scale);
     80          Assert.AreEqual(mean_alglib.ToString(), mean.ToString());
     81          Assert.AreEqual(variance_alglib.ToString(), variance.ToString());
    8282        }
    8383      }
     
    109109            double r2 = r2Calculator.RSquared;
    110110
    111             Assert.AreEqual(r2_alglib, r2, 1E-6 * Math.Max(c1Scale, c2Scale));
     111            Assert.AreEqual(r2_alglib.ToString(), r2.ToString());
    112112          }
    113113        }
    114114      }
    115115    }
     116    [TestMethod]
     117    public void CalculatePearsonsRSquaredOfConstantTest() {
     118      System.Random random = new System.Random(31415);
     119      int n = 12;
     120      int cols = testData.GetLength(1);
     121      for (int c1 = 0; c1 < cols; c1++) {
     122        double c1Scale = random.NextDouble() * 1E7;
     123        double c2Scale = 1.0;
     124        IEnumerable<double> x = from rows in Enumerable.Range(0, n)
     125                                select testData[rows, c1] * c1Scale;
     126        IEnumerable<double> y = (new List<double>() { 150494407424305.44 })
     127          .Concat(Enumerable.Repeat(150494407424305.47, n - 1));
     128        double[] xs = x.ToArray();
     129        double[] ys = y.ToArray();
     130        double r2_alglib = alglib.pearsoncorrelation(xs, ys, n);
     131        r2_alglib *= r2_alglib;
     132
     133        var r2Calculator = new OnlinePearsonsRSquaredCalculator();
     134        for (int i = 0; i < n; i++) {
     135          r2Calculator.Add(xs[i], ys[i]);
     136        }
     137        double r2 = r2Calculator.RSquared;
     138
     139        Assert.AreEqual(r2_alglib.ToString(), r2.ToString());
     140      }
     141    }
    116142  }
    117143}
Note: See TracChangeset for help on using the changeset viewer.