Changeset 3933


Ignore:
Timestamp:
06/21/10 13:21:14 (9 years ago)
Author:
mkommend
Message:

Removed cloning of the dataset and made it read-only (ticket #938).

Location:
trunk/sources
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/SupportVectorRegression/SupportVectorRegressionSolution.cs

    r3916 r3933  
    6565    private Dataset CalculateSupportVectors() {
    6666      if (Model.Model.SupportVectorIndizes.Length == 0)
    67         return new Dataset();
     67        return new Dataset(new List<string>(),new double[0,0]);
    6868
    69       Dataset dataset = new Dataset(ProblemData.Dataset.VariableNames, new double[Model.Model.SupportVectorCount, ProblemData.Dataset.Columns]);
     69      double[,] data = new double[Model.Model.SupportVectorIndizes.Length, ProblemData.Dataset.Columns];
    7070      for (int i = 0; i < Model.Model.SupportVectorIndizes.Length; i++) {
    7171        for (int column = 0; column < ProblemData.Dataset.Columns; column++)
    72           dataset[i, column] = ProblemData.Dataset[Model.Model.SupportVectorIndizes[i], column];
     72          data[i, column] = ProblemData.Dataset[Model.Model.SupportVectorIndizes[i], column];
    7373      }
    74       return dataset;
     74      return new Dataset(ProblemData.Dataset.VariableNames, data);
    7575    }
    7676
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/EstimatedValuesView.cs

    r3566 r3933  
    9191        double[,] values =
    9292        MatrixExtensions<double>.Create(
    93           Content.ProblemData.Dataset[Content.ProblemData.TargetVariable.Value],
     93          Content.ProblemData.Dataset.GetVariableValues(Content.ProblemData.TargetVariable.Value),
    9494          Content.EstimatedValues.ToArray());
    9595        var content = new DoubleMatrix(values);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/LineChartView.cs

    r3916 r3933  
    6464      this.chart.Series[TARGETVARIABLE_SERIES_NAME].LegendText = Content.ProblemData.TargetVariable.Value;
    6565      this.chart.Series[TARGETVARIABLE_SERIES_NAME].ChartType = SeriesChartType.FastLine;
    66       this.chart.Series[TARGETVARIABLE_SERIES_NAME].Points.DataBindY(Content.ProblemData.Dataset[Content.ProblemData.TargetVariable.Value]);
     66      this.chart.Series[TARGETVARIABLE_SERIES_NAME].Points.DataBindY(Content.ProblemData.Dataset.GetVariableValues(Content.ProblemData.TargetVariable.Value));
    6767      this.UpdateStripLines();
    6868
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/ScatterPlotView.cs

    r3904 r3933  
    137137        int testStart = Content.ProblemData.TestSamplesStart.Value;
    138138        int testEnd = Content.ProblemData.TestSamplesEnd.Value;
    139         if(this.chart.Series[ALL_SERIES].Points.Count > 0)
    140         this.chart.Series[ALL_SERIES].Points.DataBindXY(Content.EstimatedValues.ToArray(), "",
    141           dataset[targetVariableName], "");
     139        if (this.chart.Series[ALL_SERIES].Points.Count > 0)
     140          this.chart.Series[ALL_SERIES].Points.DataBindXY(Content.EstimatedValues.ToArray(), "",
     141            dataset.GetVariableValues(targetVariableName), "");
    142142        if (this.chart.Series[TRAINING_SERIES].Points.Count > 0)
    143         this.chart.Series[TRAINING_SERIES].Points.DataBindXY(Content.EstimatedTrainingValues.ToArray(), "",
    144           dataset.GetVariableValues(targetVariableName, trainingStart, trainingEnd), "");
     143          this.chart.Series[TRAINING_SERIES].Points.DataBindXY(Content.EstimatedTrainingValues.ToArray(), "",
     144            dataset.GetVariableValues(targetVariableName, trainingStart, trainingEnd), "");
    145145        if (this.chart.Series[TEST_SERIES].Points.Count > 0)
    146         this.chart.Series[TEST_SERIES].Points.DataBindXY(Content.EstimatedTestValues.ToArray(), "",
    147           dataset.GetVariableValues(targetVariableName, testStart, testEnd), "");
    148 
    149         double max = Math.Max(Content.EstimatedValues.Max(), dataset.GetMax(targetVariableName));
    150         double min = Math.Min(Content.EstimatedValues.Min(), dataset.GetMin(targetVariableName));
     146          this.chart.Series[TEST_SERIES].Points.DataBindXY(Content.EstimatedTestValues.ToArray(), "",
     147            dataset.GetVariableValues(targetVariableName, testStart, testEnd), "");
     148
     149        double max = Math.Max(Content.EstimatedValues.Max(), dataset.GetVariableValues(targetVariableName).Max());
     150        double min = Math.Min(Content.EstimatedValues.Min(), dataset.GetVariableValues(targetVariableName).Min());
    151151
    152152        max = Math.Ceiling(max) * 1.2;
     
    185185          case ALL_SERIES:
    186186            predictedValues = Content.EstimatedValues;
    187             targetValues = dataset[targetVariableName];
     187            targetValues = dataset.GetVariableValues(targetVariableName);
    188188            break;
    189189          case TRAINING_SERIES:
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs

    r3723 r3933  
    100100        if (value != Dataset) {
    101101          if (value == null) throw new ArgumentNullException();
    102           if (Dataset != null) DeregisterDatasetEventHandlers();
    103102          DatasetParameter.Value = value;
    104103        }
     
    216215
    217216    private void RegisterParameterValueEventHandlers() {
    218       RegisterDatasetEventHandlers();
    219217      RegisterInputVariablesEventHandlers();
    220218      if (TargetVariable != null) RegisterStringValueEventHandlers(TargetVariable);
     
    228226    #region parameter value changed event handlers
    229227    void DatasetParameter_ValueChanged(object sender, EventArgs e) {
    230       RegisterDatasetEventHandlers();
    231228      OnProblemDataChanged(EventArgs.Empty);
    232229    }
     
    258255    }
    259256    #endregion
    260 
    261 
    262     private void RegisterDatasetEventHandlers() {
    263       Dataset.DataChanged += new EventHandler<EventArgs<int, int>>(Dataset_DataChanged);
    264       Dataset.Reset += new EventHandler(Dataset_Reset);
    265       Dataset.ColumnNamesChanged += new EventHandler(Dataset_ColumnNamesChanged);
    266     }
    267 
    268     private void DeregisterDatasetEventHandlers() {
    269       Dataset.DataChanged -= new EventHandler<EventArgs<int, int>>(Dataset_DataChanged);
    270       Dataset.Reset -= new EventHandler(Dataset_Reset);
    271       Dataset.ColumnNamesChanged -= new EventHandler(Dataset_ColumnNamesChanged);
    272     }
    273 
    274     void Dataset_ColumnNamesChanged(object sender, EventArgs e) {
    275       OnProblemDataChanged(e);
    276     }
    277 
    278     void Dataset_Reset(object sender, EventArgs e) {
    279       OnProblemDataChanged(e);
    280     }
    281 
    282     void Dataset_DataChanged(object sender, EventArgs<int, int> e) {
    283       OnProblemDataChanged(e);
    284     }
    285257
    286258    private void RegisterInputVariablesEventHandlers() {
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisSolution.cs

    r3921 r3933  
    154154    public override IDeepCloneable Clone(Cloner cloner) {
    155155      DataAnalysisSolution clone = (DataAnalysisSolution)base.Clone(cloner);
    156       // don't clone the problem data!
    157       clone.problemData = problemData;
     156      clone.problemData = (DataAnalysisProblemData)cloner.Clone(problemData);
    158157      clone.model = (IDataAnalysisModel)cloner.Clone(model);
    159158      clone.lowerEstimationLimit = lowerEstimationLimit;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs

    r3892 r3933  
    3535  [StorableClass]
    3636  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    37     public Dataset()
    38       : base() {
    39       this.Name = string.Empty;
    40       this.data = new double[0, 0];
    41       this.variableNames = new string[0];
    42       this.SortableView = false;
     37    [StorableConstructor]
     38    protected Dataset(bool deserializing)
     39      : base(deserializing) {
    4340    }
    4441
    4542    public Dataset(IEnumerable<string> variableNames, double[,] data)
    46       : this() {
     43      : base() {
    4744      Name = "-";
    4845      if (variableNames.Count() != data.GetLength(1)) {
    4946        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
    5047      }
    51       this.data = data;
     48      this.data = (double[,])data.Clone();
    5249      this.variableNames = variableNames.ToArray();
    5350    }
     
    5754    public IEnumerable<string> VariableNames {
    5855      get { return variableNames; }
    59       private set {
    60         if (variableNames != value) {
    61           variableNames = value.ToArray();
    62           OnColumnNamesChanged();
    63         }
    64       }
    6556    }
    6657
     
    6960    private double[,] Data {
    7061      get { return data; }
    71       set {
    72         if (data != value) {
    73           if (value == null) throw new ArgumentNullException();
    74           this.data = value;
    75           OnReset(EventArgs.Empty);
    76         }
    77       }
    7862    }
    7963
     
    8165    public double this[int rowIndex, int columnIndex] {
    8266      get { return data[rowIndex, columnIndex]; }
    83       set {
    84         if (!value.Equals(data[rowIndex, columnIndex])) {
    85           data[rowIndex, columnIndex] = value;
    86           OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
    87           OnItemChanged(rowIndex, columnIndex);
    88         }
    89       }
    9067    }
     68
    9169    public double this[string variableName, int rowIndex] {
    9270      get {
     
    9472        return data[rowIndex, columnIndex];
    9573      }
    96       set {
    97         int columnIndex = GetVariableIndex(variableName);
    98         if (!value.Equals(data[rowIndex, columnIndex])) {
    99           data[rowIndex, columnIndex] = value;
    100           OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
    101           OnItemChanged(rowIndex, columnIndex);
    102         }
    103       }
    10474    }
    105     // access to full columns
    106     public double[] this[string variableName] {
    107       get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
     75
     76    public double[] GetVariableValues(int variableIndex) {
     77      return GetVariableValues(variableIndex, 0, Rows);
    10878    }
    10979
     
    11888        values[i] = data[i + start, variableIndex];
    11989      return values;
     90    }
     91
     92    public double[] GetVariableValues(string variableName) {
     93      return GetVariableValues(GetVariableIndex(variableName), 0, Rows);
    12094    }
    12195
     
    135109      throw new ArgumentException("The variable name " + variableName + " was not found.");
    136110    }
     111    #endregion
    137112
    138     public void SetVariableName(int variableIndex, string name) {
    139       if (name == null) throw new ArgumentNullException("Cannot set variable name to null for variable at index " + variableIndex + " variableIndex");
    140       if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
    141       if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
    142       variableNames[variableIndex] = name;
    143       OnColumnNamesChanged();
     113    public double[,] GetClonedData() {
     114      return (double[,])data.Clone();
    144115    }
    145116
    146     #endregion
    147 
    148     #region variable statistics
    149     public double GetMean(string variableName) {
    150       return GetMean(GetVariableIndex(variableName));
     117    public override IDeepCloneable Clone(Cloner cloner) {
     118      return this;
    151119    }
    152120
    153     public double GetMean(string variableName, int start, int end) {
    154       return GetMean(GetVariableIndex(variableName), start, end);
    155     }
    156 
    157     public double GetMean(int variableIndex) {
    158       return GetMean(variableIndex, 0, Rows);
    159     }
    160 
    161     public double GetMean(int variableIndex, int start, int end) {
    162       return GetVariableValues(variableIndex, start, end).Average();
    163     }
    164 
    165     public double GetRange(string variableName) {
    166       return GetRange(GetVariableIndex(variableName));
    167     }
    168 
    169     public double GetRange(int variableIndex) {
    170       return GetRange(variableIndex, 0, Rows);
    171     }
    172 
    173     public double GetRange(string variableName, int start, int end) {
    174       return GetRange(GetVariableIndex(variableName), start, end);
    175     }
    176 
    177     public double GetRange(int variableIndex, int start, int end) {
    178       var values = GetVariableValues(variableIndex, start, end);
    179       return values.Max() - values.Min();
    180     }
    181 
    182     public double GetMax(string variableName) {
    183       return GetMax(GetVariableIndex(variableName));
    184     }
    185 
    186     public double GetMax(int variableIndex) {
    187       return GetMax(variableIndex, 0, Rows);
    188     }
    189 
    190     public double GetMax(string variableName, int start, int end) {
    191       return GetMax(GetVariableIndex(variableName), start, end);
    192     }
    193 
    194     public double GetMax(int variableIndex, int start, int end) {
    195       return GetVariableValues(variableIndex, start, end).Max();
    196     }
    197 
    198     public double GetMin(string variableName) {
    199       return GetMin(GetVariableIndex(variableName));
    200     }
    201 
    202     public double GetMin(int variableIndex) {
    203       return GetMin(variableIndex, 0, Rows);
    204     }
    205 
    206     public double GetMin(string variableName, int start, int end) {
    207       return GetMin(GetVariableIndex(variableName), start, end);
    208     }
    209 
    210     public double GetMin(int variableIndex, int start, int end) {
    211       return GetVariableValues(variableIndex, start, end).Min();
    212     }
    213 
    214     public int GetMissingValues(string variableName) {
    215       return GetMissingValues(GetVariableIndex(variableName));
    216     }
    217     public int GetMissingValues(int variableIndex) {
    218       return GetMissingValues(variableIndex, 0, Rows);
    219     }
    220 
    221     public int GetMissingValues(string variableName, int start, int end) {
    222       return GetMissingValues(GetVariableIndex(variableName), start, end);
    223     }
    224 
    225     public int GetMissingValues(int variableIndex, int start, int end) {
    226       return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    227     }
    228 
    229     #endregion
    230 
    231     public override IDeepCloneable Clone(Cloner cloner) {
    232       Dataset clone = (Dataset)base.Clone(cloner);
    233       clone.data = (double[,])data.Clone();
    234       clone.variableNames = (string[])variableNames.Clone();
    235       return clone;
    236     }
    237 
    238     #region events
    239     public event EventHandler<EventArgs<int, int>> DataChanged;
    240     private void OnDataChanged(EventArgs<int, int> e) {
    241       var listeners = DataChanged;
    242       if (listeners != null) listeners(this, e);
    243     }
    244121    public event EventHandler Reset;
    245122    private void OnReset(EventArgs e) {
     
    247124      if (listeners != null) listeners(this, e);
    248125    }
    249     #endregion
    250126
    251127    #region IStringConvertibleMatrix Members
    252128
    253129    public int Rows {
    254       get {
    255         return data.GetLength(0);
    256       }
    257       set {
    258         if (value == 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
    259         if (value != Rows) {
    260           var newValues = new double[value, Columns];
    261           for (int row = 0; row < Math.Min(Rows, value); row++) {
    262             for (int column = 0; column < Columns; column++) {
    263               newValues[row, column] = data[row, column];
    264             }
    265           }
    266           Data = newValues;
    267         }
    268       }
     130      get { return data.GetLength(0); }
     131      set { throw new NotSupportedException(); }
    269132    }
    270133
    271134    public int Columns {
    272       get {
    273         return data.GetLength(1);
    274       }
    275       set {
    276         if (value != Columns) {
    277           var newValues = new double[Rows, value];
    278           var newVariableNames = new string[value];
    279           for (int row = 0; row < Rows; row++) {
    280             for (int column = 0; column < Math.Min(value, Columns); column++) {
    281               newValues[row, column] = data[row, column];
    282             }
    283           }
    284           string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###
    285           for (int column = 0; column < value; column++) {
    286             if (column < Columns)
    287               newVariableNames[column] = variableNames[column];
    288             else
    289               newVariableNames[column] = "Var" + column.ToString(formatString);
    290           }
    291           VariableNames = newVariableNames;
    292           Data = newValues;
    293         }
    294       }
     135      get { return data.GetLength(1); }
     136      set { throw new NotSupportedException(); }
    295137    }
    296138
    297     [Storable]
    298     private bool sortableView;
    299139    public bool SortableView {
    300       get { return sortableView; }
    301       set {
    302         if (value != sortableView) {
    303           sortableView = value;
    304           OnSortableViewChanged();
    305         }
    306       }
     140      get { return false; }
     141      set { throw new NotSupportedException(); }
    307142    }
    308143
    309144    public bool ReadOnly {
    310       get { return false; }
     145      get { return true; }
    311146    }
    312147
    313148    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
    314149      get { return this.VariableNames; }
    315       set {
    316         int i = 0;
    317         foreach (string variableName in value) {
    318           SetVariableName(i, variableName);
    319           i++;
    320         }
    321         OnColumnNamesChanged();
    322       }
     150      set { throw new NotSupportedException(); }
    323151    }
    324152
    325153    IEnumerable<string> IStringConvertibleMatrix.RowNames {
    326154      get { return new List<string>(); }
    327       set { throw new NotImplementedException(); }
     155      set { throw new NotSupportedException(); }
    328156    }
    329157
    330158    public bool Validate(string value, out string errorMessage) {
    331       double val;
    332       bool valid = double.TryParse(value, out val);
    333       errorMessage = string.Empty;
    334       if (!valid) {
    335         StringBuilder sb = new StringBuilder();
    336         sb.Append("Invalid Value (Valid Value Format: \"");
    337         sb.Append(FormatPatterns.GetDoubleFormatPattern());
    338         sb.Append("\")");
    339         errorMessage = sb.ToString();
    340       }
    341       return valid;
     159      throw new NotSupportedException();
    342160    }
    343161
     
    347165
    348166    public bool SetValue(string value, int rowIndex, int columnIndex) {
    349       double v;
    350       if (double.TryParse(value, out v)) {
    351         data[rowIndex, columnIndex] = v;
    352         OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
    353         OnItemChanged(rowIndex, columnIndex);
    354         return true;
    355       } else return false;
     167      throw new NotSupportedException();
    356168    }
    357169
     
    381193    }
    382194    #endregion
    383 
    384 
    385195  }
    386196}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs

    r3884 r3933  
    152152
    153153      int reducedRows = (int)((SamplesEnd.Value - SamplesStart.Value) * reductionRatio);
    154       var reducedProblemData = (DataAnalysisProblemData)DataAnalysisProblemData.Clone();
    155       ShuffleRows(RandomParameter.ActualValue, reducedProblemData.Dataset, SamplesStart.Value, SamplesEnd.Value);
     154      DataAnalysisProblemData reducedProblemData = (DataAnalysisProblemData)DataAnalysisProblemData.Clone();
     155      reducedProblemData.Dataset = CreateReducedDataset(RandomParameter.ActualValue, reducedProblemData.Dataset, reductionRatio, SamplesStart.Value, SamplesEnd.Value);
    156156
    157157      double quality = PerformCrossValidation(reducedProblemData,
     
    164164    }
    165165
    166     private void ShuffleRows(IRandom random, Dataset dataset, int start, int end) {
    167       for (int row = end - 1; row > start ; row--) {
    168         int otherRow = random.Next(start, row);
    169         for (int column = 0; column < dataset.Columns; column++) {
    170           double tmp = dataset[otherRow, column];
    171           dataset[otherRow, column] = dataset[row, column];
    172           dataset[row, column] = tmp;
    173         }
     166    private Dataset CreateReducedDataset(IRandom random, Dataset dataset, double reductionRatio, int start, int end) {
     167      int reducedRows = (int)((end - start) * reductionRatio);
     168      double[,] reducedData = dataset.GetClonedData();
     169      HashSet<int> leftRows = new HashSet<int>(Enumerable.Range(0, end - start));
     170      for (int row = 0; row < reducedRows; row++) {
     171        int rowIndex = random.Next(0, leftRows.Count);
     172        leftRows.Remove(rowIndex);
     173        for (int column = 0; column < dataset.Columns; column++)
     174          reducedData[row, column] = dataset[rowIndex, column];
    174175      }
     176      return new Dataset(dataset.VariableNames, reducedData);
    175177    }
    176178
Note: See TracChangeset for help on using the changeset viewer.