Context Navigation

← Previous Change
Next Change →

Changeset 3264 for trunk

Timestamp:

04/04/10 18:53:55 (15 years ago)

Author:

gkronber

Message:

Implemented import of CSV files for regression problems. #938 (Data types and operators for regression problems)

Location:

trunk/sources

Files:

2 added
4 edited
2 copied

HeuristicLab.Problems.DataAnalysis.Regression/3.3/CsvFileParser.cs (copied) (copied from trunk/sources/HeuristicLab.DataAnalysis/3.2/DatasetParser.cs) (12 diffs)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/DataFormatException.cs (copied) (copied from trunk/sources/HeuristicLab.DataAnalysis/3.2/DataFormatException.cs) (3 diffs)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj (modified) (3 diffs)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/RegressionProblem.cs (modified) (3 diffs)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/RegressionProblemView.Designer.cs (added)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/RegressionProblemView.cs (added)
HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemView.cs (modified) (1 diff)
HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs (modified) (1 diff)

Legend:

Unmodified
Added
Removed

trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/CsvFileParser.cs

-                      r3262
+                      r3264
 #region License Information
 /* HeuristicLab
  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
  * This file is part of HeuristicLab.
 …
 using System.Text;
+namespace HeuristicLab.DataAnalysis {
+  public class DatasetParser {
+    private const string PROBLEMNAME = "PROBLEMNAME";
+namespace HeuristicLab.Problems.DataAnalysis.Regression {
+  public class CsvFileParser {
     private const string VARIABLENAMES = "VARIABLENAMES";
-    private const string TARGETVARIABLE = "TARGETVARIABLE";
-    private const string MAXIMUMTREEHEIGHT = "MAXIMUMTREEHEIGHT";
-    private const string MAXIMUMTREESIZE = "MAXIMUMTREESIZE";
-    private const string TRAININGSAMPLESSTART = "TRAININGSAMPLESSTART";
-    private const string TRAININGSAMPLESEND = "TRAININGSAMPLESEND";
-    private const string VALIDATIONSAMPLESSTART = "VALIDATIONSAMPLESSTART";
-    private const string VALIDATIONSAMPLESEND = "VALIDATIONSAMPLESEND";
-    private const string TESTSAMPLESSTART = "TESTSAMPLESSTART";
-    private const string TESTSAMPLESEND = "TESTSAMPLESEND";
-    private const string NONINPUTVARIABLES = "NONINPUTVARIABLES";
     private Tokenizer tokenizer;
     private Dictionary<string, List<Token>> metadata;
     private List<List<double>> samplesList;
+    private List<string> variableNames;
+    private List<List<double>> rowValues;
     private int rows;
 …
+    }
     private double[] samples;
     public double[] Samples {
+    private double[,] values;
+    public double[,] Values {
       get {
         return samples;
+      }
+    }
     public string ProblemName {
+        return values;
+      }
+    }
+    public IEnumerable<string> VariableNames {
       get {
+        if (metadata.ContainsKey(PROBLEMNAME)) {
+          return metadata[PROBLEMNAME][0].stringValue;
+        } else return "-";
+      }
+    }
+    public string[] VariableNames {
+      get {
+        if (metadata.ContainsKey(VARIABLENAMES)) {
+          List<Token> nameList = metadata[VARIABLENAMES];
+          string[] names = new string[nameList.Count];
+          for (int i = 0; i < names.Length; i++) {
+            names[i] = nameList[i].stringValue;
+          }
+          return names;
+        } else {
+        if (variableNames.Count > 0) return variableNames;
+        else {
           string[] names = new string[columns];
           for (int i = 0; i < names.Length; i++) {
 …
+    }
+    public int TargetVariable {
+      get {
+        if (metadata.ContainsKey(TARGETVARIABLE)) {
+          return metadata[TARGETVARIABLE][0].intValue;
+        } else return 0; // default is the first column
+      }
+    }
+    public int MaxTreeHeight {
+      get {
+        if (metadata.ContainsKey(MAXIMUMTREEHEIGHT)) {
+          return metadata[MAXIMUMTREEHEIGHT][0].intValue;
+        } else return 0;
+      }
+    }
+    public int MaxTreeSize {
+      get {
+        if (metadata.ContainsKey(MAXIMUMTREESIZE)) {
+          return metadata[MAXIMUMTREESIZE][0].intValue;
+        } else return 0;
+      }
+    }
+    public int TrainingSamplesStart {
+      get {
+        if (metadata.ContainsKey(TRAININGSAMPLESSTART)) {
+          return metadata[TRAININGSAMPLESSTART][0].intValue;
+        } else return 0;
+      }
+    }
+    public int TrainingSamplesEnd {
+      get {
+        if (metadata.ContainsKey(TRAININGSAMPLESEND)) {
+          return metadata[TRAININGSAMPLESEND][0].intValue;
+        } else return rows;
+      }
+    }
+    public int ValidationSamplesStart {
+      get {
+        if (metadata.ContainsKey(VALIDATIONSAMPLESSTART)) {
+          return metadata[VALIDATIONSAMPLESSTART][0].intValue;
+        } else return 0;
+      }
+    }
+    public int ValidationSamplesEnd {
+      get {
+        if (metadata.ContainsKey(VALIDATIONSAMPLESEND)) {
+          return metadata[VALIDATIONSAMPLESEND][0].intValue;
+        } else return rows;
+      }
+    }
+    public int TestSamplesStart {
+      get {
+        if (metadata.ContainsKey(TESTSAMPLESSTART)) {
+          return metadata[TESTSAMPLESSTART][0].intValue;
+        } else return 0;
+      }
+    }
+    public int TestSamplesEnd {
+      get {
+        if (metadata.ContainsKey(TESTSAMPLESEND)) {
+          return metadata[TESTSAMPLESEND][0].intValue;
+        } else return rows;
+      }
+    }
+    public List<int> NonInputVariables {
+      get {
+        List<int> disallowedVariables = new List<int>();
+        if (metadata.ContainsKey(NONINPUTVARIABLES)) {
+          foreach (Token t in metadata[NONINPUTVARIABLES]) {
+            disallowedVariables.Add(t.intValue);
+          }
+        }
+        return disallowedVariables;
+      }
+    }
+    public DatasetParser() {
+      this.metadata = new Dictionary<string, List<Token>>();
+      samplesList = new List<List<double>>();
+    }
+    public void Reset() {
+      metadata.Clear();
+      samplesList.Clear();
+    }
+    public void Import(string importFileName, bool strict) {
+      TryParse(importFileName, strict);
+    public CsvFileParser() {
+      rowValues = new List<List<double>>();
+      variableNames = new List<string>();
+    }
+    private void Reset() {
+      variableNames.Clear();
+      rowValues.Clear();
+    }
+    public void Parse(string fileName) {
+      TryParse(fileName);
       // translate the list of samples into a DoubleMatrixData item
       samples = new double[samplesList.Count * samplesList[0].Count];
       rows = samplesList.Count;
       columns = samplesList[0].Count;
       int i = 0;
       int j = 0;
       foreach (List<double> row in samplesList) {
         j = 0;
+      rows = rowValues.Count;
+      columns = rowValues[0].Count;
+      values = new double[rows, columns];
+      int rowIndex = 0;
+      int columnIndex = 0;
+      foreach (List<double> row in rowValues) {
+        columnIndex = 0;
         foreach (double element in row) {
+          samples[i * columns + j] = element;
+          j++;
+        }
+        i++;
+      }
+    }
+    private void TryParse(string importFileName, bool strict) {
+          values[rowIndex, columnIndex++] = element;
+        }
+        rowIndex++;
+      }
+    }
+    private void TryParse(string fileName) {
       Exception lastEx = null;
       NumberFormatInfo[] possibleFormats = new NumberFormatInfo[] { NumberFormatInfo.InvariantInfo, CultureInfo.GetCultureInfo("de-DE").NumberFormat, NumberFormatInfo.CurrentInfo };
+      NumberFormatInfo[] possibleFormats = new NumberFormatInfo[] { NumberFormatInfo.InvariantInfo };
       foreach (NumberFormatInfo numberFormat in possibleFormats) {
         using (StreamReader reader = new StreamReader(importFileName)) {
+        using (StreamReader reader = new StreamReader(fileName)) {
           tokenizer = new Tokenizer(reader, numberFormat);
           try {
             // parse the file
             Parse(strict);
+            Parse();
             return; // parsed without errors -> return;
+          }
 …
     #region tokenizer
     internal enum TokenTypeEnum {
       At, Assign, NewLine, SemiColon, String, Double, Int
+      NewLine, Separator, String, Double
+    }
 …
       public string stringValue;
       public double doubleValue;
-      public int intValue;
       public Token(TokenTypeEnum type, string value) {
 …
         stringValue = value;
         doubleValue = 0.0;
-        intValue = 0;
+      }
 …
     class Tokenizer {
+    internal class Tokenizer {
       private StreamReader reader;
       private List<Token> tokens;
       private NumberFormatInfo numberFormatInfo;
+      public int CurrentLineNumber = 0;
+      public string CurrentLine;
+      public static Token NewlineToken = new Token(TokenTypeEnum.NewLine, "\n");
+      public static Token AtToken = new Token(TokenTypeEnum.At, "@");
+      public static Token AssignmentToken = new Token(TokenTypeEnum.Assign, "=");
+      public static Token SeparatorToken = new Token(TokenTypeEnum.SemiColon, ";");
+      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo) {
+      private int currentLineNumber = 0;
+      public int CurrentLineNumber {
+        get { return currentLineNumber; }
+        private set { currentLineNumber = value; }
+      }
+      private string currentLine;
+      public string CurrentLine {
+        get { return currentLine; }
+        private set { currentLine = value; }
+      }
+      private Token newlineToken;
+      public Token NewlineToken {
+        get { return newlineToken; }
+        private set { newlineToken = value; }
+      }
+      private Token separatorToken;
+      public Token SeparatorToken {
+        get { return separatorToken; }
+        private set { separatorToken = value; }
+      }
+      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, char separator) {
         this.reader = reader;
         this.numberFormatInfo = numberFormatInfo;
+        separatorToken = new Token(TokenTypeEnum.Separator, separator.ToString());
+        newlineToken = new Token(TokenTypeEnum.NewLine, Environment.NewLine);
         tokens = new List<Token>();
         ReadNextTokens();
+      }
+      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo)
+        : this(reader, numberFormatInfo, ';') {
+      }
 …
         StringBuilder subStr = new StringBuilder();
         foreach (char c in line) {
           if (c == '@' || c == '=' || c == ';') {
+          if (c == ';') {
             yield return subStr.ToString();
             subStr = new StringBuilder();
 …
       private Token MakeToken(string strToken) {
         Token token = new Token(TokenTypeEnum.String, strToken);
+        if (strToken.Equals(AtToken.stringValue)) {
+          return AtToken;
+        } else if (strToken.Equals(AssignmentToken.stringValue)) {
+          return AssignmentToken;
+        } else if (strToken.Equals(SeparatorToken.stringValue)) {
+        if (strToken.Equals(SeparatorToken.stringValue)) {
           return SeparatorToken;
-        } else if (int.TryParse(strToken, NumberStyles.Integer, numberFormatInfo, out token.intValue)) {
-          token.type = TokenTypeEnum.Int;
-          return token;
         } else if (double.TryParse(strToken, NumberStyles.Float, numberFormatInfo, out token.doubleValue)) {
           token.type = TokenTypeEnum.Double;
 …
     #region parsing
     private void Parse(bool strict) {
       ParseMetaData(strict);
+    private void Parse() {
+      ParseVariableNames();
       if (!tokenizer.HasNext()) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
       ParseSampleData(strict);
       if (samplesList.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
+    }
     private void ParseSampleData(bool strict) {
+      ParseValues();
+      if (rowValues.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
+    }
+    private void ParseValues() {
       while (tokenizer.HasNext()) {
         List<double> row = new List<double>();
+        row.Add(NextValue(tokenizer, strict));
+        while (tokenizer.HasNext() && tokenizer.Peek() == Tokenizer.SeparatorToken) {
+          Expect(Tokenizer.SeparatorToken);
+          row.Add(NextValue(tokenizer, strict));
+        }
+        Expect(Tokenizer.NewlineToken);
+        // when parsing strictly all rows have to have the same number of values
+        if (strict) {
+          // the first row defines how many samples are needed
+          if (samplesList.Count > 0 && samplesList[0].Count != row.Count) {
+            Error("The first row of the dataset has " + samplesList[0].Count + " columns." +
+              "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", tokenizer.CurrentLineNumber);
+          }
+        } else if (samplesList.Count > 0) {
+          // when we are not strict then fill or drop elements as needed
+          if (samplesList[0].Count > row.Count) {
+            // fill with NAN
+            for (int i = row.Count; i < samplesList[0].Count; i++) {
+              row.Add(double.NaN);
+            }
+          } else if (samplesList[0].Count < row.Count) {
+            // drop last k elements where k = n - length of first row
+            row.RemoveRange(samplesList[0].Count - 1, row.Count - samplesList[0].Count);
+          }
+        }
+        row.Add(NextValue(tokenizer));
+        while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
+          Expect(tokenizer.SeparatorToken);
+          row.Add(NextValue(tokenizer));
+        }
+        Expect(tokenizer.NewlineToken);
+        // all rows have to have the same number of values
+        // the first row defines how many samples are needed
+        if (rowValues.Count > 0 && rowValues[0].Count != row.Count) {
+          Error("The first row of the dataset has " + rowValues[0].Count + " columns." +
+            "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", tokenizer.CurrentLineNumber);
+        }
         // add the current row to the collection of rows and start a new row
         samplesList.Add(row);
+        rowValues.Add(row);
         row = new List<double>();
+      }
+    }
     private double NextValue(Tokenizer tokenizer, bool strict) {
       if (tokenizer.Peek() == Tokenizer.SeparatorToken || tokenizer.Peek() == Tokenizer.NewlineToken) return double.NaN;
+    private double NextValue(Tokenizer tokenizer) {
+      if (tokenizer.Peek() == tokenizer.SeparatorToken || tokenizer.Peek() == tokenizer.NewlineToken) return double.NaN;
       Token current = tokenizer.Next();
       if (current.type == TokenTypeEnum.SemiColon || current.type == TokenTypeEnum.String) {
+      if (current.type == TokenTypeEnum.Separator || current.type == TokenTypeEnum.String) {
         return double.NaN;
       } else if (current.type == TokenTypeEnum.Double) {
         // just take the value
         return current.doubleValue;
+      } else if (current.type == TokenTypeEnum.Int) {
+        // translate the int value to double
+        return (double)current.intValue;
+      } else {
+        // found an unexpected token => throw error when parsing strictly
+        // when we are parsing non-strictly we also allow unreadable values inserting NAN instead
+        if (strict) {
+          Error("Unexpected token.", current.stringValue, tokenizer.CurrentLineNumber);
+        } else {
+          return double.NaN;
+        }
+      }
+      return double.NaN;
+    }
+    private void ParseMetaData(bool strict) {
+      while (tokenizer.HasNext() && tokenizer.Peek() == Tokenizer.AtToken) {
+        Expect(Tokenizer.AtToken);
+        Token nameToken = tokenizer.Next();
+        Expect(Tokenizer.AssignmentToken);
+      }
+      // found an unexpected token => throw error
+      Error("Unexpected token.", current.stringValue, tokenizer.CurrentLineNumber);
+      // this line is never executed because Error() throws an exception
+      throw new InvalidOperationException();
+    }
+    private void ParseVariableNames() {
+      // if the first line doesn't start with a double value then we assume that the
+      // first line contains variable names
+      if (tokenizer.HasNext() && tokenizer.Peek().type != TokenTypeEnum.Double) {
         List<Token> tokens = new List<Token>();
 …
         valueToken = tokenizer.Next();
         tokens.Add(valueToken);
         while (tokenizer.HasNext() && tokenizer.Peek() == Tokenizer.SeparatorToken) {
           Expect(Tokenizer.SeparatorToken);
+        while (tokenizer.HasNext() && tokenizer.Peek() == tokenizer.SeparatorToken) {
+          Expect(tokenizer.SeparatorToken);
           valueToken = tokenizer.Next();
           if (valueToken != Tokenizer.NewlineToken) {
+          if (valueToken != tokenizer.NewlineToken) {
             tokens.Add(valueToken);
+          }
+        }
         if (valueToken != Tokenizer.NewlineToken) {
           Expect(Tokenizer.NewlineToken);
+        }
         metadata[nameToken.stringValue] = tokens;
+        if (valueToken != tokenizer.NewlineToken) {
+          Expect(tokenizer.NewlineToken);
+        }
+        variableNames = tokens.Select(x => x.stringValue.Trim()).ToList();
+      }
+    }

trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/DataFormatException.cs

-                      r3262
+                      r3264
 #region License Information
 /* HeuristicLab
  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
  * This file is part of HeuristicLab.
 …
 using System.Text;
 namespace HeuristicLab.DataAnalysis {
   public class DataFormatException : Exception{
+namespace HeuristicLab.Problems.DataAnalysis.Regression {
+  public class DataFormatException : Exception {
     private int line;
     public int Line {
 …
       get { return token; }
+    }
+    public DataFormatException(string message, string token, int line) : base(message+"\nToken: " + token + " (line: " + line + ")"){
+    public DataFormatException(string message, string token, int line)
+      : base(message + "\nToken: " + token + " (line: " + line + ")") {
       this.token = token;
       this.line = line;

trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

-                      r3257
+                      r3264
     <None Include="HeuristicLabProblemsDataAnalysisRegressionPlugin.cs.frame" />
     <None Include="Properties\AssemblyInfo.frame" />
+    <Compile Include="CsvFileParser.cs" />
+    <Compile Include="DataFormatException.cs" />
     <Compile Include="Symbolic\ArithmeticExpressionGrammar.cs" />
     <Compile Include="Symbolic\SimpleArithmeticExpressionEvaluator.cs" />
 …
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Symbolic\SymbolicRegressionProblemView.cs">
-      <SubType>UserControl</SubType>
     </Compile>
     <Compile Include="Symbolic\SymbolicRegressionProblemView.Designer.cs">
 …
     <Compile Include="Symbolic\Symbols\Variable.cs" />
     <Compile Include="Symbolic\Symbols\VariableTreeNode.cs" />
+    <Compile Include="RegressionProblemView.cs">
+      <SubType>UserControl</SubType>
+    </Compile>
+    <Compile Include="RegressionProblemView.Designer.cs">
+      <DependentUpon>RegressionProblemView.cs</DependentUpon>
+    </Compile>
   </ItemGroup>
   <ItemGroup>

trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/RegressionProblem.cs

-                      r3253
+                      r3264
 using HeuristicLab.Problems.DataAnalysis;
 using System.Drawing;
+using System.IO;
 namespace HeuristicLab.Problems.DataAnalysis.Regression {
 …
+    }
     #endregion
+    #region properties
+    public Dataset Dataset {
+      get { return DatasetParameter.Value; }
+      set { DatasetParameter.Value = value; }
+    }
+    public StringValue TargetVariable {
+      get { return TargetVariableParameter.Value; }
+      set { TargetVariableParameter.Value = value; }
+    }
+    public ItemList<StringValue> InputVariables {
+      get { return InputVariablesParameter.Value; }
+      set { InputVariablesParameter.Value = value; }
+    }
+    public IntValue TrainingSamplesStart {
+      get { return TrainingSamplesStartParameter.Value; }
+      set { TrainingSamplesStartParameter.Value = value; }
+    }
+    public IntValue TrainingSamplesEnd {
+      get { return TrainingSamplesEndParameter.Value; }
+      set { TrainingSamplesEndParameter.Value = value; }
+    }
+    public IntValue ValidationSamplesStart {
+      get { return ValidationSamplesStartParameter.Value; }
+      set { ValidationSamplesStartParameter.Value = value; }
+    }
+    public IntValue ValidationSamplesEnd {
+      get { return ValidationSamplesEndParameter.Value; }
+      set { ValidationSamplesEndParameter.Value = value; }
+    }
+    public IntValue TestSamplesStart {
+      get { return TestSamplesStartParameter.Value; }
+      set { TestSamplesStartParameter.Value = value; }
+    }
+    public IntValue TestSamplesEnd {
+      get { return TestSamplesEndParameter.Value; }
+      set { TestSamplesEndParameter.Value = value; }
+    }
+    #endregion
     public RegressionProblem()
 …
     private RegressionProblem(bool deserializing) : base() { }
+    #region ISingleObjectiveProblem Members
+    public IParameter MaximizationParameter {
+      get { throw new NotImplementedException(); }
+    public virtual void ImportFromFile(string fileName) {
+      var csvFileParser = new CsvFileParser();
+      csvFileParser.Parse(fileName);
+      Name = "Regression Problem (imported from " + Path.GetFileName(fileName);
+      Dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
+      Dataset.Name = Path.GetFileName(fileName);
+      TargetVariable = new StringValue(Dataset.VariableNames.First());
+      InputVariables = new ItemList<StringValue>(Dataset.VariableNames.Skip(1).Select(s => new StringValue(s)));
+      TrainingSamplesStart = new IntValue(0);
+      TrainingSamplesEnd = new IntValue(csvFileParser.Rows);
+      TestSamplesStart = new IntValue(0);
+      TestSamplesEnd = new IntValue(csvFileParser.Rows);
+    }
-    public IParameter BestKnownQualityParameter {
-      get { throw new NotImplementedException(); }
+    }
-    public ISingleObjectiveEvaluator Evaluator {
-      get { throw new NotImplementedException(); }
+    }
-    #endregion
+  }
+}

trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemView.cs

r3253	r3264
12	12	[View("SymbolicRegressionProblem View")]
13	13	[Content(typeof(SymbolicRegressionProblem), true)]
14		public partial class SymbolicRegressionProblemView : ProblemView {
	14	public partial class SymbolicRegressionProblemView : RegressionProblemView {
15	15	public new SymbolicRegressionProblem Content {
16	16	get { return (SymbolicRegressionProblem)base.Content; }

trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs

-                      r3253
+                      r3264
     public Dataset()
       : this(new double[,] { { 0.0 } }) {
+    }
     public Dataset(double[,] data)
+      : this(new string[] { "x" }, new double[,] { { 0.0 } }) {
+    }
+    public Dataset(IEnumerable<string> variableNames, double[,] data)
       : base() {
       Name = "-";
+      if (variableNames.Count() != data.GetLength(1)) {
+        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
+      }
       Data = new DoubleMatrix(data);
+      string formatString = new StringBuilder().Append('#', (int)Math.Log10(this.data.Columns) + 1).ToString(); // >= 100 variables => ###
+      this.variableNames = new StringArray((from col in Enumerable.Range(1, this.data.Columns)
+                                            select "Var" + col.ToString(formatString)).ToArray());
+      this.variableNames = new StringArray(variableNames.ToArray());
+    }

Note: See TracChangeset for help on using the changeset viewer.