- Timestamp: 01/16/20 15:48:32 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/TimeSeries/TimeSeriesInstanceProvider.cs
r17401 r17403 58 58 using (var trainingReader = new StreamReader(trainingEntry.Open())) 59 59 using (var testReader = new StreamReader(testEntry.Open())) { 60 ParseMetadata(trainingReader, out var inputVariables, out string targetVariable );61 ParseMetadata(testReader, out _, out _ ); // ignore outputs60 ParseMetadata(trainingReader, out var inputVariables, out string targetVariable, out var classLabels); 61 ParseMetadata(testReader, out _, out _, out _); // ignore outputs 62 62 63 63 // Read data 64 64 var inputsData = new List<DoubleVector>[inputVariables.Count]; 65 65 for (int i = 0; i < inputsData.Length; i++) inputsData[i] = new List<DoubleVector>(); 66 var targetData = new List<double>(); 66 bool numericTarget = classLabels.All(label => !double.IsNaN(ParseNumber(label))); 67 IList targetData = numericTarget ? new List<double>() : new List<string>() as IList; 67 68 ReadData(trainingReader, inputsData, targetData, out int numTrainingRows); 68 69 ReadData(testReader, inputsData, targetData, out int numTestRows); 70 71 // Translate class values to numeric values 72 if (targetData is List<string> stringTargetData) { 73 var labelTranslation = classLabels 74 .Select((x, i) => new { Label = x, i }) 75 .ToDictionary(x => x.Label, x => (double)x.i); 76 targetData = stringTargetData.Select(label => labelTranslation[label]).ToList(); 77 } 69 78 70 79 // Build dataset 71 80 var dataset = new Dataset( 72 81 inputVariables.Concat(new[] { targetVariable }), 73 inputsData.C ast<IList>().Concat(new[] { targetData })82 inputsData.Concat(new[] { targetData }) 74 83 ); 75 84 Debug.Assert(dataset.Rows == numTrainingRows + numTestRows); … … 90 99 } 91 100 92 private static void ParseMetadata(StreamReader reader, out List<string> inputVariables, out string targetVariable ) {101 private static void ParseMetadata(StreamReader reader, out List<string> inputVariables, out string targetVariable, out List<string> classLabels) { 93 102 int nrOfInputs = 0; 103 IEnumerable<string> labels = 
null; 94 104 bool dataStart = false; 105 95 106 while (!reader.EndOfStream && !dataStart) { 96 107 var line = reader.ReadLine(); … … 101 112 var type = splits.First(); 102 113 var arguments = splits.Skip(1).ToList(); 103 switch (type .ToLowerInvariant()) {114 switch (type) { 104 115 case "@univariate": 105 116 bool univariate = bool.Parse(arguments[0]); … … 111 122 nrOfInputs = dimensions; 112 123 break; 124 case "@classLabel": 125 bool containLabels = bool.Parse(arguments[0]); 126 if (containLabels) 127 labels = arguments.Skip(1); 128 break; 113 129 case "@data": 114 130 dataStart = true; … … 126 142 127 143 targetVariable = "Y"; 128 } 129 130 private static void ReadData(StreamReader reader, List<DoubleVector>[] inputsData, List<double> targetData, out int count) { 144 145 classLabels = labels.ToList(); 146 } 147 148 private static void ReadData(StreamReader reader, List<DoubleVector>[] inputsData, IList targetData, out int count) { 149 var numericTargetData = targetData as List<double>; 150 var stringTargetData = targetData as List<string>; 151 131 152 count = 0; 132 153 while (!reader.EndOfStream) { … … 139 160 var numbers = variable 140 161 .Split(',') 141 .Select( d => double.Parse(d, CultureInfo.InvariantCulture))162 .Select(ParseNumber) 142 163 .ToList(); 143 164 inputsData[i].Add(new DoubleVector(numbers)); 144 165 } 145 166 146 var target = double.Parse(variables[variables.Length - 1], CultureInfo.InvariantCulture); 147 targetData.Add(target); 167 var target = variables[variables.Length - 1]; 168 if (numericTargetData != null) numericTargetData.Add(ParseNumber(target)); 169 else if (stringTargetData != null) stringTargetData.Add(target); 170 else throw new InvalidOperationException("Target must either be numeric or a string."); 148 171 149 172 count++; 150 173 } 174 } 175 176 private static double ParseNumber(string number) { 177 return 178 double.TryParse(number, NumberStyles.Float, CultureInfo.InvariantCulture, out double parsed) 179 ? 
parsed 180 : double.NaN; 151 181 } 152 182
Note: See TracChangeset for help on using the changeset viewer.