1 | using System.Collections.Generic;
|
---|
2 | using System.Linq;
|
---|
3 | using HeuristicLab.Common;
|
---|
4 |
|
---|
5 | namespace WalkExporter {
|
---|
/// <summary>
/// Standardizes the numeric feature values of problem instances. For every feature key
/// the average and standard deviation over a set of instances are recorded, and
/// <see cref="Apply"/> rewrites each value as <c>(value - average) / standard deviation</c>.
/// </summary>
public class InstancesStandardizer {
  // Per-feature statistics keyed by feature key; assigned once in the constructor.
  private readonly IDictionary<string, (double Avg, double Stdev)> normalization;

  /// <summary>
  /// Returns the recorded average of every feature, in the enumeration order of the
  /// internal statistics dictionary.
  /// </summary>
  public IEnumerable<double> GetMeans() {
    return normalization.Select(x => x.Value.Avg);
  }

  /// <summary>
  /// Returns the recorded standard deviation of every feature, in the enumeration order
  /// of the internal statistics dictionary. The raw value is returned, so it may be 0
  /// for a feature that did not vary.
  /// </summary>
  public IEnumerable<double> GetStdevs() {
    return normalization.Select(x => x.Value.Stdev);
  }

  /// <summary>
  /// Collects the features of all <paramref name="instances"/>, groups them by key and
  /// stores the average and standard deviation of each group.
  /// </summary>
  /// <param name="instances">Instances whose features provide the statistics.</param>
  /// <param name="features">
  /// NOTE(review): currently not read; statistics are computed for every feature key
  /// found on the instances. Confirm whether a filter by feature name was intended.
  /// </param>
  private InstancesStandardizer(IList<ProblemInstanceDescriptor> instances, IEnumerable<string> features) {
    normalization = instances
      .SelectMany(x => x.Features)
      .GroupBy(x => x.Key)
      .ToDictionary(x => x.Key,
        x => {
          // Materialize once: the projection would otherwise be re-evaluated by each
          // of the two aggregate calls below.
          var values = x.Select(y => y.GetNumericValue()).ToList();
          return (Avg: values.Average(), Stdev: values.StandardDeviation());
        });
  }

  /// <summary>
  /// Creates a standardizer from the statistics of <paramref name="instances"/> without
  /// changing them.
  /// </summary>
  /// <param name="instances">Instances whose features provide the statistics.</param>
  /// <param name="features">Passed through to the constructor, which does not read it.</param>
  /// <returns>The new standardizer.</returns>
  public static InstancesStandardizer Create(IList<ProblemInstanceDescriptor> instances, IEnumerable<string> features) {
    return new InstancesStandardizer(instances, features);
  }

  /// <summary>
  /// Creates a standardizer from <paramref name="instances"/> and immediately applies it
  /// to those same instances.
  /// </summary>
  /// <param name="instances">Instances used for the statistics and then standardized.</param>
  /// <param name="features">Passed through to <see cref="Create"/>.</param>
  /// <returns>The standardizer that was applied.</returns>
  public static InstancesStandardizer CreateAndApply(IList<ProblemInstanceDescriptor> instances, IEnumerable<string> features) {
    var standardizer = Create(instances, features);
    standardizer.Apply(instances);
    return standardizer;
  }

  /// <summary>
  /// Replaces the numeric value of every feature of every instance by its standardized
  /// value, using the statistics recorded at construction time.
  /// </summary>
  /// <param name="instances">Instances whose features are rewritten via SetNumericValue.</param>
  /// <exception cref="KeyNotFoundException">
  /// A feature key is encountered for which no statistics were recorded.
  /// </exception>
  public void Apply(IList<ProblemInstanceDescriptor> instances) {
    for (var i = 0; i < instances.Count; i++) {
      var inst = instances[i];
      for (var x = 0; x < inst.Features.Count; x++) {
        var feat = inst.Features[x];
        // Single lookup; the indexer still throws for unknown keys.
        var (avg, stdev) = normalization[feat.Key];
        // A feature without spread has a deviation of 0; dividing by it would yield
        // NaN or +/-Infinity. Fall back to a scale of 1 so the value is only centered.
        // The comparison is also false for NaN, which takes the same fallback.
        var scale = stdev > 0.0 ? stdev : 1.0;
        feat.SetNumericValue((feat.GetNumericValue() - avg) / scale);
      }
    }
  }
}
|
---|
47 | }
|
---|