Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/go-code/go-pge/problems/data.go @ 16080

Last change on this file since 16080 was 16080, checked in by hmaislin, 6 years ago

#2929 initial commit of working PGE version

File size: 13.3 KB
Line 
1package problems
2
3import (
4  "bufio"
5  "bytes"
6  "fmt"
7  "math"
8  "math/rand"
9  "os"
10)
11
12type Point struct {
13  indep []float64
14  depnd []float64
15}
16
17func (d *Point) NumIndep() int             { return len(d.indep) }
18func (d *Point) SetNumIndep(sz int)        { d.indep = make([]float64, sz) }
19func (d *Point) Indep(p int) float64       { return d.indep[p] }
20func (d *Point) SetIndep(p int, v float64) { d.indep[p] = v }
21func (d *Point) Indeps() []float64         { return d.indep }
22func (d *Point) SetIndeps(v []float64)     { d.indep = v }
23func (d *Point) NumDepnd() int             { return len(d.depnd) }
24func (d *Point) SetNumDepnd(sz int)        { d.depnd = make([]float64, sz) }
25func (d *Point) Depnd(p int) float64       { return d.depnd[p] }
26func (d *Point) SetDepnd(p int, v float64) { d.depnd[p] = v }
27func (d *Point) Depnds() []float64         { return d.depnd }
28func (d *Point) SetDepnds(v []float64)     { d.depnd = v }
29
30type PointSet struct {
31  filename string
32  id       int
33
34  numDim     int
35  indepNames []string
36  depndNames []string
37  sysNames   []string
38
39  dataPoints []Point
40  sysVals    []float64
41}
42
43func (d *PointSet) FN() string           { return d.filename }
44func (d *PointSet) SetFN(fn string)      { d.filename = fn }
45func (d *PointSet) ID() int              { return d.id }
46func (d *PointSet) SetID(id int)         { d.id = id }
47func (d *PointSet) SetNumPoints(cnt int) { d.dataPoints = make([]Point, cnt) }
48
49func (d *PointSet) NumIndep() int     { return len(d.indepNames) }
50func (d *PointSet) NumDepnd() int     { return len(d.depndNames) }
51func (d *PointSet) NumDim() int       { return d.numDim } // TODO check to see if TIME is a variable
52func (d *PointSet) SetNumDim(dim int) { d.numDim = dim }  // TODO check to see if TIME is a variable
53func (d *PointSet) NumSys() int       { return len(d.sysNames) }
54func (d *PointSet) NumPoints() int    { return len(d.dataPoints) }
55
56func (d *PointSet) IndepName(xi int) string      { return d.indepNames[xi] }
57func (d *PointSet) GetIndepNames() []string      { return d.indepNames }
58func (d *PointSet) SetIndepNames(names []string) { d.indepNames = names }
59func (d *PointSet) DepndName(xi int) string      { return d.depndNames[xi] }
60func (d *PointSet) GetDepndNames() []string      { return d.depndNames }
61func (d *PointSet) SetDepndNames(names []string) { d.depndNames = names }
62
63func (d *PointSet) SysName(si int) string      { return d.sysNames[si] }
64func (d *PointSet) GetSysNames() []string      { return d.sysNames }
65func (d *PointSet) SetSysNames(names []string) { d.sysNames = names }
66func (d *PointSet) SetSysVals(sv []float64)    { d.sysVals = sv }
67
68func (d *PointSet) Point(p int) *Point    { return &(d.dataPoints[p]) }
69func (d *PointSet) Points() []Point       { return d.dataPoints }
70func (d *PointSet) SetPoints(pts []Point) { d.dataPoints = pts }
71func (d *PointSet) SysVal(p int) float64  { return d.sysVals[p] }
72func (d *PointSet) SysVals() []float64    { return d.sysVals }
73
74// read function at end of file  [ func (d *PointSet) Read(filename string) ]
75
76type PntSubset struct {
77  ds *PointSet
78
79  index   []int
80  input   []Point
81  output  []Point
82  sysVals []float64
83}
84
85func (s *PntSubset) ID() int            { return s.ds.id }
86func (s *PntSubset) DS() *PointSet      { return s.ds }
87func (s *PntSubset) SetDS(ds *PointSet) { s.ds = ds }
88
89func (s *PntSubset) NumIndep() int  { return s.ds.NumIndep() }
90func (s *PntSubset) NumDepnd() int  { return s.ds.NumDepnd() }
91func (s *PntSubset) NumSys() int    { return s.ds.NumSys() }
92func (s *PntSubset) NumPoints() int { return len(s.index) }
93
94func (s *PntSubset) SysVals() []float64        { return s.sysVals }
95func (s *PntSubset) SetSysVals(svls []float64) { s.sysVals = svls }
96
97func (s *PntSubset) Index(p int) int       { return s.index[p] }
98func (s *PntSubset) Indexes() []int        { return s.index }
99func (s *PntSubset) SetIndexes(idxs []int) { s.index = idxs }
100func (s *PntSubset) Input(p int) *Point    { return &s.input[p] }
101func (s *PntSubset) Output(p int) *Point   { return &s.output[p] }
102
103func (s *PntSubset) AddPoint(p int, input, output *Point) {
104  s.index = append(s.index, p)
105  s.input = append(s.input, *input)
106  s.output = append(s.output, *output)
107}
108
109// using indexes, update the input/output data
110func (s *PntSubset) Refresh() {
111  L := len(s.index)
112  if s.input == nil {
113    s.input = make([]Point, L)
114  }
115  if s.output == nil {
116    s.output = make([]Point, L)
117  }
118
119  for i := 0; i < L; i++ {
120    s.input[i] = *s.ds.Point(s.index[i])
121    if s.index[i]+1 >= s.ds.NumPoints() {
122      continue
123    }
124    s.output[i] = *s.ds.Point(s.index[i] + 1)
125  }
126}
127
128func (d *PointSet) ReadPointSet(filename string) {
129  ftotal, err := os.OpenFile(filename, os.O_RDONLY, 0)
130  if err != nil {
131    fmt.Printf("err: %v\n", err)
132    return
133  }
134  defer ftotal.Close()
135  file := bufio.NewReader(ftotal)
136
137  var word string
138
139  // get independent variables (x_i...)
140  for i := 0; ; i++ {
141    _, err := fmt.Fscanf(file, "%s", &word)
142    if err != nil {
143      break
144    }
145    d.indepNames = append(d.indepNames, word)
146  }
147  d.numDim = len(d.indepNames)
148
149  // get dependent variables (y_j...)
150  for i := 0; ; i++ {
151    _, err := fmt.Fscanf(file, "%s", &word)
152    if err != nil {
153      break
154    }
155    d.depndNames = append(d.depndNames, word)
156  }
157
158  fmt.Printf("Var Names = %v | %v\n", d.depndNames, d.indepNames)
159
160  for i := 0; ; i++ {
161    var pnt Point
162    var dval, ival float64
163    if err != nil {
164      break
165    }
166
167    for j := 0; j < len(d.indepNames); j++ {
168      _, err = fmt.Fscanf(file, "%f", &ival)
169      if err != nil {
170        break
171      }
172      pnt.indep = append(pnt.indep, ival)
173    }
174
175    for j := 0; j < len(d.depndNames); j++ {
176      _, err = fmt.Fscanf(file, "%f\n", &dval)
177      if err != nil {
178        break
179      }
180      pnt.depnd = append(pnt.depnd, dval)
181    }
182
183    if len(pnt.indep) > 0 {
184      d.dataPoints = append(d.dataPoints, pnt)
185    }
186    if i%100 == 0 {
187      fmt.Println("Point(%d): %v\n", i, pnt)
188    }
189  }
190  fmt.Printf("Num Points: %v\n", len(d.dataPoints))
191}
192
193func ReadBytesPointSet(ftotal []byte) (d *PointSet) {
194  d = new(PointSet)
195  // ftotal, err := os.OpenFile(filename, os.O_RDONLY, 0)
196  // if err != nil {
197  //  fmt.Printf("err: %v\n", err)
198  //  return
199  // }
200  // defer ftotal.Close()
201  var err error
202  file := bytes.NewReader(ftotal)
203
204  var word string
205
206  // get independent variables (x_i...)
207  for i := 0; ; i++ {
208    _, err := fmt.Fscanf(file, "%s", &word)
209    if err != nil {
210      break
211    }
212    d.indepNames = append(d.indepNames, word)
213  }
214  d.numDim = len(d.indepNames)
215
216  // get dependent variables (y_j...)
217  for i := 0; ; i++ {
218    _, err := fmt.Fscanf(file, "%s", &word)
219    if err != nil {
220      break
221    }
222    d.depndNames = append(d.depndNames, word)
223  }
224
225  fmt.Printf("Var Names = %v | %v\n", d.depndNames, d.indepNames)
226
227  for i := 0; ; i++ {
228    var pnt Point
229    var dval, ival float64
230    if err != nil {
231      break
232    }
233
234    for j := 0; j < len(d.indepNames); j++ {
235      _, err = fmt.Fscanf(file, "%f", &ival)
236      if err != nil {
237        break
238      }
239      pnt.indep = append(pnt.indep, ival)
240    }
241
242    for j := 0; j < len(d.depndNames); j++ {
243      _, err = fmt.Fscanf(file, "%f\n", &dval)
244      if err != nil {
245        break
246      }
247      pnt.depnd = append(pnt.depnd, dval)
248    }
249
250    if len(pnt.indep) > 0 {
251      d.dataPoints = append(d.dataPoints, pnt)
252    }
253    if i%100 == 0 {
254      fmt.Println("Point(%d): %v\n", i, pnt)
255    }
256  }
257  fmt.Printf("Num Points: %v\n", len(d.dataPoints))
258  return d
259}
260
261func (d *PointSet) WritePointSet(filename string) {
262  fmt.Printf("Writing file: %s\n", filename)
263  ftotal, err := os.Create(filename)
264  if err != nil {
265    fmt.Printf("Error creating file: %s  %v\n", filename, err)
266    return
267  }
268  defer ftotal.Close()
269  file := bufio.NewWriter(ftotal)
270  defer file.Flush()
271
272  // write independent variable names (x_i...)
273  for i := 0; i < d.NumIndep(); i++ {
274    _, err := fmt.Fprintf(file, "%s ", d.IndepName(i))
275    if err != nil {
276      fmt.Errorf("error writing pointset to file: %v\n", err)
277      break
278    }
279  }
280  fmt.Fprintln(file)
281
282  // trite dependent variable names (y_j...)
283  for i := 0; i < d.NumDepnd(); i++ {
284    _, err := fmt.Fprintf(file, "%s ", d.DepndName(i))
285    if err != nil {
286      break
287    }
288  }
289  fmt.Fprintln(file)
290
291  // write points
292  points := d.Points()
293  for i := 0; i < d.NumPoints(); i++ {
294    indep := points[i].Indeps()
295    depnd := points[i].Depnds()
296    for j := 0; j < len(indep); j++ {
297      _, err = fmt.Fprintf(file, "%f ", indep[j])
298      if err != nil {
299        break
300      }
301    }
302    for j := 0; j < len(depnd); j++ {
303      _, err = fmt.Fprintf(file, "%f ", depnd[j])
304      if err != nil {
305        break
306      }
307    }
308    fmt.Fprintln(file)
309  }
310}
311
312func SplitPointSetTrainTest(pnts *PointSet, pcnt_train float64, seed int) (train, test *PointSet) {
313
314  train = new(PointSet)
315  test = new(PointSet)
316
317  train.filename, test.filename = pnts.filename, pnts.filename
318  train.id, test.id = pnts.id, pnts.id
319  train.indepNames, test.indepNames = pnts.indepNames, pnts.indepNames
320  train.depndNames, test.depndNames = pnts.depndNames, pnts.depndNames
321  train.sysNames, test.sysNames = pnts.sysNames, pnts.sysNames
322  train.sysVals, test.sysVals = pnts.sysVals, pnts.sysVals
323
324  L := len(pnts.dataPoints)
325  Tst := int(float64(L) * (1.0 - pcnt_train))
326
327  tmp := make([]Point, L)
328  copy(tmp, pnts.dataPoints)
329
330  rand.Seed(int64(seed))
331
332  for i := 0; i < Tst; i++ {
333    p := rand.Intn(L - i)
334    tmp[i], tmp[p] = tmp[p], tmp[i]
335  }
336
337  test.dataPoints = tmp[:Tst]
338  train.dataPoints = tmp[Tst:]
339
340  return
341}
342
343func (d *PointSet) ReadLakeFile(filename string) {
344  ftotal, err := os.OpenFile(filename, os.O_RDONLY, 0)
345  if err != nil {
346    fmt.Printf("err: %v\n", err)
347    return
348  }
349  defer ftotal.Close()
350  file := bufio.NewReader(ftotal)
351
352  var word string
353
354  // get independent variables (x_i...)
355  for i := 0; ; i++ {
356    _, err := fmt.Fscanf(file, "%s", &word)
357    if err != nil {
358      break
359    }
360    d.indepNames = append(d.indepNames, word)
361  }
362  d.numDim = len(d.indepNames)
363
364  // // get dependent variables (y_j...)
365  // for i := 0; ; i++ {
366  //  _, err := fmt.Fscanf(file, "%s", &word)
367  //  if err != nil {
368  //    break
369  //  }
370  //  d.depndNames = append(d.depndNames, word)
371  // }
372
373  // remove time names from indepNames
374  d.indepNames = d.indepNames[2:]
375
376  // fmt.Printf("Var Names = %v | %v\n", d.depndNames, d.indepNames)
377
378  for i := 0; ; i++ {
379    var pnt Point
380    var ival float64
381    if err != nil {
382      break
383    }
384
385    // read time values and disgaurd
386    var dummy string
387    for j := 0; j < 2; j++ {
388      _, err = fmt.Fscanf(file, "%s", &dummy)
389      // fmt.Println(i, dummy)
390      if err != nil {
391        // fmt.Println("err:", err)
392        break
393      }
394    }
395
396    // append dummy time value
397    pnt.indep = append(pnt.indep, -0.1)
398
399    // read real data
400    for j := 0; j < len(d.indepNames); j++ {
401      _, err = fmt.Fscanf(file, "%f\n", &ival)
402      if err != nil {
403        // fmt.Println("err:", err)
404        break
405      }
406      // hack for PAR
407      // if j == 1 {
408      //  ival /= 2000.0 * (math.Pi / 2.0)
409      // }
410
411      pnt.indep = append(pnt.indep, ival)
412    }
413
414    // fmt.Println(i, pnt)
415
416    // for j := 0; j < len(d.depndNames); j++ {
417    //  _, err = fmt.Fscanf(file, "%f\n", &dval)
418    //  if err != nil {
419    //    break
420    //  }
421    //  pnt.depnd = append(pnt.depnd, dval)
422    // }
423
424    if len(pnt.indep) > 1 {
425      for p := 0; p < len(pnt.indep); p++ {
426        if math.IsNaN(pnt.indep[p]) {
427          // fmt.Println("NaN @ ", i, p)
428          goto skip
429        }
430      }
431      d.dataPoints = append(d.dataPoints, pnt)
432    }
433  skip:
434  }
435
436  // calculate numerical derivatives
437  calcDerivs(d.dataPoints)
438
439  for i := 0; i < len(d.dataPoints); i++ {
440    p := d.dataPoints[i]
441    if len(p.Indeps()) == 0 || len(p.Depnds()) == 0 {
442      fmt.Println("Bad Point @", i)
443    }
444    // if i%100 == 0 {
445    //  fmt.Printf("Point(%d): %v\n", i, d.dataPoints[i])
446    // }
447  }
448
449  // fmt.Printf("Num Points: %v\n", len(d.dataPoints))
450}
451
452/* Calculate the first derivative of four points with: h = 0.25
453 * (from: http://www.trentfguidry.net/post/2010/09/04/Numerical-differentiation-formulas.aspx)
454 *
455 * xF0 = ( -3.0*xF4 + 16.0*xF3 - 36.0*xF2 + 48.0*xF1 - 25.0*xF0) / (12.0*h)
456 * xF1 = (      xF4 -  6.0-xF3 + 18.0*xF2 - 10.0*xF1 -  3.0*xF0) / (12.0*h)
457 * xF2 = (     -xF4 +  8.0*xF3           -   8.0*xF1 +      xF0) / (12.0*h)
458 * xF3 = (  3.0*xF4 + 10.0*xF3 - 18.0*xF2 +  6.0*xF1 -      xF0) / (12.0*h)
459 * xF4 = ( 25.0*xF4 - 48.0*xF3 + 36.0*xF2 - 16.0*xF1 +  3.0*xF0) / (12.0*h)
460 */
461func calcDerivs(pts []Point) {
462  h := 24.0 // / (24.0 * 60.0)
463
464  NP := len(pts)
465  ND := pts[0].NumIndep()
466
467  // for summing and averaging on a point/variable~wise basis
468  cnts := make([][]int, NP)
469  vals := make([][]float64, NP)
470  for p := 0; p < NP; p++ {
471    cnts[p] = make([]int, ND)
472    vals[p] = make([]float64, ND)
473  }
474
475  for p := 0; p < NP-5; p++ {
476    for i := 0; i < ND; i++ {
477      var F [5]float64
478      var dF [5]float64
479
480      for j := 0; j < 5; j++ {
481        F[j] = pts[p+j].Indep(i)
482      }
483      dF[0] = (-3.0*F[4] + 16.0*F[3] - 36.0*F[2] + 48.0*F[1] - 25.0*F[0]) / (12.0 * h)
484      dF[1] = (F[4] - 6.0 - F[3] + 18.0*F[2] - 10.0*F[1] - 3.0*F[0]) / (12.0 * h)
485      dF[2] = (-F[4] + 8.0*F[3] - 8.0*F[1] + F[0]) / (12.0 * h)
486      dF[3] = (3.0*F[4] + 10.0*F[3] - 18.0*F[2] + 6.0*F[1] - F[0]) / (12.0 * h)
487      dF[4] = (25.0*F[4] - 48.0*F[3] + 36.0*F[2] - 16.0*F[1] + 3.0*F[0]) / (12.0 * h)
488
489      for j := 0; j < 5; j++ {
490
491        vals[p+j][i] += dF[j]
492        cnts[p+j][i]++
493      }
494    }
495  }
496
497  for p := 0; p < NP; p++ {
498    depnds := make([]float64, ND)
499    for i := 0; i < ND; i++ {
500      depnds[i] = vals[p][i] / float64(cnts[p][i])
501    }
502    pts[p].SetDepnds(depnds)
503  }
504}
Note: See TracBrowser for help on using the repository browser.