PROBLEM SymbReg

CODE <<
  // Symbolic regression sample: evolves an arithmetic expression over the
  // input variables and scores it by squared Pearson correlation with y.
  //
  // NOTE(review): generic type arguments, the tail of GetValue, the RSquared
  // signature, the symbol attribute lists and the IN SET argument were missing
  // from this file (angle-bracket-delimited text had been stripped). They are
  // reconstructed from the surviving usages - confirm against the original.

  double[,] x;                         // input matrix: one row per case, one column per variable
  double[] y;                          // target value for each row of x
  string[] variableNames;              // variable name for each column of x
  int[] rows;                          // row indices evaluated by the quality function
  Dictionary<string, int> nameToCol;   // lazily built lookup from variable name to column index

  // Returns the value of the variable named varName in the given row of data.
  // The name-to-column mapping is built from variableNames on first use.
  double GetValue(double[,] data, string varName, int row) {
    if(nameToCol == null) {
      /* init mapping */
      nameToCol = new Dictionary<string, int>();
      for(int i = 0; i < variableNames.Length; i++) {
        nameToCol[variableNames[i]] = i;
      }
    }
    return data[row, nameToCol[varName]];
  }

  // Returns the squared Pearson correlation coefficient of xs and ys,
  // calculated in one pass over both sequences. Returns 0 when the
  // correlation is undefined (empty input, zero variance, or a non-finite
  // intermediate result).
  double RSquared(IEnumerable<double> xs, IEnumerable<double> ys) {
    double sumx = 0.0;
    double sumy = 0.0;
    double sumxSq = 0.0;
    double sumySq = 0.0;
    double sumxy = 0.0;
    int n = 0;

    using(var xEnum = xs.GetEnumerator())
    using(var yEnum = ys.GetEnumerator()) {
      // the non-short-circuit '&' advances both enumerators in lock-step
      while(xEnum.MoveNext() & yEnum.MoveNext()) {
        double xv = xEnum.Current;
        double yv = yEnum.Current;
        sumx += xv;
        sumy += yv;
        sumxSq += xv * xv;
        sumySq += yv * yv;
        sumxy += xv * yv;
        n++;
      }
      // both sequences are expected to have the same length
      System.Diagnostics.Debug.Assert(!(xEnum.MoveNext() | yEnum.MoveNext()));
    }

    if(n == 0) {
      return 0.0;
    }

    double num = sumxy - ((sumx * sumy) / n);
    double den = Math.Sqrt((sumxSq - (sumx * sumx) / n) * (sumySq - (sumy * sumy) / n));
    double r = 0.0;
    if(den > 0) {
      r = num / den;
    }
    // overflowing sums can give Infinity / Infinity = NaN; never report that as quality
    if(double.IsNaN(r) || double.IsInfinity(r)) {
      r = 0.0;
    }
    return r * r;
  }
>>

INIT <<
  // generate 500 cases of the poly-10 benchmark function:
  //   y = x1*x2 + x3*x4 + x5*x6 + x1*x7*x9 + x3*x6*x10
  int n = 500;
  variableNames = new string[] { "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" };
  var rand = new System.Random();
  x = new double[n, 10];
  y = new double[n];
  for(int row = 0; row < n; row++) {
    for(int col = 0; col < 10; col++) {
      // inputs are sampled uniformly from [-1, 1)
      x[row, col] = rand.NextDouble() * 2.0 - 1.0;
    }
    y[row] = x[row, 0] * x[row, 1] +
             x[row, 2] * x[row, 3] +
             x[row, 4] * x[row, 5] +
             x[row, 0] * x[row, 6] * x[row, 8] +
             x[row, 2] * x[row, 5] * x[row, 9];
  }

  rows = System.Linq.Enumerable.Range(0, n).ToArray();
>>

NONTERMINALS
  Model<<int row, out double val>>.
  RPB<<int row, out double val>>.
  Addition<<int row, out double val>>.
  Subtraction<<int row, out double val>>.
  Multiplication<<int row, out double val>>.
  Division<<int row, out double val>>.

TERMINALS
  Const<<out double val>>
    CONSTRAINTS
      val IN RANGE <<-100>> .. <<100>>
  .
  Var<<out string varName, out double weight>>
    CONSTRAINTS
      varName IN SET <<variableNames>>
      weight IN RANGE <<-100>> .. <<100>>
  .

RULES
  Model<<int row, out double val>> =
    RPB<<row, out val>>
  .

  RPB<<int row, out double val>> =                 LOCAL << string varName; double w; >>
    Addition<<row, out val>>
    | Subtraction<<row, out val>>
    | Division<<row, out val>>
    | Multiplication<<row, out val>>
    | Var<<out varName, out w>>                    SEM << val = w * GetValue(x, varName, row); >>
    | Const<<out val>>
  .

  Addition<<int row, out double val>> =            LOCAL << double x1, x2; >>
    RPB<<row, out x1>> RPB<<row, out x2>>          SEM << val = x1 + x2; >>
  .

  Subtraction<<int row, out double val>> =         LOCAL << double x1, x2; >>
    RPB<<row, out x1>> RPB<<row, out x2>>          SEM << val = x1 - x2; >>
  .

  Division<<int row, out double val>> =            LOCAL << double x1, x2; >>
    RPB<<row, out x1>> RPB<<row, out x2>>          SEM << val = x1 / x2; >>
  .

  Multiplication<<int row, out double val>> =      LOCAL << double x1, x2; >>
    RPB<<row, out x1>> RPB<<row, out x2>>          SEM << val = x1 * x2; >>
  .

MAXIMIZE
  <<
    // evaluate the model on every row and score it against the targets
    var predicted = rows.Select(r => {
      double result;
      Model(r, out result); /* we can call the root symbol directly */
      return result;
    });
    return RSquared(predicted, y);
  >>
END SymbReg.