1 | #include <stdio.h> |
---|
2 | #include <string.h> |
---|
3 | #include <stdlib.h> |
---|
4 | #include <ctype.h> |
---|
5 | |
---|
6 | #include "list.c" |
---|
7 | #include "pge.h" |
---|
8 | |
---|
9 | #define MAX_CHAR 255 |
---|
10 | //#define DEBUG |
---|
11 | |
---|
/*
 * Strip leading and trailing whitespace from str, in place.
 *
 * The trimmed text is shifted to the start of the buffer, so callers that
 * ignore the return value still end up with a trimmed string. An all-blank
 * input becomes the empty string inside the caller's own buffer (no pointer
 * to a string literal is ever returned).
 *
 * str must be writable and NUL-terminated.
 * Returns str, or NULL when str is NULL.
 */
char* trim(char *str)
{
    if (str == NULL) {
        return NULL;
    }

    /* Find the first non-blank character. */
    char *first = str;
    while (isspace((unsigned char)*first)) {
        first++;
    }

    /* Drop trailing blanks by shrinking the length. */
    size_t len = strlen(first);
    while (len > 0 && isspace((unsigned char)first[len - 1])) {
        len--;
    }

    /* Regions overlap when leading blanks were skipped, hence memmove. */
    if (first != str) {
        memmove(str, first, len);
    }
    str[len] = '\0';
    return str;
}
---|
28 | |
---|
29 | GoInt CountCols(char *str) { |
---|
30 | GoInt res = 0; |
---|
31 | trim(str); |
---|
32 | char *partial = strtok(str, " "); |
---|
33 | while(partial != NULL) { |
---|
34 | res++; |
---|
35 | partial = strtok(NULL, " "); |
---|
36 | } |
---|
37 | return res; |
---|
38 | } |
---|
39 | |
---|
/*
 * Convert str to lower case in place.
 *
 * Each character is passed to tolower() as an unsigned char, because a
 * negative plain-char value is not a valid argument for the <ctype.h>
 * functions.
 *
 * Returns str, or NULL when str is NULL.
 */
char* lowerCase(char *str) {
    if (str == NULL) {
        return NULL;
    }
    for (char *p = str; *p != '\0'; p++) {
        *p = (char)tolower((unsigned char)*p);
    }
    return str;
}
---|
49 | |
---|
/*
 * Duplicate at most `chars` characters of str into a freshly malloc'd,
 * NUL-terminated buffer. The caller owns the result and must free() it.
 *
 * A negative `chars` is treated as 0 and yields an empty string, so the
 * terminator is never written into a zero-size allocation.
 *
 * Returns NULL when str is NULL or the allocation fails.
 *
 * NOTE(review): this shares its name with the POSIX strndup but has a
 * different signature (char *, int); it can clash with <string.h> when the
 * POSIX declarations are visible.
 */
char *strndup(char *str, int chars)
{
    if (str == NULL) {
        return NULL;
    }

    size_t limit = chars > 0 ? (size_t)chars : 0;
    char *buffer = malloc(limit + 1);
    if (buffer == NULL) {
        return NULL;
    }

    size_t n = 0;
    while (n < limit && str[n] != '\0') {
        buffer[n] = str[n];
        n++;
    }
    buffer[n] = '\0';
    return buffer;
}
---|
64 | |
---|
65 | //Read train / test data |
---|
66 | GoFloat64* ReadInputData(char *fileName, GoInt* nEntries, GoInt* nCol, char** depHead, char** indHead) { |
---|
67 | FILE *fp; |
---|
68 | fp = fopen(fileName, "r"); |
---|
69 | if (fp == NULL) { |
---|
70 | char msg[MAX_CHAR]; |
---|
71 | strcpy(msg, "Error in ReadInputData, failed to open file: "); |
---|
72 | perror(strcat(msg, fileName)); |
---|
73 | exit(EXIT_FAILURE); |
---|
74 | } |
---|
75 | |
---|
76 | int lineNr = 0; |
---|
77 | int currentC = 0; |
---|
78 | char line[MAX_CHAR]; |
---|
79 | |
---|
80 | fgets(line, MAX_CHAR, fp); |
---|
81 | GoInt nIndCol = CountCols(strdup(line)); |
---|
82 | *indHead = strdup(trim(line)); //, nIndCol * 2 - 1); |
---|
83 | #ifdef DEBUG |
---|
84 | printf("Independent Variables %d\n", nIndCol); |
---|
85 | #endif |
---|
86 | |
---|
87 | fgets(line, MAX_CHAR, fp); |
---|
88 | GoInt nDepCol = CountCols(strdup(line)); |
---|
89 | *depHead = strdup(trim(line)); //, nDepCol * 2 - 1); |
---|
90 | #ifdef DEBUG |
---|
91 | printf("Dependent Variables %d\n", nDepCol); |
---|
92 | #endif |
---|
93 | |
---|
94 | *nCol = nDepCol + nIndCol; |
---|
95 | if(nDepCol < 0 || nIndCol < 0) { |
---|
96 | perror("There must be at least one independent and one dependent colum!\n"); |
---|
97 | exit(EXIT_FAILURE); |
---|
98 | } |
---|
99 | |
---|
100 | GoFloat64 *values = NULL; |
---|
101 | while(fgets(line, MAX_CHAR, fp)) { |
---|
102 | trim(line); |
---|
103 | currentC = 0; |
---|
104 | values = (GoFloat64 *) realloc(values, (lineNr + 1) * (*nCol) * sizeof(GoFloat64)); |
---|
105 | if(values == NULL) { |
---|
106 | perror("Out of memory!\n"); |
---|
107 | } |
---|
108 | char *partial = strtok(line, " "); |
---|
109 | while(partial != NULL) { |
---|
110 | GoFloat64 value = 0; |
---|
111 | value = strtod(partial, NULL); |
---|
112 | int pos = lineNr * (*nCol) + currentC; |
---|
113 | values[pos] = (GoFloat64) value; |
---|
114 | #ifdef DEBUG |
---|
115 | printf("Value (%d, %d) %d: %f\n", lineNr, currentC, pos, values[pos]); |
---|
116 | #endif |
---|
117 | currentC++; |
---|
118 | partial = strtok(NULL, " "); |
---|
119 | } |
---|
120 | lineNr++; |
---|
121 | } |
---|
122 | *nEntries = lineNr; |
---|
123 | fclose(fp); |
---|
124 | return values; |
---|
125 | } |
---|
126 | |
---|
/*
 * Print Test / Train data.
 *
 * Dumps a row-major table of nEntries rows by nCols columns to stdout, one
 * cell per line, showing the row, the column, the flat index and the value.
 */
void PrintInputData(double *values, int nEntries, int nCols) {
    int row = 0;
    while (row < nEntries) {
        int col = 0;
        while (col < nCols) {
            int flat = row * nCols + col;
            printf("Value (%d, %d) %d: %f\n", row, col, flat, values[flat]);
            col++;
        }
        row++;
    }
}
---|
136 | |
---|
137 | GoInt* CreateUseableVars(long n) { |
---|
138 | GoInt *data = malloc(sizeof(GoInt) * n); |
---|
139 | for(GoInt i = 0; i < n; i++) { |
---|
140 | data[i] = i; |
---|
141 | } |
---|
142 | return data; |
---|
143 | } |
---|
144 | |
---|
/*
 * Report whether key is one of the tree-related setting names
 * ("roots", "nodes", "nontrig", "leafs").
 *
 * Returns 1 on an exact, case-sensitive match and 0 otherwise.
 */
int isTreeSetting(char *key) {
    static const char *const treeKeys[] = { "roots", "nodes", "nontrig", "leafs" };
    const size_t nKeys = sizeof treeKeys / sizeof treeKeys[0];

    for (size_t k = 0; k < nKeys; k++) {
        if (strcmp(key, treeKeys[k]) == 0) {
            return 1;
        }
    }
    return 0;
}
---|
151 | |
---|
/*
 * Join nValues strings into one space-separated, lower-cased string.
 *
 * Each values[i] is lower-cased in place (via lowerCase) before it is
 * copied. The result is sized from the actual input lengths, so long
 * argument lists cannot overflow a fixed buffer.
 *
 * Returns a malloc'd string (caller frees); an empty string when
 * nValues <= 0; NULL when the allocation fails.
 */
char* MergeArgs(char **values, int nValues) {
    /* One byte per word for the separator or the final terminator. */
    size_t total = 1;
    for (int i = 0; i < nValues; i++) {
        total += strlen(values[i]) + 1;
    }

    char *merged = malloc(total);
    if (merged == NULL) {
        return NULL;
    }

    char *out = merged;
    for (int i = 0; i < nValues; i++) {
        const char *word = lowerCase(values[i]);
        size_t len = strlen(word);
        memcpy(out, word, len);
        out += len;
        if (i + 1 < nValues) {
            *out++ = ' ';
        }
    }
    *out = '\0';
    return merged;
}
---|
162 | |
---|
163 | void SetConfValue(char *keySetting, char **values, int nValues) { |
---|
164 | #ifdef DEBUG |
---|
165 | for(int i = 0; i < nValues; i++) { |
---|
166 | printf("Key: %s Nr: %d Value: %s\n", keySetting, i, values[i]); |
---|
167 | } |
---|
168 | #endif |
---|
169 | void *data = NULL; |
---|
170 | if (strcmp(keySetting, "sorttype") == 0) { |
---|
171 | data = (GoInt *)malloc(sizeof(GoInt)); |
---|
172 | *((GoInt *)data) = 0; |
---|
173 | if(strcmp("paretotesterror", lowerCase(values[0])) != 0) { |
---|
174 | *((GoInt *)data) = 1; |
---|
175 | } |
---|
176 | } else if (strcmp(keySetting, "usablevars") == 0) { |
---|
177 | data = (GoInt *)malloc(sizeof(GoInt)); |
---|
178 | *((GoInt *)data) = nValues; |
---|
179 | } else if(strcmp(keySetting, "zeroepsilon") == 0 || strcmp(keySetting, "hitratio") == 0) { |
---|
180 | data = (GoFloat64 *)malloc(sizeof(GoFloat64)); |
---|
181 | *((GoFloat64 *)data) = strtod(values[0], NULL); |
---|
182 | } else if(isTreeSetting(keySetting) == 1) { |
---|
183 | data = MergeArgs(values, nValues); |
---|
184 | } else if(strcmp(keySetting, "problemtype") == 0 || strcmp(keySetting, "name") == 0) { |
---|
185 | data = strdup(lowerCase(values[0])); |
---|
186 | } else { //Integer |
---|
187 | data = (GoInt *)malloc(sizeof(GoInt)); |
---|
188 | *((GoInt *)data) = strtol(values[0], NULL, 10); |
---|
189 | } |
---|
190 | configPrepend(strdup(keySetting), (void *)data); |
---|
191 | } |
---|
192 | |
---|
193 | void ReadConfigFile(char *filePath) { |
---|
194 | FILE *fp; |
---|
195 | fp = fopen(filePath, "r"); |
---|
196 | if (fp == NULL) { |
---|
197 | perror("Error in ReadConfigFile, failed to open file\n"); |
---|
198 | exit(EXIT_FAILURE); |
---|
199 | } |
---|
200 | char line[MAX_CHAR]; |
---|
201 | fgets(line, MAX_CHAR, fp); |
---|
202 | |
---|
203 | while(fgets(line, MAX_CHAR, fp)) { |
---|
204 | trim(line); |
---|
205 | char *partial = strtok(line, " "); |
---|
206 | int currentPart = 0; |
---|
207 | char keySetting[MAX_CHAR] = "\0"; |
---|
208 | int valid = 0; |
---|
209 | char **values = NULL; |
---|
210 | int nValues = 0; |
---|
211 | while(partial != NULL) { |
---|
212 | trim(partial); |
---|
213 | if(strlen(partial) > 0 && partial[0] == '#') { |
---|
214 | break; |
---|
215 | } |
---|
216 | if(strlen(partial) > 0) { |
---|
217 | if(currentPart == 0) { //Name / Key |
---|
218 | sscanf(partial, "%s", keySetting); |
---|
219 | } else if(currentPart == 1) { //Eq char |
---|
220 | if(strlen(keySetting) > 0) { |
---|
221 | valid = 1; |
---|
222 | } |
---|
223 | } else { //Value |
---|
224 | values = (char **) realloc(values, (nValues + 1) * MAX_CHAR * sizeof(char)); |
---|
225 | values[nValues] = partial; |
---|
226 | nValues++; |
---|
227 | } |
---|
228 | currentPart++; |
---|
229 | } |
---|
230 | partial = strtok(NULL, " "); |
---|
231 | } |
---|
232 | if(valid == 1) { |
---|
233 | SetConfValue(lowerCase(keySetting), values, nValues); |
---|
234 | } |
---|
235 | } |
---|
236 | } |
---|
237 | |
---|
238 | void RunTest(GoFloat64 *TrainData, int nTrainEntries, int nTrainCols, GoFloat64 *TestData, int nTestEntries, int nTestCols, char *depHeadTrain, char *indepHeadTrain, char *depHeadTest, char *indepHeadTest) { |
---|
239 | putenv("GOGC=off"); |
---|
240 | putenv("GODEBUG=cgocheck=0"); |
---|
241 | putenv("CGO_ENABLED=1"); |
---|
242 | #ifdef DEBUG |
---|
243 | putenv("PGEDEBUG=1"); |
---|
244 | printf("%s %s %s %s", indepHeadTest, indepHeadTrain, depHeadTest, depHeadTrain); |
---|
245 | #endif |
---|
246 | fflush(stdout); |
---|
247 | |
---|
248 | AddTestData(indepHeadTest, depHeadTest, TestData, nTestEntries); |
---|
249 | AddTrainData(indepHeadTrain, depHeadTrain, TrainData, nTrainEntries); |
---|
250 | printf("Added Train- and Testdata\n"); |
---|
251 | |
---|
252 | GoInt MaxGen = *((GoInt *)configGet("maxgen")); |
---|
253 | GoInt PgeRptEpoch = *((GoInt *)configGet("pgerptepoch")); |
---|
254 | GoInt PgeRptCount = *((GoInt *)configGet("pgerptcount")); |
---|
255 | GoInt PgeArchiveCap = *((GoInt *)configGet("pgearchivecap")); |
---|
256 | GoInt PeelCnt = *((GoInt *)configGet("peelcnt")); |
---|
257 | GoInt EvalrCount = *((GoInt *)configGet("evalcnt")); |
---|
258 | GoInt SortType = *((GoInt *)configGet("sorttype")); |
---|
259 | GoFloat64 ZeroEpsilon = *((GoFloat64 *)configGet("zeroepsilon")); |
---|
260 | char *InitMethod = (char *)configGet("initmethod"); |
---|
261 | char *GrowMethod = (char *)configGet("growmethod"); |
---|
262 | |
---|
263 | #ifdef DEBUG |
---|
264 | printf("1: %d 2: %d 3: %d 4: %d 5: %d 6: %d 7: %d 8: %lf 9: %s 10: %s\n", |
---|
265 | MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, SortType, ZeroEpsilon, InitMethod, GrowMethod); |
---|
266 | #endif |
---|
267 | |
---|
268 | InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, InitMethod, GrowMethod, SortType); |
---|
269 | printf("Initialized Search\n"); |
---|
270 | fflush(stdout); |
---|
271 | |
---|
272 | GoInt MaxSize = *((GoInt *)configGet("maxsize")); |
---|
273 | GoInt MinSize = *((GoInt *)configGet("minsize")); |
---|
274 | GoInt MaxDepth = *((GoInt *)configGet("maxdepth")); |
---|
275 | GoInt MinDepth = *((GoInt *)configGet("mindepth")); |
---|
276 | char *Roots = (char *)configGet("roots"); |
---|
277 | char *Nodes = (char *)configGet("nodes"); |
---|
278 | char *NonTrig = (char *)configGet("nontrig"); |
---|
279 | char *Leafs = (char *)configGet("leafs"); |
---|
280 | GoInt nUsableVars = *((GoInt *)configGet("usablevars")); |
---|
281 | |
---|
282 | #ifdef DEBUG |
---|
283 | printf("1: %d 2: %d 3: %d 4: %d 5: %s 6: %s 7: %s 8: %s 9: %d\n", MaxSize, MinSize, MaxDepth, MinDepth, Roots, Nodes, NonTrig, Leafs, nUsableVars); |
---|
284 | #endif |
---|
285 | |
---|
286 | InitTreeParams(Roots, Nodes, NonTrig, Leafs, nUsableVars, MaxSize, MinSize, MaxDepth, MinDepth); |
---|
287 | printf("Initialized TreeParams\n"); |
---|
288 | fflush(stdout); |
---|
289 | |
---|
290 | GoInt MaxIter = *((GoInt *)configGet("maxiter")); |
---|
291 | GoInt SearchVar = *((GoInt *)configGet("searchvar")); |
---|
292 | GoFloat64 HitRatio = *((GoFloat64 *)configGet("hitratio")); |
---|
293 | GoInt NumProcs = *((GoInt *)configGet("numprocs")); |
---|
294 | char *Name = (char *)configGet("name"); |
---|
295 | char *ProblemType = (char *)configGet("problemtype"); |
---|
296 | |
---|
297 | #ifdef DEBUG |
---|
298 | printf("1: %d 2: %d 3: %lf 4: %d 5: %s 6: %s\n", MaxIter, SearchVar, HitRatio, NumProcs, Name, ProblemType); |
---|
299 | #endif |
---|
300 | |
---|
301 | InitProblem(Name, MaxIter, HitRatio, SearchVar, ProblemType, NumProcs); |
---|
302 | printf("Initialized Problem\n"); |
---|
303 | fflush(stdout); |
---|
304 | |
---|
305 | for (int fs1 = 1; fs1 <= MaxIter; fs1++) { |
---|
306 | printf("******************************** StepW: %d **********************************\n", fs1); |
---|
307 | GoInt nresults = StepW(); |
---|
308 | |
---|
309 | for (int ires = 0; ires < nresults; ires++) { |
---|
310 | GoInt testscore = 0; |
---|
311 | GoInt ncoeff = 0; |
---|
312 | |
---|
313 | char* stepRes = GetStepResult(&testscore, &ncoeff); |
---|
314 | GoFloat64 *coeff = (GoFloat64 *)malloc(nUsableVars * sizeof(GoFloat64)); |
---|
315 | |
---|
316 | for(int icoeff = 0; icoeff < nUsableVars; icoeff++) { |
---|
317 | coeff[icoeff] = 0; |
---|
318 | } |
---|
319 | printf("C: push/pop (%d,%d) %s\n", fs1, ires, stepRes); |
---|
320 | printf("C: TestScore %d\n", testscore); |
---|
321 | for(int icoeff = 0; icoeff < ncoeff; icoeff++){ |
---|
322 | GoFloat64 coeffVal = GetCoeffResult(); |
---|
323 | printf("C: coeff vals: %f\n", coeffVal); |
---|
324 | } |
---|
325 | } |
---|
326 | fflush(stdout); |
---|
327 | } |
---|
328 | } |
---|
329 | |
---|
330 | //TODO: Dynamic overwrite |
---|
331 | void ParseArgs(int argc, char *argv[], char** problemName, char **initMethod, char **growMethod, GoInt *nPeel, GoInt *nEvals, GoInt *maxIter) { |
---|
332 | if(argc < 2 ){ |
---|
333 | printf("Usage: %s ProblemName [-evals=3] [-peel=3] [-iter=200] [-init=method1] [-grow=method1]\n\n", argv[0]); |
---|
334 | printf("Using default Problem: %s\n", *problemName); |
---|
335 | perror("Missing required argument 1: ProblemName, eg. Pagie_1, Korns_02\n"); |
---|
336 | } else { |
---|
337 | *problemName = strdup(argv[1]); |
---|
338 | for(int i = 2; i < argc; i++) { |
---|
339 | if(strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { |
---|
340 | printf("Usage: %s ProblemName [-evals=3] [-peel=3] [-iter=200] [-init=method1] [-grow=method1]\n\n" , argv[0]); |
---|
341 | exit(EXIT_SUCCESS); |
---|
342 | } else if(strncmp(argv[i], "-evals", 6) == 0) { |
---|
343 | sscanf(argv[i], "-evals=%ld", nEvals); |
---|
344 | } else if(strncmp(argv[i], "-peel", 5) == 0) { |
---|
345 | sscanf(argv[i], "-peel=%ld", nPeel); |
---|
346 | } else if(strncmp(argv[i], "-iter", 5) == 0) { |
---|
347 | sscanf(argv[i], "-iter=%ld", maxIter); |
---|
348 | } else if(strncmp(argv[i], "-init", 5) == 0) { |
---|
349 | sscanf(argv[i], "-init=%s", *initMethod); |
---|
350 | } else if(strncmp(argv[i], "-grow", 5) == 0) { |
---|
351 | sscanf(argv[i], "-grow=%s", *growMethod); |
---|
352 | } else { |
---|
353 | char msg[MAX_CHAR]; |
---|
354 | strcpy(msg, "Unkown argument: "); |
---|
355 | perror(strcat(msg, argv[i])); |
---|
356 | } |
---|
357 | } |
---|
358 | } |
---|
359 | } |
---|
360 | |
---|
361 | int main(int argc, char *argv[]) { |
---|
362 | char *problemName = strdup("Pagie_1"); |
---|
363 | GoInt *nEvals = malloc(sizeof(GoInt)); |
---|
364 | GoInt *nPeel = malloc(sizeof(GoInt)); |
---|
365 | GoInt *maxIter = malloc(sizeof(GoInt)); |
---|
366 | GoInt *numProcs = malloc(sizeof(GoInt)); |
---|
367 | *nEvals = 3; |
---|
368 | *nPeel = 3; |
---|
369 | *maxIter = 200; |
---|
370 | *numProcs = 12; |
---|
371 | char *initMethod = strdup("method1"); |
---|
372 | char *growMethod = strdup("method1"); |
---|
373 | ParseArgs(argc, argv, &problemName, &initMethod, &growMethod, nPeel, nEvals, maxIter); |
---|
374 | |
---|
375 | printf("MaxIter: %ld\n", *maxIter); |
---|
376 | printf("Evals: %ld\n", *nEvals); |
---|
377 | printf("InitMethod: %s\n", initMethod); |
---|
378 | |
---|
379 | //* Testdata |
---|
380 | char tstPath[MAX_CHAR] = "go-code/go-pge/data/benchmark/"; //"../data/benchmark/"; |
---|
381 | strcat(tstPath, problemName); |
---|
382 | strcat(tstPath, ".tst"); |
---|
383 | |
---|
384 | printf("TestFile %s\n", tstPath); |
---|
385 | GoInt nTestEntries = 0; |
---|
386 | GoInt nTestCols = 0; |
---|
387 | char *TestDepHead, *TestIndHead; |
---|
388 | GoFloat64 *TestData = ReadInputData(tstPath, &nTestEntries, &nTestCols, &TestDepHead, &TestIndHead); |
---|
389 | printf("TestData: Entries %ld, Colums %ld\n", nTestEntries, nTestCols); |
---|
390 | #ifdef DEBUG |
---|
391 | PrintInputData(TestData, nTestEntries, nTestCols); |
---|
392 | #endif |
---|
393 | |
---|
394 | //* Traindata |
---|
395 | char trnPath[MAX_CHAR] = "go-code/go-pge/data/benchmark/"; //"../data/benchmark/"; |
---|
396 | strcat(trnPath, problemName); |
---|
397 | strcat(trnPath, ".trn"); |
---|
398 | |
---|
399 | printf("TrainFile %s\n", trnPath); |
---|
400 | GoInt nTrainEntries = 0; |
---|
401 | GoInt nTrainCols = 0; |
---|
402 | char *TrainDepHead, *TrainIndHead; |
---|
403 | GoFloat64 *TrainData = ReadInputData(trnPath, &nTrainEntries, &nTrainCols, &TrainDepHead, &TrainIndHead); |
---|
404 | printf("TrainData: Entries %ld, Colums %ld\n", nTrainEntries, nTrainCols); |
---|
405 | #ifdef DEBUG |
---|
406 | PrintInputData(TrainData, nTrainEntries, nTrainCols); |
---|
407 | #endif |
---|
408 | |
---|
409 | //* Configfiles |
---|
410 | char defaultConfPath[] = "go-code/go-pge/config/pge/pge_default.cfg"; //"../config/pge/pge_default.cfg"; |
---|
411 | printf("Default Config Path %s\n", defaultConfPath); |
---|
412 | ReadConfigFile(defaultConfPath); |
---|
413 | |
---|
414 | char confPath[MAX_CHAR] = "go-code/go-pge/config/prob/bench/"; //"../config/prob/bench/"; |
---|
415 | strcat(confPath, problemName); |
---|
416 | strcat(confPath, ".cfg"); |
---|
417 | printf("Config Path %s\n", confPath); |
---|
418 | ReadConfigFile(confPath); |
---|
419 | |
---|
420 | configPrint(); |
---|
421 | #ifdef DEBUG |
---|
422 | configPrint(); |
---|
423 | #endif |
---|
424 | |
---|
425 | //Overwrite with args |
---|
426 | configPrepend(strdup("growmethod"), growMethod); |
---|
427 | configPrepend(strdup("initmethod"), initMethod); |
---|
428 | configPrepend(strdup("evalcnt"), nEvals); |
---|
429 | configPrepend(strdup("peelcnt"), nPeel); |
---|
430 | configPrepend(strdup("maxiter"), maxIter); |
---|
431 | configPrepend(strdup("name"), problemName); |
---|
432 | configPrepend(strdup("numprocs"), numProcs); |
---|
433 | |
---|
434 | //* RunPGE |
---|
435 | RunTest(TrainData, nTrainEntries, nTrainCols, TestData, nTestEntries, nTestCols, TrainDepHead, TrainIndHead, TestDepHead, TestIndHead); |
---|
436 | |
---|
437 | //* Dispose |
---|
438 | free(TestData); |
---|
439 | TestData = NULL; |
---|
440 | |
---|
441 | free(TrainData); |
---|
442 | TrainData = NULL; |
---|
443 | |
---|
444 | configDispose(); |
---|
445 | |
---|
446 | return EXIT_SUCCESS; |
---|
447 | } |
---|