1 | #include "interpreter.h"
|
---|
2 |
|
---|
3 | #ifdef __cplusplus
|
---|
4 | extern "C" {
|
---|
5 | #endif
|
---|
6 |
|
---|
7 | constexpr size_t BUFSIZE = BATCHSIZE * sizeof(double);
|
---|
8 |
|
---|
9 | // slow (ish?)
|
---|
10 | __declspec(dllexport)
|
---|
11 | double __cdecl GetValue(instruction* code, int codeLength, int row) noexcept
|
---|
12 | {
|
---|
13 | return evaluate(code, codeLength, row);
|
---|
14 | }
|
---|
15 |
|
---|
16 | __declspec(dllexport)
|
---|
17 | void __cdecl GetValues(instruction* code, int codeLength, int* rows, int totalRows, double* result) noexcept
|
---|
18 | {
|
---|
19 | for (int i = 0; i < totalRows; ++i)
|
---|
20 | {
|
---|
21 | result[i] = evaluate(code, codeLength, rows[i]);
|
---|
22 | }
|
---|
23 | }
|
---|
24 |
|
---|
25 | __declspec(dllexport)
|
---|
26 | void __cdecl GetValuesVectorized(instruction* code, int codeLength, int* rows, int totalRows, double* __restrict result) noexcept
|
---|
27 | {
|
---|
28 | double* buffer = static_cast<double*>(_aligned_malloc(codeLength * BUFSIZE, 16));
|
---|
29 | for (int i = 0; i < codeLength; ++i)
|
---|
30 | {
|
---|
31 | instruction& in = code[i];
|
---|
32 | in.buf = buffer + (i * BATCHSIZE);
|
---|
33 |
|
---|
34 | if (in.opcode == OpCodes::Const)
|
---|
35 | {
|
---|
36 | load(in.buf, in.value);
|
---|
37 | }
|
---|
38 | }
|
---|
39 |
|
---|
40 | int remainingRows = totalRows % BATCHSIZE;
|
---|
41 | int total = totalRows - remainingRows;
|
---|
42 |
|
---|
43 | for (int rowIndex = 0; rowIndex < total; rowIndex += BATCHSIZE)
|
---|
44 | {
|
---|
45 | evaluate(code, codeLength, rows, rowIndex, BATCHSIZE);
|
---|
46 | std::memcpy(result + rowIndex, code[0].buf, BUFSIZE);
|
---|
47 | }
|
---|
48 |
|
---|
49 | // are there any rows left?
|
---|
50 | if (remainingRows > 0)
|
---|
51 | {
|
---|
52 | evaluate(code, codeLength, rows, total, remainingRows);
|
---|
53 | std::memcpy(result + total, code[0].buf, remainingRows * sizeof(double));
|
---|
54 | }
|
---|
55 | _aligned_free(buffer);
|
---|
56 | }
|
---|
57 |
|
---|
58 | #ifdef __cplusplus
|
---|
59 | }
|
---|
60 | #endif
|
---|