[16269] | 1 | #include "interpreter.h"
|
---|
| 2 |
|
---|
[16274] | 3 | #ifdef __cplusplus
|
---|
[16269] | 4 | extern "C" {
|
---|
[16274] | 5 | #endif
|
---|
[16269] | 6 |
|
---|
[16274] | 7 | constexpr size_t BUFSIZE = BATCHSIZE * sizeof(double);
|
---|
| 8 |
|
---|
[16269] | 9 | // slow (ish?)
|
---|
| 10 | __declspec(dllexport)
|
---|
| 11 | double __cdecl GetValue(instruction* code, int codeLength, int row) noexcept
|
---|
| 12 | {
|
---|
| 13 | return evaluate(code, codeLength, row);
|
---|
| 14 | }
|
---|
| 15 |
|
---|
| 16 | __declspec(dllexport)
|
---|
| 17 | void __cdecl GetValues(instruction* code, int codeLength, int* rows, int totalRows, double* result) noexcept
|
---|
| 18 | {
|
---|
| 19 | for (int i = 0; i < totalRows; ++i)
|
---|
| 20 | {
|
---|
| 21 | result[i] = evaluate(code, codeLength, rows[i]);
|
---|
| 22 | }
|
---|
| 23 | }
|
---|
| 24 |
|
---|
| 25 | __declspec(dllexport)
|
---|
[16274] | 26 | void __cdecl GetValuesVectorized(instruction* code, int codeLength, int* rows, int totalRows, double* __restrict result) noexcept
|
---|
[16269] | 27 | {
|
---|
[16274] | 28 | double* buffer = static_cast<double*>(_aligned_malloc(codeLength * BUFSIZE, 16));
|
---|
[16269] | 29 | for (int i = 0; i < codeLength; ++i)
|
---|
| 30 | {
|
---|
| 31 | instruction& in = code[i];
|
---|
[16274] | 32 | in.buf = buffer + (i * BATCHSIZE);
|
---|
[16269] | 33 |
|
---|
| 34 | if (in.opcode == OpCodes::Const)
|
---|
| 35 | {
|
---|
| 36 | load(in.buf, in.value);
|
---|
| 37 | }
|
---|
| 38 | }
|
---|
| 39 |
|
---|
[16274] | 40 | int remainingRows = totalRows % BATCHSIZE;
|
---|
[16269] | 41 | int total = totalRows - remainingRows;
|
---|
| 42 |
|
---|
[16274] | 43 | for (int rowIndex = 0; rowIndex < total; rowIndex += BATCHSIZE)
|
---|
[16269] | 44 | {
|
---|
[16274] | 45 | evaluate(code, codeLength, rows, rowIndex, BATCHSIZE);
|
---|
| 46 | std::memcpy(result + rowIndex, code[0].buf, BUFSIZE);
|
---|
[16269] | 47 | }
|
---|
| 48 |
|
---|
| 49 | // are there any rows left?
|
---|
[16274] | 50 | if (remainingRows > 0)
|
---|
| 51 | {
|
---|
[16333] | 52 | evaluate(code, codeLength, rows, total, remainingRows);
|
---|
| 53 | std::memcpy(result + total, code[0].buf, remainingRows * sizeof(double));
|
---|
[16269] | 54 | }
|
---|
[16274] | 55 | _aligned_free(buffer);
|
---|
[16269] | 56 | }
|
---|
| 57 |
|
---|
| 58 | #ifdef __cplusplus
|
---|
| 59 | }
|
---|
| 60 | #endif
|
---|