Changeset 16274 for trunk/HeuristicLab.ExtLibs
- Timestamp:
- 11/04/18 11:05:31 (6 years ago)
- Location:
- trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1
- Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/HeuristicLab.NativeInterpreter-0.1/DllImporter.cs
r16266 r16274 43 43 } 44 44 45 // x86 45 46 [DllImport(x86dll, EntryPoint = "GetValue", CallingConvention = CallingConvention.Cdecl)] 46 47 internal static extern double GetValue32(NativeInstruction[] code, int len, int row); … … 52 53 internal static extern void GetValuesVectorized32(NativeInstruction[] code, int len, int[] rows, int nRows, double[] result); 53 54 55 // x64 54 56 [DllImport(x64dll, EntryPoint = "GetValue", CallingConvention = CallingConvention.Cdecl)] 55 57 internal static extern double GetValue64(NativeInstruction[] code, int len, int row); -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/CMakeLists.txt
r16269 r16274 19 19 target_compile_options(hl-native-interpreter PRIVATE "/W4" "/Qvec-report:2" "$<$<CONFIG:Release>:/O2>") 20 20 target_compile_options(hl-native-interpreter-vdt PRIVATE "/W4" "/Qvec-report:2" "$<$<CONFIG:Release>:/O2>") 21 target_compile_definitions(hl-native-interpreter-vdt PRIVATE "USE_VDT") 21 22 else() 22 target_compile_options(hl-native-interpreter PRIVATE "-Wall" "-Wextra" "-Werror" "-fopt-info-vec-optimized" "$<$<CONFIG:Release>:-O fast>")23 target_compile_options(hl-native-interpreter-vdt PRIVATE "-Wall" "-Wextra" "-Werror" "-fopt-info-vec-optimized" "$<$<CONFIG:Release>:-O fast>")23 target_compile_options(hl-native-interpreter PRIVATE "-Wall" "-Wextra" "-Werror" "-fopt-info-vec-optimized" "$<$<CONFIG:Release>:-O3;-march=x86-64>") 24 target_compile_options(hl-native-interpreter-vdt PRIVATE "-Wall" "-Wextra" "-Werror" "-fopt-info-vec-optimized" "$<$<CONFIG:Release>:-O3;-march=x86-64>") 24 25 target_compile_definitions(hl-native-interpreter-vdt PRIVATE "USE_VDT") 26 set_target_properties(hl-native-interpreter PROPERTIES PREFIX "") 27 set_target_properties(hl-native-interpreter-vdt PROPERTIES PREFIX "") 25 28 endif() -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/README.md
r16269 r16274 43 43 ## TypeCoherent Grammar 44 44 45 | Rows | StandardInterpreter | LinerInterpreter | ILEmittingInterpreter | CompiledTreeInterpreter | Native-MSVC-Std | Native-MSVC-Vdt | Native-MinGW-Std | Native-MinGW-Vdt | Native-MSVC-Std[BatchSize=64] | Native-MSVC-Vdt[BatchSize=64] | Native-MinGW-Std[BatchSize=64] | Native-MinGW-Vdt[BatchSize=64] | 46 |-------|---------------------|------------------|-----------------------|-------------------------|-----------------|-----------------|------------------|------------------|-------------------------------|-------------------------------|--------------------------------|--------------------------------| 47 | 1000 | 0.0435 | 0.1522 | 0.0813 | 0.0732 | 0.3049 | 0.3074 | 0.2419 | 0.3098 | 0.7542 | 0.7928 | 0.3049 | 1.1368 | 48 | 2000 | 0.0441 | 0.1569 | 0.1254 | 0.1165 | 0.3066 | 0.3138 | 0.2407 | 0.3170 | 0.7932 | 0.8429 | 0.3199 | 1.2424 | 49 | 3000 | 0.0441 | 0.1536 | 0.1521 | 0.1476 | 0.3115 | 0.3088 | 0.2397 | 0.3136 | 0.8595 | 0.8675 | 0.3268 | 1.3074 | 50 | 4000 | 0.0440 | 0.1559 | 0.1708 | 0.1692 | 0.3122 | 0.3166 | 0.2420 | 0.3172 | 0.8443 | 0.8918 | 0.3247 | 1.3084 | 51 | 5000 | 0.0436 | 0.1543 | 0.1829 | 0.1852 | 0.3119 | 0.3138 | 0.2355 | 0.3175 | 0.8634 | 0.8872 | 0.3165 | 1.3024 | 52 | 6000 | 0.0439 | 0.1539 | 0.1950 | 0.1939 | 0.3091 | 0.3060 | 0.2389 | 0.3127 | 0.8758 | 0.9049 | 0.3284 | 1.3058 | 53 | 7000 | 0.0437 | 0.1553 | 0.2013 | 0.2055 | 0.3131 | 0.2835 | 0.2403 | 0.3141 | 0.8542 | 0.8989 | 0.3292 | 1.2941 | 54 | 8000 | 0.0442 | 0.1530 | 0.2084 | 0.2095 | 0.3109 | 0.3096 | 0.2381 | 0.3132 | 0.8804 | 0.9049 | 0.3286 | 1.3383 | 55 | 9000 | 0.0440 | 0.1552 | 0.2127 | 0.2222 | 0.3107 | 0.2944 | 0.2385 | 0.3035 | 0.8359 | 0.9049 | 0.3171 | 1.2707 | 56 | 10000 | 0.0439 | 0.1554 | 0.2133 | 0.2274 | 0.3118 | 0.2870 | 0.2385 | 0.3089 | 0.8424 | 0.9023 | 0.3187 | 1.3271 | 45 | Rows | StandardInterpreter | LinerInterpreter | ILEmittingInterpreter | CompiledTreeInterpreter | Native-MSVC-Std | 
Native-MSVC-Vdt | Native-MinGW-Std | Native-MinGW-Vdt | Native-MSVC-Std[BatchSize=64] | Native-MSVC-Vdt[BatchSize=64] | Native-MinGW-Std[BatchSize=64] | Native-MinGW-Vdt[BatchSize=64] | 46 |:-----:|:-------------------:|:----------------:|:---------------------:|:-----------------------:|:---------------:|:---------------:|:----------------:|:----------------:|:-----------------------------:|:-----------------------------:|:------------------------------:|:------------------------------:| 47 | 1000 | 0.0435 | 0.1522 | 0.0813 | 0.0732 | 0.3049 | 0.3074 | 0.2419 | 0.3098 | 0.7542 | 0.7928 | 0.3049 | 1.1368 | 48 | 2000 | 0.0441 | 0.1569 | 0.1254 | 0.1165 | 0.3066 | 0.3138 | 0.2407 | 0.3170 | 0.7932 | 0.8429 | 0.3199 | 1.2424 | 49 | 3000 | 0.0441 | 0.1536 | 0.1521 | 0.1476 | 0.3115 | 0.3088 | 0.2397 | 0.3136 | 0.8595 | 0.8675 | 0.3268 | 1.3074 | 50 | 4000 | 0.0440 | 0.1559 | 0.1708 | 0.1692 | 0.3122 | 0.3166 | 0.2420 | 0.3172 | 0.8443 | 0.8918 | 0.3247 | 1.3084 | 51 | 5000 | 0.0436 | 0.1543 | 0.1829 | 0.1852 | 0.3119 | 0.3138 | 0.2355 | 0.3175 | 0.8634 | 0.8872 | 0.3165 | 1.3024 | 52 | 6000 | 0.0439 | 0.1539 | 0.1950 | 0.1939 | 0.3091 | 0.3060 | 0.2389 | 0.3127 | 0.8758 | 0.9049 | 0.3284 | 1.3058 | 53 | 7000 | 0.0437 | 0.1553 | 0.2013 | 0.2055 | 0.3131 | 0.2835 | 0.2403 | 0.3141 | 0.8542 | 0.8989 | 0.3292 | 1.2941 | 54 | 8000 | 0.0442 | 0.1530 | 0.2084 | 0.2095 | 0.3109 | 0.3096 | 0.2381 | 0.3132 | 0.8804 | 0.9049 | 0.3286 | 1.3383 | 55 | 9000 | 0.0440 | 0.1552 | 0.2127 | 0.2222 | 0.3107 | 0.2944 | 0.2385 | 0.3035 | 0.8359 | 0.9049 | 0.3171 | 1.2707 | 56 | 10000 | 0.0439 | 0.1554 | 0.2133 | 0.2274 | 0.3118 | 0.2870 | 0.2385 | 0.3089 | 0.8424 | 0.9023 | 0.3187 | 1.3271 | 57 58 59 60 61 62 63 64 65 66 -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/lib/vdt/exp.h
r16269 r16274 153 153 //------------------------------------------------------------------------------ 154 154 155 void expv(const uint32_t size, double const * __restrict iarray, double* __restrict oarray);156 void fast_expv(const uint32_t size, double const * __restrict iarray, double* __restrict oarray);157 void expfv(const uint32_t size, float const * __restrict iarray, float* __restrict oarray);158 void fast_expfv(const uint32_t size, float const * __restrict iarray, float* __restrict oarray);159 160 155 } // end namespace vdt 161 156 -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/src/interpreter.cpp
r16269 r16274 1 #include <memory>2 3 1 #include "interpreter.h" 4 2 3 #ifdef __cplusplus 5 4 extern "C" { 5 #endif 6 7 constexpr size_t BUFSIZE = BATCHSIZE * sizeof(double); 6 8 7 9 // slow (ish?) … … 22 24 23 25 __declspec(dllexport) 24 void __cdecl GetValuesVectorized(instruction* code, int codeLength, int* rows, int totalRows, double* result) noexcept26 void __cdecl GetValuesVectorized(instruction* code, int codeLength, int* rows, int totalRows, double* __restrict result) noexcept 25 27 { 26 std::vector<double[BUFSIZE]> buffers(codeLength); 27 // initialize instruction buffers 28 double* buffer = static_cast<double*>(_aligned_malloc(codeLength * BUFSIZE, 16)); 28 29 for (int i = 0; i < codeLength; ++i) 29 30 { 30 31 instruction& in = code[i]; 31 in.buf = buffer s[i];32 in.buf = buffer + (i * BATCHSIZE); 32 33 33 34 if (in.opcode == OpCodes::Const) … … 37 38 } 38 39 39 int remainingRows = totalRows % B UFSIZE;40 int remainingRows = totalRows % BATCHSIZE; 40 41 int total = totalRows - remainingRows; 41 42 42 for (int rowIndex = 0; rowIndex < total; rowIndex += B UFSIZE)43 for (int rowIndex = 0; rowIndex < total; rowIndex += BATCHSIZE) 43 44 { 44 evaluate(code, codeLength, rows, rowIndex, B UFSIZE);45 std::memcpy(result + rowIndex, code[0].buf, BUFSIZE * sizeof(double));45 evaluate(code, codeLength, rows, rowIndex, BATCHSIZE); 46 std::memcpy(result + rowIndex, code[0].buf, BUFSIZE); 46 47 } 47 48 48 49 // are there any rows left? 49 if (remainingRows > 0) { 50 if (remainingRows > 0) 51 { 50 52 for (int rowIndex = total; rowIndex < totalRows; rowIndex += remainingRows) 51 53 { … … 54 56 } 55 57 } 58 _aligned_free(buffer); 56 59 } 57 60 -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/src/interpreter.h
r16269 r16274 2 2 #define NATIVE_TREE_INTERPRETER_CLANG_H 3 3 4 #include <cstring> 5 #include <vector> 6 #include <algorithm> 7 4 #include "vector_operations.h" 8 5 #include "instruction.h" 9 #include "vector_operations.h"10 6 11 7 inline double evaluate(instruction *code, int len, int row) noexcept … … 146 142 case OpCodes::Sub: 147 143 { 148 load(in.buf, code[in.childIndex].buf); 149 for (int j = 1; j < in.narg; ++j) 150 { 151 sub(in.buf, code[in.childIndex + j].buf); 152 } 153 if (in.narg == 1) 154 { 155 neg(in.buf); 144 if (in.narg == 1) 145 { 146 neg(in.buf, code[in.childIndex].buf); 147 break; 148 } 149 else 150 { 151 load(in.buf, code[in.childIndex].buf); 152 for (int j = 1; j < in.narg; ++j) 153 { 154 sub(in.buf, code[in.childIndex + j].buf); 155 } 156 156 } 157 157 break; … … 168 168 case OpCodes::Div: 169 169 { 170 load(in.buf, code[in.childIndex].buf); 171 for (int j = 1; j < in.narg; ++j) 172 { 173 div(in.buf, code[in.childIndex + j].buf); 174 } 175 if (in.narg == 1) 176 { 177 inv(in.buf); 170 if (in.narg == 1) 171 { 172 inv(in.buf, code[in.childIndex].buf); 173 break; 174 } 175 else 176 { 177 load(in.buf, code[in.childIndex].buf); 178 for (int j = 1; j < in.narg; ++j) 179 { 180 div(in.buf, code[in.childIndex + j].buf); 181 } 178 182 } 179 183 break; -
trunk/HeuristicLab.ExtLibs/HeuristicLab.NativeInterpreter/0.1/NativeInterpreter-0.1/src/vector_operations.h
r16269 r16274 2 2 #define VECTOR_OPERATIONS_H 3 3 4 #define _USE_MATH_DEFINES 5 #include <cmath> 4 6 #include <cstring> 5 #include <cmath>6 7 7 8 #ifdef USE_VDT … … 27 28 #endif 28 29 29 constexpr int B UFSIZE = 64;30 constexpr int BATCHSIZE = 64; 30 31 31 #define FOR(i) for(int i = 0; i < B UFSIZE; ++i)32 #define FOR(i) for(int i = 0; i < BATCHSIZE; ++i) 32 33 33 34 // When auto-vectorizing without __restrict, … … 36 37 37 38 // vector - vector operations 38 inline void load(double* __restrict a, double* __restrict b) noexcept { std::memcpy(a, b, BUFSIZE * sizeof(double)); } 39 inline void add(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] += b[i]; } 40 inline void sub(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] -= b[i]; } 41 inline void mul(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] *= b[i]; } 42 inline void div(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] /= b[i]; } 43 inline void exp(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_exp(b[i]); } 44 inline void log(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_log(b[i]); } 45 inline void sin(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_sin(b[i]); } 46 inline void cos(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_cos(b[i]); } 47 inline void tan(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_tan(b[i]); } 48 inline void sqrt(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_sqrt(b[i]); } 49 inline void pow(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_pow(a[i], hl_round(b[i])); }; 50 inline void root(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = hl_pow(a[i], 1 / hl_round(b[i])); }; 51 inline void square(double* __restrict a, double* __restrict b) noexcept { FOR(i) a[i] = b[i] * b[i]; }; 39 inline void load(double* __restrict a, double 
const * __restrict b) noexcept { std::memcpy(a, b, BATCHSIZE * sizeof(double)); } 40 inline void add(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] += b[i]; } 41 inline void sub(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] -= b[i]; } 42 inline void mul(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] *= b[i]; } 43 inline void div(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] /= b[i]; } 44 inline void exp(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_exp(b[i]); } 45 inline void log(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_log(b[i]); } 46 inline void sin(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_sin(b[i]); } 47 inline void cos(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_cos(b[i]); } 48 inline void tan(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_tan(b[i]); } 49 inline void sqrt(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_sqrt(b[i]); } 50 inline void pow(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_pow(a[i], hl_round(b[i])); }; 51 inline void root(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_pow(a[i], 1. / hl_round(b[i])); }; 52 inline void square(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = hl_pow(b[i], 2.); }; 53 inline void inv(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = 1. 
/ b[i]; } 54 inline void neg(double* __restrict a, double const * __restrict b) noexcept { FOR(i) a[i] = -b[i]; } 52 55 53 56 // vector - scalar operations … … 60 63 // vector operations 61 64 inline void neg(double* __restrict a) noexcept { FOR(i) a[i] = -a[i]; } 62 inline void inv(double* __restrict a) noexcept { FOR(i) a[i] = 1 / a[i]; }65 inline void inv(double* __restrict a) noexcept { FOR(i) a[i] = 1. / a[i]; } 63 66 inline void exp(double* __restrict a) noexcept { FOR(i) a[i] = hl_exp(a[i]); } 64 67 inline void log(double* __restrict a) noexcept { FOR(i) a[i] = hl_log(a[i]); } … … 67 70 inline void sqrt(double* __restrict a) noexcept { FOR(i) a[i] = hl_sqrt(a[i]); } 68 71 inline void round(double* __restrict a) noexcept { FOR(i) a[i] = hl_round(a[i]); } 69 inline void square(double* __restrict a) noexcept { FOR(i) a[i] = a[i] * a[i];; }72 inline void square(double* __restrict a) noexcept { FOR(i) a[i] = hl_pow(a[i], 2.); } 70 73 71 74 #undef FOR
Note: See TracChangeset for help on using the changeset viewer.