using System; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; using System.Text; using System.Threading.Tasks; namespace ExpressionClustering { // /* file flann_example.c */ // # include "flann.h" // # include // # include // /* Function that reads a dataset */ // float* read_points(char* filename, int* rows, int* cols); // int main(int argc, char** argv) { // int rows, cols; // int t_rows, t_cols; // float speedup; // /* read dataset points from file dataset.dat */ // float* dataset = read_points("dataset.dat", &rows, &cols); // float* testset = read_points("testset.dat", &t_rows, &t_cols); // /* points in dataset and testset should have the same dimensionality */ // assert(cols == t_cols); // /* number of nearest neighbors to search */ // int nn = 3; // /* allocate memory for the nearest-neighbors indices */ // int* result = (int*)malloc(t_rows * nn * sizeof(int)); // /* allocate memory for the distances */ // float* dists = (float*)malloc(t_rows * nn * sizeof(float)); // /* index parameters are stored here */ // struct FLANNParameters p = DEFAULT_FLANN_PARAMETERS; // p.algorithm = FLANN_INDEX_AUTOTUNED; /* or FLANN_INDEX_KDTREE, FLANN_INDEX_KMEANS, ... /* // p.target_precision = 0.9; /* want 90% target precision */ // /* compute the 3 nearest-neighbors of each point in the testset */ // flann_find_nearest_neighbors(dataset, rows, cols, testset, t_rows, // result, dists, nn, &p); // ... // free(dataset); // free(testset); // free(result); // free(dists); // return 0; // } public static class Flann { public enum flann_algorithm_t { FLANN_INDEX_LINEAR = 0, FLANN_INDEX_KDTREE = 1, FLANN_INDEX_KMEANS = 2, FLANN_INDEX_COMPOSITE = 3, FLANN_INDEX_KDTREE_SINGLE = 3, FLANN_INDEX_SAVED = 254, FLANN_INDEX_AUTOTUNED = 255 }; public enum flann_centers_init_t { FLANN_CENTERS_RANDOM = 0, FLANN_CENTERS_GONZALES = 1, FLANN_CENTERS_KMEANSPP = 2 }; public enum flann_log_level_t { FLANN_LOG_NONE = 0, FLANN_LOG_FATAL = 1, FLANN_LOG_ERROR = 2, FLANN_LOG_WARN = 3, FLANN_LOG_INFO = 4 }; public enum flann_distance_t { FLANN_DIST_EUCLIDEAN = 1, // squared euclidean distance FLANN_DIST_MANHATTAN = 2, FLANN_DIST_MINKOWSKI = 3, FLANN_DIST_HIST_INTERSECT = 5, FlANN_DIST_HELLINGER = 6, FLANN_DIST_CHI_SQUARE = 7, // chi-square FLANN_DIST_KULLBACK_LEIBLER = 8, // kullback-leibler divergence }; public struct FLANNParameters { public flann_algorithm_t algorithm; /* the algorithm to use */ /* search time parameters */ public int checks; /* how many leafs (features) to check in one search */ public float cb_index; /* cluster boundary index. Used when searching the kmeans tree */ public float eps; /* kdtree index parameters */ public int trees; /* number of randomized trees to use (for kdtree) */ public int leaf_max_size; /* kmeans index parameters */ public int branching; /* branching factor (for kmeans tree) */ public int iterations; /* max iterations to perform in one kmeans cluetering (kmeans tree) */ public flann_centers_init_t centers_init; /* algorithm used for picking the initial cluster centers for kmeans tree */ /* autotuned index parameters */ public float target_precision; /* precision desired (used for autotuning, -1 otherwise) */ public float build_weight; /* build tree time weighting factor */ public float memory_weight; /* index memory weigthing factor */ public float sample_fraction; /* what fraction of the dataset to use for autotuning */ public uint table_number_; public uint key_size_; public uint multi_probe_level_; /* other parameters */ public flann_log_level_t log_level; /* determines the verbosity of each flann function */ public long random_seed; /* random seed to use */ }; // struct FLANNParameters DEFAULT_FLANN_PARAMETERS = { // FLANN_INDEX_KDTREE, // 32, 0.2f, 0.0f, // 4, 4, // 32, 11, FLANN_CENTERS_RANDOM, // 0.9f, 0.01f, 0, 0.1f, // FLANN_LOG_NONE, 0 // }; public static FLANNParameters DEFAULT_FLANN_PARAMETERS = new FLANNParameters() { algorithm = flann_algorithm_t.FLANN_INDEX_KDTREE, checks = 32, cb_index = 0.2f, trees = 4, branching = 32, iterations = 11, centers_init = flann_centers_init_t.FLANN_CENTERS_RANDOM, target_precision = 0.9f, build_weight = 0.01f, memory_weight = 0, sample_fraction = 0.1f, log_level = flann_log_level_t.FLANN_LOG_NONE, random_seed = 0, }; [DllImport("flann-1.7.1.dll")] public static extern int flann_find_nearest_neighbors(float[] dataset, int rows, int cols, float[] testset, int t_rows, int[] result, float[] dist, int nn, ref FLANNParameters flann_params); [DllImport("flann-1.7.1.dll")] public static extern IntPtr flann_build_index(float[] dataset, int rows, int cols, ref float speedup, ref FLANNParameters flann_params); [DllImport("flann-1.7.1.dll")] public static extern int flann_find_nearest_neighbors_index(IntPtr index_id, float[] testset, int trows, int[] indices, float[] dists, int nn, int checks, ref FLANNParameters flann_params); [DllImport("flann-1.7.1.dll")] public static extern void flann_set_distance_type(flann_distance_t distance_type, int order); [DllImport("flann-1.7.1.dll")] public static extern int flann_compute_cluster_centers(float[] dataset, int rows, int cols, int clusters, float[] result, ref FLANNParameters flann_params); public static int FindClusters(List dataset, out List results, out List distances, int nClusters) { var _nRows = dataset.Count; var _dists = new float[_nRows]; var _result = new int[_nRows]; var _dim = dataset.First().Length; FLANNParameters p = DEFAULT_FLANN_PARAMETERS; p.algorithm = flann_algorithm_t.FLANN_INDEX_LINEAR; p.centers_init = flann_centers_init_t.FLANN_CENTERS_RANDOM; p.target_precision = 0.9f; p.log_level = flann_log_level_t.FLANN_LOG_INFO; // copy training set var _ds = new float[dataset.Count * _dim]; var i = 0; foreach (var e in dataset) { for (int d = 0; d < _dim; d++) { _ds[i++] = (float)e[d]; } } flann_set_distance_type(flann_distance_t.FLANN_DIST_EUCLIDEAN, 0); float[] centers = new float[nClusters * _dim]; int actualClusters = flann_compute_cluster_centers(_ds, rows: dataset.Count, cols: _dim, clusters: nClusters, result: centers, flann_params: ref p); var res = flann_find_nearest_neighbors(centers, actualClusters, _dim, _ds, _nRows, _result, _dists, 1, ref p); distances = _dists.Select(fi => (double)fi).ToList(); results = _result.ToList(); return res; } public static int FindNearestNeighbours(List dataset, List queryset, out List results, out List distances, int nearestNeighbours = 3) { var _nn = nearestNeighbours; var _tRows = queryset.Count; var _dists = new float[_nn * _tRows]; var _result = new int[_nn * _tRows]; var _dim = dataset.First().Length; FLANNParameters p = DEFAULT_FLANN_PARAMETERS; p.algorithm = flann_algorithm_t.FLANN_INDEX_LINEAR; p.centers_init = flann_centers_init_t.FLANN_CENTERS_RANDOM; p.target_precision = 0.9f; p.log_level = flann_log_level_t.FLANN_LOG_INFO; // copy training set var _ds = new float[dataset.Count * _dim]; var i = 0; for (int d = 0; d < _dim; d++) { foreach (var e in dataset) { _ds[i++] = (float)e[d]; } } flann_set_distance_type(flann_distance_t.FLANN_DIST_EUCLIDEAN, 0); // int nClusters = 100; // float[] centers = new float[nClusters * _dim]; // flann_compute_cluster_centers(_ds, rows: dataset.Count, cols: _dim, clusters: nClusters, result: centers, flann_params: ref p); // for each point in the training set find the nearest cluster // float speedup = -1.0f; // _ds must be a rows × cols matrix stored in row-major order (one feature on each row) //var index = flann_build_index(_ds, rows: dataset.Count, cols: _dim, speedup: ref speedup, flann_params: ref p); // copy testset var _testset = new float[_tRows * _dim]; i = 0; for (int d = 0; d < _dim; d++) { foreach (var e in queryset) { _testset[i++] = (float)e[d]; } } //var res = flann_find_nearest_neighbors_index(index, _testset, _tRows, _result, _dists, _nn, 10, ref p); var res = flann_find_nearest_neighbors( _ds, dataset.Count, _dim, _testset, _tRows, _result, _dists, _nn, ref p); distances = _dists.Select(fi => (double)fi).ToList(); results = _result.ToList(); return res; } } }